1 files changed, 113 insertions, 35 deletions
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 3ed9a5a21d77..4e7759c8ca30 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -96,7 +96,8 @@ static struct clocksource clocksource_timebase = {
 	.read         = timebase_read,
 };
 
-#define DECREMENTER_MAX	0x7fffffff
+#define DECREMENTER_DEFAULT_MAX 0x7FFFFFFF
+u64 decrementer_max = DECREMENTER_DEFAULT_MAX;
 
 static int decrementer_set_next_event(unsigned long evt,
 				      struct clock_event_device *dev);
@@ -166,7 +167,15 @@ DEFINE_PER_CPU(unsigned long, cputime_scaled_last_delta);
 
 cputime_t cputime_one_jiffy;
 
+#ifdef CONFIG_PPC_SPLPAR
 void (*dtl_consumer)(struct dtl_entry *, u64);
+#endif
+
+#ifdef CONFIG_PPC64
+#define get_accounting(tsk)	(&get_paca()->accounting)
+#else
+#define get_accounting(tsk)	(&task_thread_info(tsk)->accounting)
+#endif
 
 static void calc_cputime_factors(void)
 {
@@ -186,7 +195,7 @@ static void calc_cputime_factors(void)
  * Read the SPURR on systems that have it, otherwise the PURR,
  * or if that doesn't exist return the timebase value passed in.
  */
-static u64 read_spurr(u64 tb)
+static unsigned long read_spurr(unsigned long tb)
 {
 	if (cpu_has_feature(CPU_FTR_SPURR))
 		return mfspr(SPRN_SPURR);
@@ -249,8 +258,8 @@ static u64 scan_dispatch_log(u64 stop_tb)
 void accumulate_stolen_time(void)
 {
 	u64 sst, ust;
-
 	u8 save_soft_enabled = local_paca->soft_enabled;
+	struct cpu_accounting_data *acct = &local_paca->accounting;
 
 	/* We are called early in the exception entry, before
 	 * soft/hard_enabled are sync'ed to the expected state
@@ -260,10 +269,10 @@ void accumulate_stolen_time(void)
 	 */
 	local_paca->soft_enabled = 0;
 
-	sst = scan_dispatch_log(local_paca->starttime_user);
-	ust = scan_dispatch_log(local_paca->starttime);
-	local_paca->system_time -= sst;
-	local_paca->user_time -= ust;
+	sst = scan_dispatch_log(acct->starttime_user);
+	ust = scan_dispatch_log(acct->starttime);
+	acct->system_time -= sst;
+	acct->user_time -= ust;
 	local_paca->stolen_time += ust + sst;
 
 	local_paca->soft_enabled = save_soft_enabled;
@@ -275,7 +284,7 @@ static inline u64 calculate_stolen_time(u64 stop_tb)
 
 	if (get_paca()->dtl_ridx != be64_to_cpu(get_lppaca()->dtl_idx)) {
 		stolen = scan_dispatch_log(stop_tb);
-		get_paca()->system_time -= stolen;
+		get_paca()->accounting.system_time -= stolen;
 	}
 
 	stolen += get_paca()->stolen_time;
@@ -295,27 +304,29 @@ static inline u64 calculate_stolen_time(u64 stop_tb)
  * Account time for a transition between system, hard irq
  * or soft irq state.
  */
-static u64 vtime_delta(struct task_struct *tsk,
-			u64 *sys_scaled, u64 *stolen)
+static unsigned long vtime_delta(struct task_struct *tsk,
+				 unsigned long *sys_scaled,
+				 unsigned long *stolen)
 {
-	u64 now, nowscaled, deltascaled;
-	u64 udelta, delta, user_scaled;
+	unsigned long now, nowscaled, deltascaled;
+	unsigned long udelta, delta, user_scaled;
+	struct cpu_accounting_data *acct = get_accounting(tsk);
 
 	WARN_ON_ONCE(!irqs_disabled());
 
 	now = mftb();
 	nowscaled = read_spurr(now);
-	get_paca()->system_time += now - get_paca()->starttime;
-	get_paca()->starttime = now;
-	deltascaled = nowscaled - get_paca()->startspurr;
-	get_paca()->startspurr = nowscaled;
+	acct->system_time += now - acct->starttime;
+	acct->starttime = now;
+	deltascaled = nowscaled - acct->startspurr;
+	acct->startspurr = nowscaled;
 
 	*stolen = calculate_stolen_time(now);
 
-	delta = get_paca()->system_time;
-	get_paca()->system_time = 0;
-	udelta = get_paca()->user_time - get_paca()->utime_sspurr;
-	get_paca()->utime_sspurr = get_paca()->user_time;
+	delta = acct->system_time;
+	acct->system_time = 0;
+	udelta = acct->user_time - acct->utime_sspurr;
+	acct->utime_sspurr = acct->user_time;
 
 	/*
 	 * Because we don't read the SPURR on every kernel entry/exit,
@@ -337,14 +348,14 @@ static u64 vtime_delta(struct task_struct *tsk,
 			*sys_scaled = deltascaled;
 		}
 	}
-	get_paca()->user_time_scaled += user_scaled;
+	acct->user_time_scaled += user_scaled;
 
 	return delta;
 }
 
 void vtime_account_system(struct task_struct *tsk)
 {
-	u64 delta, sys_scaled, stolen;
+	unsigned long delta, sys_scaled, stolen;
 
 	delta = vtime_delta(tsk, &sys_scaled, &stolen);
 	account_system_time(tsk, 0, delta, sys_scaled);
@@ -355,7 +366,7 @@ EXPORT_SYMBOL_GPL(vtime_account_system);
 
 void vtime_account_idle(struct task_struct *tsk)
 {
-	u64 delta, sys_scaled, stolen;
+	unsigned long delta, sys_scaled, stolen;
 
 	delta = vtime_delta(tsk, &sys_scaled, &stolen);
 	account_idle_time(delta + stolen);
@@ -373,15 +384,32 @@ void vtime_account_idle(struct task_struct *tsk)
 void vtime_account_user(struct task_struct *tsk)
 {
 	cputime_t utime, utimescaled;
+	struct cpu_accounting_data *acct = get_accounting(tsk);
 
-	utime = get_paca()->user_time;
-	utimescaled = get_paca()->user_time_scaled;
-	get_paca()->user_time = 0;
-	get_paca()->user_time_scaled = 0;
-	get_paca()->utime_sspurr = 0;
+	utime = acct->user_time;
+	utimescaled = acct->user_time_scaled;
+	acct->user_time = 0;
+	acct->user_time_scaled = 0;
+	acct->utime_sspurr = 0;
 	account_user_time(tsk, utime, utimescaled);
 }
 
+#ifdef CONFIG_PPC32
+/*
+ * Called from the context switch with interrupts disabled, to charge all
+ * accumulated times to the current process, and to prepare accounting on
+ * the next process.
+ */
+void arch_vtime_task_switch(struct task_struct *prev)
+{
+	struct cpu_accounting_data *acct = get_accounting(current);
+
+	acct->starttime = get_accounting(prev)->starttime;
+	acct->system_time = 0;
+	acct->user_time = 0;
+}
+#endif /* CONFIG_PPC32 */
+
 #else /* ! CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 #define calc_cputime_factors()
 #endif
@@ -504,8 +532,8 @@ static void __timer_interrupt(void)
 		__this_cpu_inc(irq_stat.timer_irqs_event);
 	} else {
 		now = *next_tb - now;
-		if (now <= DECREMENTER_MAX)
-			set_dec((int)now);
+		if (now <= decrementer_max)
+			set_dec(now);
 		/* We may have raced with new irq work */
 		if (test_irq_work_pending())
 			set_dec(1);
@@ -535,7 +563,7 @@ void timer_interrupt(struct pt_regs * regs)
 	/* Ensure a positive value is written to the decrementer, or else
 	 * some CPUs will continue to take decrementer exceptions.
 	 */
-	set_dec(DECREMENTER_MAX);
+	set_dec(decrementer_max);
 
 	/* Some implementations of hotplug will get timer interrupts while
 	 * offline, just ignore these and we also need to set
@@ -583,9 +611,9 @@ static void generic_suspend_disable_irqs(void)
 	 * with suspending.
 	 */
 
-	set_dec(DECREMENTER_MAX);
+	set_dec(decrementer_max);
 	local_irq_disable();
-	set_dec(DECREMENTER_MAX);
+	set_dec(decrementer_max);
 }
 
 static void generic_suspend_enable_irqs(void)
@@ -866,7 +894,7 @@ static int decrementer_set_next_event(unsigned long evt,
 
 static int decrementer_shutdown(struct clock_event_device *dev)
 {
-	decrementer_set_next_event(DECREMENTER_MAX, dev);
+	decrementer_set_next_event(decrementer_max, dev);
 	return 0;
 }
 
@@ -892,6 +920,49 @@ static void register_decrementer_clockevent(int cpu)
 	clockevents_register_device(dec);
 }
 
+static void enable_large_decrementer(void)
+{
+	if (!cpu_has_feature(CPU_FTR_ARCH_300))
+		return;
+
+	if (decrementer_max <= DECREMENTER_DEFAULT_MAX)
+		return;
+
+	/*
+	 * If we're running as the hypervisor we need to enable the LD manually
+	 * otherwise firmware should have done it for us.
+	 */
+	if (cpu_has_feature(CPU_FTR_HVMODE))
+		mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_LD);
+}
+
+static void __init set_decrementer_max(void)
+{
+	struct device_node *cpu;
+	u32 bits = 32;
+
+	/* Prior to ISAv3 the decrementer is always 32 bit */
+	if (!cpu_has_feature(CPU_FTR_ARCH_300))
+		return;
+
+	cpu = of_find_node_by_type(NULL, "cpu");
+
+	if (of_property_read_u32(cpu, "ibm,dec-bits", &bits) == 0) {
+		if (bits > 64 || bits < 32) {
+			pr_warn("time_init: firmware supplied invalid ibm,dec-bits");
+			bits = 32;
+		}
+
+		/* calculate the signed maximum given this many bits */
+		decrementer_max = (1ul << (bits - 1)) - 1;
+	}
+
+	of_node_put(cpu);
+
+	pr_info("time_init: %u bit decrementer (max: %llx)\n",
+		bits, decrementer_max);
+}
+
 static void __init init_decrementer_clockevent(void)
 {
 	int cpu = smp_processor_id();
@@ -899,7 +970,7 @@ static void __init init_decrementer_clockevent(void)
 	clockevents_calc_mult_shift(&decrementer_clockevent, ppc_tb_freq, 4);
 
 	decrementer_clockevent.max_delta_ns =
-		clockevent_delta2ns(DECREMENTER_MAX, &decrementer_clockevent);
+		clockevent_delta2ns(decrementer_max, &decrementer_clockevent);
 	decrementer_clockevent.min_delta_ns =
 		clockevent_delta2ns(2, &decrementer_clockevent);
 
@@ -908,6 +979,9 @@ static void __init init_decrementer_clockevent(void)
 
 void secondary_cpu_time_init(void)
 {
+	/* Enable and test the large decrementer for this cpu */
+	enable_large_decrementer();
+
 	/* Start the decrementer on CPUs that have manual control
 	 * such as BookE
 	 */
@@ -973,6 +1047,10 @@ void __init time_init(void)
 	vdso_data->tb_update_count = 0;
 	vdso_data->tb_ticks_per_sec = tb_ticks_per_sec;
 
+	/* initialise and enable the large decrementer (if we have one) */
+	set_decrementer_max();
+	enable_large_decrementer();
+
 	/* Start the decrementer on CPUs that have manual control
 	 * such as BookE
 	 */