aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/kernel/time.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/kernel/time.c')
-rw-r--r--arch/powerpc/kernel/time.c521
1 files changed, 161 insertions, 360 deletions
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 1168e8b37e30..a2ab397065c6 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -31,6 +31,7 @@
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/sched/clock.h>
+#include <linux/sched/cputime.h>
#include <linux/kernel.h>
#include <linux/param.h>
#include <linux/string.h>
@@ -50,40 +51,32 @@
#include <linux/irq.h>
#include <linux/delay.h>
#include <linux/irq_work.h>
-#include <linux/clk-provider.h>
+#include <linux/of_clk.h>
#include <linux/suspend.h>
-#include <linux/sched/cputime.h>
#include <linux/processor.h>
-#include <asm/trace.h>
+#include <linux/mc146818rtc.h>
+#include <linux/platform_device.h>
+#include <asm/trace.h>
+#include <asm/interrupt.h>
#include <asm/io.h>
#include <asm/nvram.h>
#include <asm/cache.h>
#include <asm/machdep.h>
#include <linux/uaccess.h>
#include <asm/time.h>
-#include <asm/prom.h>
#include <asm/irq.h>
#include <asm/div64.h>
#include <asm/smp.h>
#include <asm/vdso_datapage.h>
#include <asm/firmware.h>
-#include <asm/asm-prototypes.h>
+#include <asm/mce.h>
/* powerpc clocksource/clockevent code */
#include <linux/clockchips.h>
#include <linux/timekeeper_internal.h>
-static u64 rtc_read(struct clocksource *);
-static struct clocksource clocksource_rtc = {
- .name = "rtc",
- .rating = 400,
- .flags = CLOCK_SOURCE_IS_CONTINUOUS,
- .mask = CLOCKSOURCE_MASK(64),
- .read = rtc_read,
-};
-
static u64 timebase_read(struct clocksource *);
static struct clocksource clocksource_timebase = {
.name = "timebase",
@@ -91,10 +84,12 @@ static struct clocksource clocksource_timebase = {
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
.mask = CLOCKSOURCE_MASK(64),
.read = timebase_read,
+ .vdso_clock_mode = VDSO_CLOCKMODE_ARCHTIMER,
};
#define DECREMENTER_DEFAULT_MAX 0x7FFFFFFF
u64 decrementer_max = DECREMENTER_DEFAULT_MAX;
+EXPORT_SYMBOL_GPL(decrementer_max); /* for KVM HDEC */
static int decrementer_set_next_event(unsigned long evt,
struct clock_event_device *dev);
@@ -113,7 +108,13 @@ struct clock_event_device decrementer_clockevent = {
};
EXPORT_SYMBOL(decrementer_clockevent);
-DEFINE_PER_CPU(u64, decrementers_next_tb);
+/*
+ * This always puts next_tb beyond now, so the clock event will never fire
+ * with the usual comparison, no need for a separate test for stopped.
+ */
+#define DEC_CLOCKEVENT_STOPPED ~0ULL
+DEFINE_PER_CPU(u64, decrementers_next_tb) = DEC_CLOCKEVENT_STOPPED;
+EXPORT_SYMBOL_GPL(decrementers_next_tb);
static DEFINE_PER_CPU(struct clock_event_device, decrementers);
#define XSEC_PER_SEC (1024*1024)
@@ -156,10 +157,6 @@ bool tb_invalid;
u64 __cputime_usec_factor;
EXPORT_SYMBOL(__cputime_usec_factor);
-#ifdef CONFIG_PPC_SPLPAR
-void (*dtl_consumer)(struct dtl_entry *, u64);
-#endif
-
static void calc_cputime_factors(void)
{
struct div_result res;
@@ -181,99 +178,6 @@ static inline unsigned long read_spurr(unsigned long tb)
return tb;
}
-#ifdef CONFIG_PPC_SPLPAR
-
-/*
- * Scan the dispatch trace log and count up the stolen time.
- * Should be called with interrupts disabled.
- */
-static u64 scan_dispatch_log(u64 stop_tb)
-{
- u64 i = local_paca->dtl_ridx;
- struct dtl_entry *dtl = local_paca->dtl_curr;
- struct dtl_entry *dtl_end = local_paca->dispatch_log_end;
- struct lppaca *vpa = local_paca->lppaca_ptr;
- u64 tb_delta;
- u64 stolen = 0;
- u64 dtb;
-
- if (!dtl)
- return 0;
-
- if (i == be64_to_cpu(vpa->dtl_idx))
- return 0;
- while (i < be64_to_cpu(vpa->dtl_idx)) {
- dtb = be64_to_cpu(dtl->timebase);
- tb_delta = be32_to_cpu(dtl->enqueue_to_dispatch_time) +
- be32_to_cpu(dtl->ready_to_enqueue_time);
- barrier();
- if (i + N_DISPATCH_LOG < be64_to_cpu(vpa->dtl_idx)) {
- /* buffer has overflowed */
- i = be64_to_cpu(vpa->dtl_idx) - N_DISPATCH_LOG;
- dtl = local_paca->dispatch_log + (i % N_DISPATCH_LOG);
- continue;
- }
- if (dtb > stop_tb)
- break;
- if (dtl_consumer)
- dtl_consumer(dtl, i);
- stolen += tb_delta;
- ++i;
- ++dtl;
- if (dtl == dtl_end)
- dtl = local_paca->dispatch_log;
- }
- local_paca->dtl_ridx = i;
- local_paca->dtl_curr = dtl;
- return stolen;
-}
-
-/*
- * Accumulate stolen time by scanning the dispatch trace log.
- * Called on entry from user mode.
- */
-void notrace accumulate_stolen_time(void)
-{
- u64 sst, ust;
- unsigned long save_irq_soft_mask = irq_soft_mask_return();
- struct cpu_accounting_data *acct = &local_paca->accounting;
-
- /* We are called early in the exception entry, before
- * soft/hard_enabled are sync'ed to the expected state
- * for the exception. We are hard disabled but the PACA
- * needs to reflect that so various debug stuff doesn't
- * complain
- */
- irq_soft_mask_set(IRQS_DISABLED);
-
- sst = scan_dispatch_log(acct->starttime_user);
- ust = scan_dispatch_log(acct->starttime);
- acct->stime -= sst;
- acct->utime -= ust;
- acct->steal_time += ust + sst;
-
- irq_soft_mask_set(save_irq_soft_mask);
-}
-
-static inline u64 calculate_stolen_time(u64 stop_tb)
-{
- if (!firmware_has_feature(FW_FEATURE_SPLPAR))
- return 0;
-
- if (get_paca()->dtl_ridx != be64_to_cpu(get_lppaca()->dtl_idx))
- return scan_dispatch_log(stop_tb);
-
- return 0;
-}
-
-#else /* CONFIG_PPC_SPLPAR */
-static inline u64 calculate_stolen_time(u64 stop_tb)
-{
- return 0;
-}
-
-#endif /* CONFIG_PPC_SPLPAR */
-
/*
* Account time for a transition between system, hard irq
* or soft irq state.
@@ -318,12 +222,11 @@ static unsigned long vtime_delta_scaled(struct cpu_accounting_data *acct,
return stime_scaled;
}
-static unsigned long vtime_delta(struct task_struct *tsk,
+static unsigned long vtime_delta(struct cpu_accounting_data *acct,
unsigned long *stime_scaled,
unsigned long *steal_time)
{
unsigned long now, stime;
- struct cpu_accounting_data *acct = get_accounting(tsk);
WARN_ON_ONCE(!irqs_disabled());
@@ -333,34 +236,39 @@ static unsigned long vtime_delta(struct task_struct *tsk,
*stime_scaled = vtime_delta_scaled(acct, now, stime);
- *steal_time = calculate_stolen_time(now);
+ if (IS_ENABLED(CONFIG_PPC_SPLPAR) &&
+ firmware_has_feature(FW_FEATURE_SPLPAR))
+ *steal_time = pseries_calculate_stolen_time(now);
+ else
+ *steal_time = 0;
return stime;
}
+static void vtime_delta_kernel(struct cpu_accounting_data *acct,
+ unsigned long *stime, unsigned long *stime_scaled)
+{
+ unsigned long steal_time;
+
+ *stime = vtime_delta(acct, stime_scaled, &steal_time);
+ *stime -= min(*stime, steal_time);
+ acct->steal_time += steal_time;
+}
+
void vtime_account_kernel(struct task_struct *tsk)
{
- unsigned long stime, stime_scaled, steal_time;
struct cpu_accounting_data *acct = get_accounting(tsk);
+ unsigned long stime, stime_scaled;
- stime = vtime_delta(tsk, &stime_scaled, &steal_time);
+ vtime_delta_kernel(acct, &stime, &stime_scaled);
- stime -= min(stime, steal_time);
- acct->steal_time += steal_time;
-
- if ((tsk->flags & PF_VCPU) && !irq_count()) {
+ if (tsk->flags & PF_VCPU) {
acct->gtime += stime;
#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
acct->utime_scaled += stime_scaled;
#endif
} else {
- if (hardirq_count())
- acct->hardirq_time += stime;
- else if (in_serving_softirq())
- acct->softirq_time += stime;
- else
- acct->stime += stime;
-
+ acct->stime += stime;
#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
acct->stime_scaled += stime_scaled;
#endif
@@ -373,10 +281,34 @@ void vtime_account_idle(struct task_struct *tsk)
unsigned long stime, stime_scaled, steal_time;
struct cpu_accounting_data *acct = get_accounting(tsk);
- stime = vtime_delta(tsk, &stime_scaled, &steal_time);
+ stime = vtime_delta(acct, &stime_scaled, &steal_time);
acct->idle_time += stime + steal_time;
}
+static void vtime_account_irq_field(struct cpu_accounting_data *acct,
+ unsigned long *field)
+{
+ unsigned long stime, stime_scaled;
+
+ vtime_delta_kernel(acct, &stime, &stime_scaled);
+ *field += stime;
+#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
+ acct->stime_scaled += stime_scaled;
+#endif
+}
+
+void vtime_account_softirq(struct task_struct *tsk)
+{
+ struct cpu_accounting_data *acct = get_accounting(tsk);
+ vtime_account_irq_field(acct, &acct->softirq_time);
+}
+
+void vtime_account_hardirq(struct task_struct *tsk)
+{
+ struct cpu_accounting_data *acct = get_accounting(tsk);
+ vtime_account_irq_field(acct, &acct->hardirq_time);
+}
+
static void vtime_flush_scaled(struct task_struct *tsk,
struct cpu_accounting_data *acct)
{
@@ -445,19 +377,9 @@ void vtime_flush(struct task_struct *tsk)
void __delay(unsigned long loops)
{
unsigned long start;
- int diff;
spin_begin();
- if (__USE_RTC()) {
- start = get_rtcl();
- do {
- /* the RTCL register wraps at 1000000000 */
- diff = get_rtcl() - start;
- if (diff < 0)
- diff += 1000000000;
- spin_cpu_relax();
- } while (diff < loops);
- } else if (tb_invalid) {
+ if (tb_invalid) {
/*
* TB is in error state and isn't ticking anymore.
* HMI handler was unable to recover from TB error.
@@ -465,8 +387,8 @@ void __delay(unsigned long loops)
*/
spin_cpu_relax();
} else {
- start = get_tbl();
- while (get_tbl() - start < loops)
+ start = mftb();
+ while (mftb() - start < loops)
spin_cpu_relax();
}
spin_end();
@@ -522,35 +444,6 @@ static inline void clear_irq_work_pending(void)
"i" (offsetof(struct paca_struct, irq_work_pending)));
}
-void arch_irq_work_raise(void)
-{
- preempt_disable();
- set_irq_work_pending_flag();
- /*
- * Non-nmi code running with interrupts disabled will replay
- * irq_happened before it re-enables interrupts, so setthe
- * decrementer there instead of causing a hardware exception
- * which would immediately hit the masked interrupt handler
- * and have the net effect of setting the decrementer in
- * irq_happened.
- *
- * NMI interrupts can not check this when they return, so the
- * decrementer hardware exception is raised, which will fire
- * when interrupts are next enabled.
- *
- * BookE does not support this yet, it must audit all NMI
- * interrupt handlers to ensure they call nmi_enter() so this
- * check would be correct.
- */
- if (IS_ENABLED(CONFIG_BOOKE) || !irqs_disabled() || in_nmi()) {
- set_dec(1);
- } else {
- hard_irq_disable();
- local_paca->irq_happened |= PACA_IRQ_DEC;
- }
- preempt_enable();
-}
-
#else /* 32-bit */
DEFINE_PER_CPU(u8, irq_work_pending);
@@ -559,95 +452,133 @@ DEFINE_PER_CPU(u8, irq_work_pending);
#define test_irq_work_pending() __this_cpu_read(irq_work_pending)
#define clear_irq_work_pending() __this_cpu_write(irq_work_pending, 0)
+#endif /* 32 vs 64 bit */
+
void arch_irq_work_raise(void)
{
+ /*
+ * 64-bit code that uses irq soft-mask can just cause an immediate
+ * interrupt here that gets soft masked, if this is called under
+ * local_irq_disable(). It might be possible to prevent that happening
+ * by noticing interrupts are disabled and setting decrementer pending
+ * to be replayed when irqs are enabled. The problem there is that
+ * tracing can call irq_work_raise, including in code that does low
+ * level manipulations of irq soft-mask state (e.g., trace_hardirqs_on)
+ * which could get tangled up if we're messing with the same state
+ * here.
+ */
preempt_disable();
set_irq_work_pending_flag();
set_dec(1);
preempt_enable();
}
-#endif /* 32 vs 64 bit */
+static void set_dec_or_work(u64 val)
+{
+ set_dec(val);
+ /* We may have raced with new irq work */
+ if (unlikely(test_irq_work_pending()))
+ set_dec(1);
+}
#else /* CONFIG_IRQ_WORK */
#define test_irq_work_pending() 0
#define clear_irq_work_pending()
+static void set_dec_or_work(u64 val)
+{
+ set_dec(val);
+}
#endif /* CONFIG_IRQ_WORK */
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+void timer_rearm_host_dec(u64 now)
+{
+ u64 *next_tb = this_cpu_ptr(&decrementers_next_tb);
+
+ WARN_ON_ONCE(!arch_irqs_disabled());
+ WARN_ON_ONCE(mfmsr() & MSR_EE);
+
+ if (now >= *next_tb) {
+ local_paca->irq_happened |= PACA_IRQ_DEC;
+ } else {
+ now = *next_tb - now;
+ if (now > decrementer_max)
+ now = decrementer_max;
+ set_dec_or_work(now);
+ }
+}
+EXPORT_SYMBOL_GPL(timer_rearm_host_dec);
+#endif
+
/*
* timer_interrupt - gets called when the decrementer overflows,
* with interrupts disabled.
*/
-void timer_interrupt(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt)
{
struct clock_event_device *evt = this_cpu_ptr(&decrementers);
u64 *next_tb = this_cpu_ptr(&decrementers_next_tb);
struct pt_regs *old_regs;
u64 now;
- /* Some implementations of hotplug will get timer interrupts while
- * offline, just ignore these and we also need to set
- * decrementers_next_tb as MAX to make sure __check_irq_replay
- * don't replay timer interrupt when return, otherwise we'll trap
- * here infinitely :(
+ /*
+ * Some implementations of hotplug will get timer interrupts while
+ * offline, just ignore these.
*/
if (unlikely(!cpu_online(smp_processor_id()))) {
- *next_tb = ~(u64)0;
set_dec(decrementer_max);
return;
}
- /* Ensure a positive value is written to the decrementer, or else
- * some CPUs will continue to take decrementer exceptions. When the
- * PPC_WATCHDOG (decrementer based) is configured, keep this at most
- * 31 bits, which is about 4 seconds on most systems, which gives
- * the watchdog a chance of catching timer interrupt hard lockups.
- */
- if (IS_ENABLED(CONFIG_PPC_WATCHDOG))
- set_dec(0x7fffffff);
- else
- set_dec(decrementer_max);
-
- /* Conditionally hard-enable interrupts now that the DEC has been
- * bumped to its maximum value
- */
- may_hard_irq_enable();
+ /* Conditionally hard-enable interrupts. */
+ if (should_hard_irq_enable()) {
+ /*
+ * Ensure a positive value is written to the decrementer, or
+ * else some CPUs will continue to take decrementer exceptions.
+ * When the PPC_WATCHDOG (decrementer based) is configured,
+ * keep this at most 31 bits, which is about 4 seconds on most
+ * systems, which gives the watchdog a chance of catching timer
+ * interrupt hard lockups.
+ */
+ if (IS_ENABLED(CONFIG_PPC_WATCHDOG))
+ set_dec(0x7fffffff);
+ else
+ set_dec(decrementer_max);
+ do_hard_irq_enable();
+ }
#if defined(CONFIG_PPC32) && defined(CONFIG_PPC_PMAC)
if (atomic_read(&ppc_n_lost_interrupts) != 0)
- do_IRQ(regs);
+ __do_IRQ(regs);
#endif
old_regs = set_irq_regs(regs);
- irq_enter();
+
trace_timer_interrupt_entry(regs);
if (test_irq_work_pending()) {
clear_irq_work_pending();
+ mce_run_irq_context_handlers();
irq_work_run();
}
- now = get_tb_or_rtc();
+ now = get_tb();
if (now >= *next_tb) {
- *next_tb = ~(u64)0;
- if (evt->event_handler)
- evt->event_handler(evt);
+ evt->event_handler(evt);
__this_cpu_inc(irq_stat.timer_irqs_event);
} else {
now = *next_tb - now;
- if (now <= decrementer_max)
- set_dec(now);
- /* We may have raced with new irq work */
- if (test_irq_work_pending())
- set_dec(1);
+ if (now > decrementer_max)
+ now = decrementer_max;
+ set_dec_or_work(now);
__this_cpu_inc(irq_stat.timer_irqs_others);
}
trace_timer_interrupt_exit(regs);
- irq_exit();
+
set_irq_regs(old_regs);
}
EXPORT_SYMBOL(timer_interrupt);
@@ -655,26 +586,18 @@ EXPORT_SYMBOL(timer_interrupt);
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
void timer_broadcast_interrupt(void)
{
- u64 *next_tb = this_cpu_ptr(&decrementers_next_tb);
-
- *next_tb = ~(u64)0;
tick_receive_broadcast();
__this_cpu_inc(irq_stat.broadcast_irqs_event);
}
#endif
-/*
- * Hypervisor decrementer interrupts shouldn't occur but are sometimes
- * left pending on exit from a KVM guest. We don't need to do anything
- * to clear them, as they are edge-triggered.
- */
-void hdec_interrupt(struct pt_regs *regs)
-{
-}
-
#ifdef CONFIG_SUSPEND
-static void generic_suspend_disable_irqs(void)
+/* Overrides the weak version in kernel/power/main.c */
+void arch_suspend_disable_irqs(void)
{
+ if (ppc_md.suspend_disable_irqs)
+ ppc_md.suspend_disable_irqs();
+
/* Disable the decrementer, so that it doesn't interfere
* with suspending.
*/
@@ -684,23 +607,11 @@ static void generic_suspend_disable_irqs(void)
set_dec(decrementer_max);
}
-static void generic_suspend_enable_irqs(void)
-{
- local_irq_enable();
-}
-
-/* Overrides the weak version in kernel/power/main.c */
-void arch_suspend_disable_irqs(void)
-{
- if (ppc_md.suspend_disable_irqs)
- ppc_md.suspend_disable_irqs();
- generic_suspend_disable_irqs();
-}
-
/* Overrides the weak version in kernel/power/main.c */
void arch_suspend_enable_irqs(void)
{
- generic_suspend_enable_irqs();
+ local_irq_enable();
+
if (ppc_md.suspend_enable_irqs)
ppc_md.suspend_enable_irqs();
}
@@ -721,8 +632,6 @@ EXPORT_SYMBOL_GPL(tb_to_ns);
*/
notrace unsigned long long sched_clock(void)
{
- if (__USE_RTC())
- return get_rtc();
return mulhdu(get_tb() - boot_tb, tb_to_ns_scale) << tb_to_ns_shift;
}
@@ -782,7 +691,7 @@ static int __init get_freq(char *name, int cells, unsigned long *val)
static void start_cpu_decrementer(void)
{
-#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
+#ifdef CONFIG_BOOKE_OR_40x
unsigned int tcr;
/* Clear any pending timer interrupts */
@@ -838,7 +747,7 @@ static void __read_persistent_clock(struct timespec64 *ts)
static int first = 1;
ts->tv_nsec = 0;
- /* XXX this is a litle fragile but will work okay in the short term */
+ /* XXX this is a little fragile but will work okay in the short term */
if (first) {
first = 0;
if (ppc_md.time_init)
@@ -872,113 +781,14 @@ void read_persistent_clock64(struct timespec64 *ts)
}
/* clocksource code */
-static notrace u64 rtc_read(struct clocksource *cs)
-{
- return (u64)get_rtc();
-}
-
static notrace u64 timebase_read(struct clocksource *cs)
{
return (u64)get_tb();
}
-
-void update_vsyscall(struct timekeeper *tk)
-{
- struct timespec64 xt;
- struct clocksource *clock = tk->tkr_mono.clock;
- u32 mult = tk->tkr_mono.mult;
- u32 shift = tk->tkr_mono.shift;
- u64 cycle_last = tk->tkr_mono.cycle_last;
- u64 new_tb_to_xs, new_stamp_xsec;
- u64 frac_sec;
-
- if (clock != &clocksource_timebase)
- return;
-
- xt.tv_sec = tk->xtime_sec;
- xt.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
-
- /* Make userspace gettimeofday spin until we're done. */
- ++vdso_data->tb_update_count;
- smp_mb();
-
- /*
- * This computes ((2^20 / 1e9) * mult) >> shift as a
- * 0.64 fixed-point fraction.
- * The computation in the else clause below won't overflow
- * (as long as the timebase frequency is >= 1.049 MHz)
- * but loses precision because we lose the low bits of the constant
- * in the shift. Note that 19342813113834067 ~= 2^(20+64) / 1e9.
- * For a shift of 24 the error is about 0.5e-9, or about 0.5ns
- * over a second. (Shift values are usually 22, 23 or 24.)
- * For high frequency clocks such as the 512MHz timebase clock
- * on POWER[6789], the mult value is small (e.g. 32768000)
- * and so we can shift the constant by 16 initially
- * (295147905179 ~= 2^(20+64-16) / 1e9) and then do the
- * remaining shifts after the multiplication, which gives a
- * more accurate result (e.g. with mult = 32768000, shift = 24,
- * the error is only about 1.2e-12, or 0.7ns over 10 minutes).
- */
- if (mult <= 62500000 && clock->shift >= 16)
- new_tb_to_xs = ((u64) mult * 295147905179ULL) >> (clock->shift - 16);
- else
- new_tb_to_xs = (u64) mult * (19342813113834067ULL >> clock->shift);
-
- /*
- * Compute the fractional second in units of 2^-32 seconds.
- * The fractional second is tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift
- * in nanoseconds, so multiplying that by 2^32 / 1e9 gives
- * it in units of 2^-32 seconds.
- * We assume shift <= 32 because clocks_calc_mult_shift()
- * generates shift values in the range 0 - 32.
- */
- frac_sec = tk->tkr_mono.xtime_nsec << (32 - shift);
- do_div(frac_sec, NSEC_PER_SEC);
-
- /*
- * Work out new stamp_xsec value for any legacy users of systemcfg.
- * stamp_xsec is in units of 2^-20 seconds.
- */
- new_stamp_xsec = frac_sec >> 12;
- new_stamp_xsec += tk->xtime_sec * XSEC_PER_SEC;
-
- /*
- * tb_update_count is used to allow the userspace gettimeofday code
- * to assure itself that it sees a consistent view of the tb_to_xs and
- * stamp_xsec variables. It reads the tb_update_count, then reads
- * tb_to_xs and stamp_xsec and then reads tb_update_count again. If
- * the two values of tb_update_count match and are even then the
- * tb_to_xs and stamp_xsec values are consistent. If not, then it
- * loops back and reads them again until this criteria is met.
- */
- vdso_data->tb_orig_stamp = cycle_last;
- vdso_data->stamp_xsec = new_stamp_xsec;
- vdso_data->tb_to_xs = new_tb_to_xs;
- vdso_data->wtom_clock_sec = tk->wall_to_monotonic.tv_sec;
- vdso_data->wtom_clock_nsec = tk->wall_to_monotonic.tv_nsec;
- vdso_data->stamp_xtime_sec = xt.tv_sec;
- vdso_data->stamp_xtime_nsec = xt.tv_nsec;
- vdso_data->stamp_sec_fraction = frac_sec;
- vdso_data->hrtimer_res = hrtimer_resolution;
- smp_wmb();
- ++(vdso_data->tb_update_count);
-}
-
-void update_vsyscall_tz(void)
-{
- vdso_data->tz_minuteswest = sys_tz.tz_minuteswest;
- vdso_data->tz_dsttime = sys_tz.tz_dsttime;
-}
-
static void __init clocksource_init(void)
{
- struct clocksource *clock;
-
- if (__USE_RTC())
- clock = &clocksource_rtc;
- else
- clock = &clocksource_timebase;
+ struct clocksource *clock = &clocksource_timebase;
if (clocksource_register_hz(clock, tb_ticks_per_sec)) {
printk(KERN_ERR "clocksource: %s is already registered\n",
@@ -993,19 +803,17 @@ static void __init clocksource_init(void)
static int decrementer_set_next_event(unsigned long evt,
struct clock_event_device *dev)
{
- __this_cpu_write(decrementers_next_tb, get_tb_or_rtc() + evt);
- set_dec(evt);
-
- /* We may have raced with new irq work */
- if (test_irq_work_pending())
- set_dec(1);
+ __this_cpu_write(decrementers_next_tb, get_tb() + evt);
+ set_dec_or_work(evt);
return 0;
}
static int decrementer_shutdown(struct clock_event_device *dev)
{
- decrementer_set_next_event(decrementer_max, dev);
+ __this_cpu_write(decrementers_next_tb, DEC_CLOCKEVENT_STOPPED);
+ set_dec_or_work(decrementer_max);
+
return 0;
}
@@ -1084,7 +892,7 @@ void secondary_cpu_time_init(void)
*/
start_cpu_decrementer();
- /* FIME: Should make unrelatred change to move snapshot_timebase
+ /* FIME: Should make unrelated change to move snapshot_timebase
* call here ! */
register_decrementer_clockevent(smp_processor_id());
}
@@ -1096,17 +904,12 @@ void __init time_init(void)
u64 scale;
unsigned shift;
- if (__USE_RTC()) {
- /* 601 processor: dec counts down by 128 every 128ns */
- ppc_tb_freq = 1000000000;
- } else {
- /* Normal PowerPC with timebase register */
- ppc_md.calibrate_decr();
- printk(KERN_DEBUG "time_init: decrementer frequency = %lu.%.6lu MHz\n",
- ppc_tb_freq / 1000000, ppc_tb_freq % 1000000);
- printk(KERN_DEBUG "time_init: processor frequency = %lu.%.6lu MHz\n",
- ppc_proc_freq / 1000000, ppc_proc_freq % 1000000);
- }
+ /* Normal PowerPC with timebase register */
+ ppc_md.calibrate_decr();
+ printk(KERN_DEBUG "time_init: decrementer frequency = %lu.%.6lu MHz\n",
+ ppc_tb_freq / 1000000, ppc_tb_freq % 1000000);
+ printk(KERN_DEBUG "time_init: processor frequency = %lu.%.6lu MHz\n",
+ ppc_proc_freq / 1000000, ppc_proc_freq % 1000000);
tb_ticks_per_jiffy = ppc_tb_freq / HZ;
tb_ticks_per_sec = ppc_tb_freq;
@@ -1132,7 +935,7 @@ void __init time_init(void)
tb_to_ns_scale = scale;
tb_to_ns_shift = shift;
/* Save the current timebase to pretty up CONFIG_PRINTK_TIME */
- boot_tb = get_tb_or_rtc();
+ boot_tb = get_tb();
/* If platform provided a timezone (pmac), we correct the time */
if (timezone_offset) {
@@ -1140,7 +943,6 @@ void __init time_init(void)
sys_tz.tz_dsttime = 0;
}
- vdso_data->tb_update_count = 0;
vdso_data->tb_ticks_per_sec = tb_ticks_per_sec;
/* initialise and enable the large decrementer (if we have one) */
@@ -1158,9 +960,8 @@ void __init time_init(void)
init_decrementer_clockevent();
tick_setup_hrtimer_broadcast();
-#ifdef CONFIG_COMMON_CLK
of_clk_init(NULL);
-#endif
+ enable_sched_clock_irqtime();
}
/*