From 4a32fea9d78f2d2315c0072757b197d5a304dc8b Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 17 Aug 2014 12:30:27 -0500 Subject: scheduler: Replace __get_cpu_var with this_cpu_ptr Convert all uses of __get_cpu_var for address calculation to use this_cpu_ptr instead. [Uses of __get_cpu_var with cpumask_var_t are no longer handled by this patch] Cc: Peter Zijlstra Acked-by: Ingo Molnar Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- include/linux/kernel_stat.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux/kernel_stat.h') diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index ecbc52f9ff77..8422b4ed6882 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -44,8 +44,8 @@ DECLARE_PER_CPU(struct kernel_stat, kstat); DECLARE_PER_CPU(struct kernel_cpustat, kernel_cpustat); /* Must have preemption disabled for this to be meaningful. */ -#define kstat_this_cpu (&__get_cpu_var(kstat)) -#define kcpustat_this_cpu (&__get_cpu_var(kernel_cpustat)) +#define kstat_this_cpu this_cpu_ptr(&kstat) +#define kcpustat_this_cpu this_cpu_ptr(&kernel_cpustat) #define kstat_cpu(cpu) per_cpu(kstat, cpu) #define kcpustat_cpu(cpu) per_cpu(kernel_cpustat, cpu) -- cgit v1.2.3-59-g8ed1b From 23cfa361f3e54a3e184a5e126bbbdd95f984881a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 12 Nov 2014 12:37:37 +0100 Subject: sched/cputime: Fix cpu_timer_sample_group() double accounting While looking over the cpu-timer code I found that we appear to add the delta for the calling task twice, through: cpu_timer_sample_group() thread_group_cputimer() thread_group_cputime() times->sum_exec_runtime += task_sched_runtime(); *sample = cputime.sum_exec_runtime + task_delta_exec(); Which would make the sample run ahead, making the sleep short. Signed-off-by: Peter Zijlstra (Intel) Cc: KOSAKI Motohiro Cc: Oleg Nesterov Cc: Stanislaw Gruszka Cc: Christoph Lameter Cc: Frederic Weisbecker Cc: Linus Torvalds Cc: Rik van Riel Cc: Tejun Heo Link: http://lkml.kernel.org/r/20141112113737.GI10476@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- include/linux/kernel_stat.h | 5 ----- kernel/sched/core.c | 13 ------------- kernel/time/posix-cpu-timers.c | 2 +- 3 files changed, 1 insertion(+), 19 deletions(-) (limited to 'include/linux/kernel_stat.h') diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 8422b4ed6882..b9376cd5a187 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -77,11 +77,6 @@ static inline unsigned int kstat_cpu_irqs_sum(unsigned int cpu) return kstat_cpu(cpu).irqs_sum; } -/* - * Lock/unlock the current runqueue - to extract task statistics: - */ -extern unsigned long long task_delta_exec(struct task_struct *); - extern void account_user_time(struct task_struct *, cputime_t, cputime_t); extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t); extern void account_steal_time(cputime_t); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 5f12ca65c9a7..797a6c84c48d 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2499,19 +2499,6 @@ static u64 do_task_delta_exec(struct task_struct *p, struct rq *rq) return ns; } -unsigned long long task_delta_exec(struct task_struct *p) -{ - unsigned long flags; - struct rq *rq; - u64 ns = 0; - - rq = task_rq_lock(p, &flags); - ns = do_task_delta_exec(p, rq); - task_rq_unlock(rq, p, &flags); - - return ns; -} - /* * Return accounted runtime for the task. * In case the task is currently running, return the runtime plus current's diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 492b986195d5..a16b67859e2a 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -553,7 +553,7 @@ static int cpu_timer_sample_group(const clockid_t which_clock, *sample = cputime_to_expires(cputime.utime); break; case CPUCLOCK_SCHED: - *sample = cputime.sum_exec_runtime + task_delta_exec(p); + *sample = cputime.sum_exec_runtime; break; } return 0; -- cgit v1.2.3-59-g8ed1b From c291ee622165cb2c8d4e7af63fffd499354a23be Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 11 Dec 2014 23:01:41 +0100 Subject: genirq: Prevent proc race against freeing of irq descriptors Since the rework of the sparse interrupt code to actually free the unused interrupt descriptors there exists a race between the /proc interfaces to the irq subsystem and the code which frees the interrupt descriptor. CPU0 CPU1 show_interrupts() desc = irq_to_desc(X); free_desc(desc) remove_from_radix_tree(); kfree(desc); raw_spinlock_irq(&desc->lock); /proc/interrupts is the only interface which can actively corrupt kernel memory via the lock access. /proc/stat can only read from freed memory. Extremly hard to trigger, but possible. The interfaces in /proc/irq/N/ are not affected by this because the removal of the proc file is serialized in procfs against concurrent readers/writers. The removal happens before the descriptor is freed. For architectures which have CONFIG_SPARSE_IRQ=n this is a non issue as the descriptor is never freed. It's merely cleared out with the irq descriptor lock held. So any concurrent proc access will either see the old correct value or the cleared out ones. Protect the lookup and access to the irq descriptor in show_interrupts() with the sparse_irq_lock. Provide kstat_irqs_usr() which is protecting the lookup and access with sparse_irq_lock and switch /proc/stat to use it. Document the existing kstat_irqs interfaces so it's clear that the caller needs to take care about protection. The users of these interfaces are either not affected due to SPARSE_IRQ=n or already protected against removal. Fixes: 1f5a5b87f78f "genirq: Implement a sane sparse_irq allocator" Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org --- fs/proc/stat.c | 2 +- include/linux/kernel_stat.h | 1 + kernel/irq/internals.h | 4 ++++ kernel/irq/irqdesc.c | 52 +++++++++++++++++++++++++++++++++++++++++++++ kernel/irq/proc.c | 22 ++++++++++++++++++- 5 files changed, 79 insertions(+), 2 deletions(-) (limited to 'include/linux/kernel_stat.h') diff --git a/fs/proc/stat.c b/fs/proc/stat.c index bf2d03f8fd3e..510413eb25b8 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -159,7 +159,7 @@ static int show_stat(struct seq_file *p, void *v) /* sum again ? it could be updated? */ for_each_irq_nr(j) - seq_put_decimal_ull(p, ' ', kstat_irqs(j)); + seq_put_decimal_ull(p, ' ', kstat_irqs_usr(j)); seq_printf(p, "\nctxt %llu\n" diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index b9376cd5a187..25a822f6f000 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -68,6 +68,7 @@ static inline unsigned int kstat_softirqs_cpu(unsigned int irq, int cpu) * Number of interrupts per specific IRQ source, since bootup */ extern unsigned int kstat_irqs(unsigned int irq); +extern unsigned int kstat_irqs_usr(unsigned int irq); /* * Number of interrupts per cpu, since bootup diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 4332d766619d..df553b0af936 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -78,8 +78,12 @@ extern void unmask_threaded_irq(struct irq_desc *desc); #ifdef CONFIG_SPARSE_IRQ static inline void irq_mark_irq(unsigned int irq) { } +extern void irq_lock_sparse(void); +extern void irq_unlock_sparse(void); #else extern void irq_mark_irq(unsigned int irq); +static inline void irq_lock_sparse(void) { } +static inline void irq_unlock_sparse(void) { } #endif extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr); diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index a1782f88f0af..99793b9b6d23 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -132,6 +132,16 @@ static void free_masks(struct irq_desc *desc) static inline void free_masks(struct irq_desc *desc) { } #endif +void irq_lock_sparse(void) +{ + mutex_lock(&sparse_irq_lock); +} + +void irq_unlock_sparse(void) +{ + mutex_unlock(&sparse_irq_lock); +} + static struct irq_desc *alloc_desc(int irq, int node, struct module *owner) { struct irq_desc *desc; @@ -168,6 +178,12 @@ static void free_desc(unsigned int irq) unregister_irq_proc(irq, desc); + /* + * sparse_irq_lock protects also show_interrupts() and + * kstat_irq_usr(). Once we deleted the descriptor from the + * sparse tree we can free it. Access in proc will fail to + * lookup the descriptor. + */ mutex_lock(&sparse_irq_lock); delete_irq_desc(irq); mutex_unlock(&sparse_irq_lock); @@ -574,6 +590,15 @@ void kstat_incr_irq_this_cpu(unsigned int irq) kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); } +/** + * kstat_irqs_cpu - Get the statistics for an interrupt on a cpu + * @irq: The interrupt number + * @cpu: The cpu number + * + * Returns the sum of interrupt counts on @cpu since boot for + * @irq. The caller must ensure that the interrupt is not removed + * concurrently. + */ unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) { struct irq_desc *desc = irq_to_desc(irq); @@ -582,6 +607,14 @@ unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) *per_cpu_ptr(desc->kstat_irqs, cpu) : 0; } +/** + * kstat_irqs - Get the statistics for an interrupt + * @irq: The interrupt number + * + * Returns the sum of interrupt counts on all cpus since boot for + * @irq. The caller must ensure that the interrupt is not removed + * concurrently. + */ unsigned int kstat_irqs(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); @@ -594,3 +627,22 @@ unsigned int kstat_irqs(unsigned int irq) sum += *per_cpu_ptr(desc->kstat_irqs, cpu); return sum; } + +/** + * kstat_irqs_usr - Get the statistics for an interrupt + * @irq: The interrupt number + * + * Returns the sum of interrupt counts on all cpus since boot for + * @irq. Contrary to kstat_irqs() this can be called from any + * preemptible context. It's protected against concurrent removal of + * an interrupt descriptor when sparse irqs are enabled. + */ +unsigned int kstat_irqs_usr(unsigned int irq) +{ + int sum; + + irq_lock_sparse(); + sum = kstat_irqs(irq); + irq_unlock_sparse(); + return sum; +} diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index ac1ba2f11032..9dc9bfd8a678 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -15,6 +15,23 @@ #include "internals.h" +/* + * Access rules: + * + * procfs protects read/write of /proc/irq/N/ files against a + * concurrent free of the interrupt descriptor. remove_proc_entry() + * immediately prevents new read/writes to happen and waits for + * already running read/write functions to complete. + * + * We remove the proc entries first and then delete the interrupt + * descriptor from the radix tree and free it. So it is guaranteed + * that irq_to_desc(N) is valid as long as the read/writes are + * permitted by procfs. + * + * The read from /proc/interrupts is a different problem because there + * is no protection. So the lookup and the access to irqdesc + * information must be protected by sparse_irq_lock. + */ static struct proc_dir_entry *root_irq_dir; #ifdef CONFIG_SMP @@ -437,9 +454,10 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); } + irq_lock_sparse(); desc = irq_to_desc(i); if (!desc) - return 0; + goto outsparse; raw_spin_lock_irqsave(&desc->lock, flags); for_each_online_cpu(j) @@ -479,6 +497,8 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); out: raw_spin_unlock_irqrestore(&desc->lock, flags); +outsparse: + irq_unlock_sparse(); return 0; } #endif -- cgit v1.2.3-59-g8ed1b