From 6e766410c4babd37bc7cd5e25009c179781742c8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 12 May 2008 21:20:41 +0200 Subject: ftrace: annotate core code that should not be traced Mark with "notrace" functions in core code that should not be traced. The "notrace" attribute will prevent gcc from adding a call to ftrace on the annotated functions. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- lib/smp_processor_id.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c index 6c90fb90e19c..e555ab62fbad 100644 --- a/lib/smp_processor_id.c +++ b/lib/smp_processor_id.c @@ -7,7 +7,7 @@ #include #include -unsigned int debug_smp_processor_id(void) +notrace unsigned int debug_smp_processor_id(void) { unsigned long preempt_count = preempt_count(); int this_cpu = raw_smp_processor_id(); -- cgit v1.2.3-59-g8ed1b From 16444a8a40d4c7b4f6de34af0cae1f76a4f6c901 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 12 May 2008 21:20:42 +0200 Subject: ftrace: add basic support for gcc profiler instrumentation If CONFIG_FTRACE is selected and /proc/sys/kernel/ftrace_enabled is set to a non-zero value, the ftrace routine will be called every time we enter a kernel function that is not marked with the "notrace" attribute. The ftrace routine will then call a registered function if one happens to be registered. [ This code has been highly hacked by Steven Rostedt and Ingo Molnar, so don't blame Arnaldo for all of this ;-) ] Update: It is now possible to register more than one ftrace function. If only one ftrace function is registered, that will be the function that ftrace calls directly. If more than one function is registered, then ftrace will call a function that will loop through the functions to call.
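As an illustration only (not part of this patch, and the names are made up): a tracer hooks into this with a struct ftrace_ops, and the callback plus everything it calls must be marked notrace so it does not recurse into itself:

    /* Sketch only: my_trace_func and my_trace_ops are hypothetical names. */
    static void notrace my_trace_func(unsigned long ip, unsigned long parent_ip)
    {
            /* ip is the traced function, parent_ip is its call site */
    }

    static struct ftrace_ops my_trace_ops __read_mostly = {
            .func = my_trace_func,
    };

    /* start tracing */
    register_ftrace_function(&my_trace_ops);
    /* ... later stop again; never free the ops afterwards */
    unregister_ftrace_function(&my_trace_ops);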
Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- Makefile | 4 ++ arch/x86/Kconfig | 1 + arch/x86/kernel/entry_32.S | 27 +++++++++ arch/x86/kernel/entry_64.S | 37 ++++++++++++ include/linux/ftrace.h | 38 +++++++++++++ kernel/Makefile | 1 + kernel/trace/Kconfig | 5 ++ kernel/trace/Makefile | 3 + kernel/trace/ftrace.c | 138 +++++++++++++++++++++++++++++++++++++++++++++ lib/Kconfig.debug | 2 + 10 files changed, 256 insertions(+) create mode 100644 include/linux/ftrace.h create mode 100644 kernel/trace/Kconfig create mode 100644 kernel/trace/Makefile create mode 100644 kernel/trace/ftrace.c (limited to 'lib') diff --git a/Makefile b/Makefile index 20b32351906b..b4a273f19b52 100644 --- a/Makefile +++ b/Makefile @@ -528,6 +528,10 @@ KBUILD_CFLAGS += -g KBUILD_AFLAGS += -gdwarf-2 endif +ifdef CONFIG_FTRACE +KBUILD_CFLAGS += -pg +endif + # We trigger additional mismatches with less inlining ifdef CONFIG_DEBUG_SECTION_MISMATCH KBUILD_CFLAGS += $(call cc-option, -fno-inline-functions-called-once) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index fe361ae7ef2f..c742dfeb0dbe 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -23,6 +23,7 @@ config X86 select HAVE_OPROFILE select HAVE_KPROBES select HAVE_KRETPROBES + select HAVE_FTRACE select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) select HAVE_ARCH_KGDB if !X86_VOYAGER diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 2a609dc3271c..f47b9b5440d2 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1109,6 +1109,33 @@ ENDPROC(xen_failsafe_callback) #endif /* CONFIG_XEN */ +#ifdef CONFIG_FTRACE +ENTRY(mcount) + cmpl $ftrace_stub, ftrace_trace_function + jnz trace + +.globl ftrace_stub +ftrace_stub: + ret + + /* taken from glibc */ +trace: + pushl %eax + pushl %ecx + pushl %edx + movl 0xc(%esp), %eax + movl 0x4(%ebp), %edx + + call *ftrace_trace_function + + popl %edx + popl %ecx + popl %eax + + jmp ftrace_stub +END(mcount) +#endif + .section .rodata,"a" #include "syscall_table_32.S" diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 556a8df522a7..f046e0c64883 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -54,6 +54,43 @@ .code64 +#ifdef CONFIG_FTRACE +ENTRY(mcount) + cmpq $ftrace_stub, ftrace_trace_function + jnz trace +.globl ftrace_stub +ftrace_stub: + retq + +trace: + /* taken from glibc */ + subq $0x38, %rsp + movq %rax, (%rsp) + movq %rcx, 8(%rsp) + movq %rdx, 16(%rsp) + movq %rsi, 24(%rsp) + movq %rdi, 32(%rsp) + movq %r8, 40(%rsp) + movq %r9, 48(%rsp) + + movq 0x38(%rsp), %rdi + movq 8(%rbp), %rsi + + call *ftrace_trace_function + + movq 48(%rsp), %r9 + movq 40(%rsp), %r8 + movq 32(%rsp), %rdi + movq 24(%rsp), %rsi + movq 16(%rsp), %rdx + movq 8(%rsp), %rcx + movq (%rsp), %rax + addq $0x38, %rsp + + jmp ftrace_stub +END(mcount) +#endif + #ifndef CONFIG_PREEMPT #define retint_kernel retint_restore_args #endif diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h new file mode 100644 index 000000000000..b96ef14c249a --- /dev/null +++ b/include/linux/ftrace.h @@ -0,0 +1,38 @@ +#ifndef _LINUX_FTRACE_H +#define _LINUX_FTRACE_H + +#ifdef CONFIG_FTRACE + +#include + +#define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) +#define CALLER_ADDR1 ((unsigned long)__builtin_return_address(1)) +#define CALLER_ADDR2 ((unsigned long)__builtin_return_address(2)) + +typedef void (*ftrace_func_t)(unsigned long ip, unsigned 
long parent_ip); +struct ftrace_ops { + ftrace_func_t func; + struct ftrace_ops *next; +}; + +/* + * The ftrace_ops must be static and should also + * be read_mostly. These functions do modify read_mostly variables + * so use them sparingly. Never free an ftrace_ops or modify the + * next pointer after it has been registered. Even after unregistering + * it, the next pointer may still be used internally. + */ +int register_ftrace_function(struct ftrace_ops *ops); +int unregister_ftrace_function(struct ftrace_ops *ops); +void clear_ftrace_function(void); + +extern void ftrace_stub(unsigned long a0, unsigned long a1); +extern void mcount(void); + +#else /* !CONFIG_FTRACE */ +# define register_ftrace_function(ops) do { } while (0) +# define unregister_ftrace_function(ops) do { } while (0) +# define clear_ftrace_function(ops) do { } while (0) +#endif /* CONFIG_FTRACE */ +#endif /* _LINUX_FTRACE_H */ diff --git a/kernel/Makefile b/kernel/Makefile index 1c9938addb9d..fa05f6d8bdbf 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -69,6 +69,7 @@ obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o obj-$(CONFIG_MARKERS) += marker.o obj-$(CONFIG_LATENCYTOP) += latencytop.o +obj-$(CONFIG_FTRACE) += trace/ ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y) # According to Alan Modra , the -fno-omit-frame-pointer is diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig new file mode 100644 index 000000000000..8185c91417bc --- /dev/null +++ b/kernel/trace/Kconfig @@ -0,0 +1,5 @@ +# +# Architectures that offer an FTRACE implementation should select HAVE_FTRACE: +# +config HAVE_FTRACE + bool diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile new file mode 100644 index 000000000000..bf4fd215a6a9 --- /dev/null +++ b/kernel/trace/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_FTRACE) += libftrace.o + +libftrace-y := ftrace.o diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c new file mode 100644 index 000000000000..b6a80b98a3fb --- /dev/null +++ b/kernel/trace/ftrace.c @@ -0,0 +1,138 @@ +/* + * Infrastructure for profiling code inserted by 'gcc -pg'. + * + * Copyright (C) 2007-2008 Steven Rostedt + * Copyright (C) 2004-2008 Ingo Molnar + * + * Originally ported from the -rt patch by: + * Copyright (C) 2007 Arnaldo Carvalho de Melo + * + * Based on code in the latency_tracer, that is: + * + * Copyright (C) 2004-2006 Ingo Molnar + * Copyright (C) 2004 William Lee Irwin III + */ + +#include +#include + +static DEFINE_SPINLOCK(ftrace_func_lock); +static struct ftrace_ops ftrace_list_end __read_mostly = +{ + .func = ftrace_stub, +}; + +static struct ftrace_ops *ftrace_list __read_mostly = &ftrace_list_end; +ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; + +/* mcount is defined per arch in assembly */ +EXPORT_SYMBOL(mcount); + +notrace void ftrace_list_func(unsigned long ip, unsigned long parent_ip) +{ + struct ftrace_ops *op = ftrace_list; + + /* in case someone actually ports this to alpha! */ + read_barrier_depends(); + + while (op != &ftrace_list_end) { + /* silly alpha */ + read_barrier_depends(); + op->func(ip, parent_ip); + op = op->next; + }; +} + +/** + * register_ftrace_function - register a function for profiling + * @ops - ops structure that holds the function for profiling. + * + * Register a function to be called by all functions in the + * kernel. + * + * Note: @ops->func and all the functions it calls must be labeled + * with "notrace", otherwise it will go into a + * recursive loop.
+ */ +int register_ftrace_function(struct ftrace_ops *ops) +{ + unsigned long flags; + + spin_lock_irqsave(&ftrace_func_lock, flags); + ops->next = ftrace_list; + /* + * We are entering ops into the ftrace_list but another + * CPU might be walking that list. We need to make sure + * the ops->next pointer is valid before another CPU sees + * the ops pointer included into the ftrace_list. + */ + smp_wmb(); + ftrace_list = ops; + /* + * For one func, simply call it directly. + * For more than one func, call the chain. + */ + if (ops->next == &ftrace_list_end) + ftrace_trace_function = ops->func; + else + ftrace_trace_function = ftrace_list_func; + spin_unlock_irqrestore(&ftrace_func_lock, flags); + + return 0; +} + +/** + * unregister_ftrace_function - unregister a function for profiling. + * @ops - ops structure that holds the function to unregister + * + * Unregister a function that was added to be called by ftrace profiling. + */ +int unregister_ftrace_function(struct ftrace_ops *ops) +{ + unsigned long flags; + struct ftrace_ops **p; + int ret = 0; + + spin_lock_irqsave(&ftrace_func_lock, flags); + + /* + * If we are the only function, then the ftrace pointer is + * pointing directly to that function. + */ + if (ftrace_list == ops && ops->next == &ftrace_list_end) { + ftrace_trace_function = ftrace_stub; + ftrace_list = &ftrace_list_end; + goto out; + } + + for (p = &ftrace_list; *p != &ftrace_list_end; p = &(*p)->next) + if (*p == ops) + break; + + if (*p != ops) { + ret = -1; + goto out; + } + + *p = (*p)->next; + + /* If we only have one func left, then call that directly */ + if (ftrace_list->next == &ftrace_list_end) + ftrace_trace_function = ftrace_list->func; + + out: + spin_unlock_irqrestore(&ftrace_func_lock, flags); + + return 0; +} + +/** + * clear_ftrace_function - reset the ftrace function + * + * This NULLs the ftrace function and in essence stops + * tracing. There may be a lag before tracing actually stops. + */ +void clear_ftrace_function(void) +{ + ftrace_trace_function = ftrace_stub; +} diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index d2099f41aa1e..d8b6279a9b42 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -634,6 +634,8 @@ config LATENCYTOP Enable this option if you want to use the LatencyTOP tool to find out which userspace is blocking on what kernel operations. +source kernel/trace/Kconfig + config PROVIDE_OHCI1394_DMA_INIT bool "Remote debugging over FireWire early on boot" depends on PCI && X86 -- cgit v1.2.3-59-g8ed1b From 5568b139f4d196273958ae2947a736fdf1ffeece Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:44 +0200 Subject: ftrace: debug smp_processor_id, use notrace preempt disable The debug smp_processor_id caused a recursive fault while debugging the irqsoff tracer. The tracer used smp_processor_id in the ftrace callback, and that function calls preempt_disable, which is also traced. This caused a recursive fault (stack overflow). Since using smp_processor_id without debugging enabled does not cause faults with the tracer (even when the tracer is wrong), the debug version should not cause a system reboot either. This changes debug_smp_processor_id() to use the notrace versions of the preempt disable/enable helpers.
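As a sketch of what a safe callback then looks like (illustration only, my_trace_func is a made-up name; the *_notrace helpers are the same ones debug_smp_processor_id() is switched to below):

    /* Sketch only: a tracer callback that avoids re-entering the tracer. */
    static void notrace my_trace_func(unsigned long ip, unsigned long parent_ip)
    {
            int cpu;

            preempt_disable_notrace();              /* not traced, so no recursion */
            cpu = raw_smp_processor_id();
            /* ... record ip/parent_ip in a per-cpu buffer ... */
            preempt_enable_no_resched_notrace();
    }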
Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- lib/smp_processor_id.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c index e555ab62fbad..3b4dc098181e 100644 --- a/lib/smp_processor_id.c +++ b/lib/smp_processor_id.c @@ -37,7 +37,7 @@ notrace unsigned int debug_smp_processor_id(void) /* * Avoid recursion: */ - preempt_disable(); + preempt_disable_notrace(); if (!printk_ratelimit()) goto out_enable; @@ -49,7 +49,7 @@ notrace unsigned int debug_smp_processor_id(void) dump_stack(); out_enable: - preempt_enable_no_resched(); + preempt_enable_no_resched_notrace(); out: return this_cpu; } -- cgit v1.2.3-59-g8ed1b From 3594136ad67a54d77bcb2547e70011754a2f91d5 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:50 +0200 Subject: ftrace: do not profile lib/string.o Most archs define the string and memory compare functions in assembly; some do not. These functions may be used on some archs during early boot. Since the assembly versions are not traced on most archs, there is no need to trace these functions on the archs where they are not defined in assembly either. This patch removes -pg from the CFLAGS for lib/string.o. This prevents use of the string functions in either the vdso or early bootup from crashing the system. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- lib/Makefile | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'lib') diff --git a/lib/Makefile b/lib/Makefile index 74b0cfb1fcc3..6ca9e6ee1e33 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -8,6 +8,14 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ sha1.o irq_regs.o reciprocal_div.o argv_split.o \ proportions.o prio_heap.o ratelimit.o +ifdef CONFIG_FTRACE +# Do not profile string.o, since it may be used in early boot or vdso +ORIG_CFLAGS := $(KBUILD_CFLAGS) +KBUILD_CFLAGS = $(if $(subst string,,$(basename $(notdir $@))), \ + $(ORIG_CFLAGS), \ + $(subst -pg,,$(ORIG_CFLAGS))) +endif + lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o -- cgit v1.2.3-59-g8ed1b From 9d0a420b737f72d84fabebf29634d800cbf54538 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:55 +0200 Subject: ftrace: remove function tracing from spinlock debug The debug functions in spin_lock debugging pollute the output of the function tracer. This patch adds the debug files in the lib directory to those that should not be compiled with mcount tracing.
Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- lib/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Makefile b/lib/Makefile index 6ca9e6ee1e33..d97ad1100b6f 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -10,8 +10,9 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ ifdef CONFIG_FTRACE # Do not profile string.o, since it may be used in early boot or vdso +# Also do not profile any debug utilities ORIG_CFLAGS := $(KBUILD_CFLAGS) -KBUILD_CFLAGS = $(if $(subst string,,$(basename $(notdir $@))), \ +KBUILD_CFLAGS = $(if $(filter-out %debug debug% string%,$(basename $(notdir $@))), \ $(ORIG_CFLAGS), \ $(subst -pg,,$(ORIG_CFLAGS))) endif -- cgit v1.2.3-59-g8ed1b From 654e4787689faffdd2137fe91f59fd3ef3363ad2 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 14 May 2008 21:30:31 -0400 Subject: ftrace: use the new kbuild CFLAGS_REMOVE for lib directory This patch removes the Makefile turd and uses the nice CFLAGS_REMOVE macro in the lib directory. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- lib/Makefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/Makefile b/lib/Makefile index d97ad1100b6f..4b836a53c08f 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -10,11 +10,11 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ ifdef CONFIG_FTRACE # Do not profile string.o, since it may be used in early boot or vdso +CFLAGS_REMOVE_string.o = -pg # Also do not profile any debug utilities -ORIG_CFLAGS := $(KBUILD_CFLAGS) -KBUILD_CFLAGS = $(if $(filter-out %debug debug% string%,$(basename $(notdir $@))), \ - $(ORIG_CFLAGS), \ - $(subst -pg,,$(ORIG_CFLAGS))) +CFLAGS_REMOVE_spinlock_debug.o = -pg +CFLAGS_REMOVE_list_debug.o = -pg +CFLAGS_REMOVE_debugobjects.o = -pg endif lib-$(CONFIG_MMU) += ioremap.o -- cgit v1.2.3-59-g8ed1b From 886dd58258e6ddebe20e7aebef7b167a24bad7ee Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 15:44:38 +0200 Subject: debugging: make stacktrace independent from DEBUG_KERNEL Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- lib/Kconfig.debug | 1 - 1 file changed, 1 deletion(-) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index d2099f41aa1e..9c17fb9d1d5e 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -419,7 +419,6 @@ config DEBUG_LOCKING_API_SELFTESTS config STACKTRACE bool - depends on DEBUG_KERNEL depends on STACKTRACE_SUPPORT config DEBUG_KOBJECT -- cgit v1.2.3-59-g8ed1b From 50db04dd9c74178e68a981a7127c37252ffb3242 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Sun, 15 Jun 2008 00:47:36 +0200 Subject: debugobjects: fix lockdep warning Daniel J Blueman reported: | ======================================================= | [ INFO: possible circular locking dependency detected ] | 2.6.26-rc5-201c #1 | ------------------------------------------------------- | nscd/3669 is trying to acquire lock: | (&n->list_lock){.+..}, at: [] deactivate_slab+0x173/0x1e0 | | but task is already holding lock: | (&obj_hash[i].lock){++..}, at: [] | __debug_object_init+0x2f/0x350 | | which lock already depends on the new lock. There are two locks involved here; the first is a SLUB-local lock, and the second is a debugobjects-local lock. They are basically taken in two different orders: 1. SLUB { debugobjects { ... } } 2. debugobjects { SLUB { ... } } This patch changes pattern #2 by trying to fill the memory pool (e.g. 
the call into SLUB/kmalloc()) outside the debugobjects lock, so now the two patterns look like this: 1. SLUB { debugobjects { ... } } 2. SLUB { } debugobjects { ... } [ daniel.blueman@gmail.com: pool_lock needs to be taken irq safe in fill_pool ] Reported-by: Daniel J Blueman Signed-off-by: Vegard Nossum Signed-off-by: Thomas Gleixner --- lib/debugobjects.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/debugobjects.c b/lib/debugobjects.c index a76a5e122ae1..85b18d79be89 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -68,6 +68,7 @@ static int fill_pool(void) { gfp_t gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN; struct debug_obj *new; + unsigned long flags; if (likely(obj_pool_free >= ODEBUG_POOL_MIN_LEVEL)) return obj_pool_free; @@ -81,10 +82,10 @@ static int fill_pool(void) if (!new) return obj_pool_free; - spin_lock(&pool_lock); + spin_lock_irqsave(&pool_lock, flags); hlist_add_head(&new->node, &obj_pool); obj_pool_free++; - spin_unlock(&pool_lock); + spin_unlock_irqrestore(&pool_lock, flags); } return obj_pool_free; } @@ -110,16 +111,13 @@ static struct debug_obj *lookup_object(void *addr, struct debug_bucket *b) } /* - * Allocate a new object. If the pool is empty and no refill possible, - * switch off the debugger. + * Allocate a new object. If the pool is empty, switch off the debugger. */ static struct debug_obj * alloc_object(void *addr, struct debug_bucket *b, struct debug_obj_descr *descr) { struct debug_obj *obj = NULL; - int retry = 0; -repeat: spin_lock(&pool_lock); if (obj_pool.first) { obj = hlist_entry(obj_pool.first, typeof(*obj), node); @@ -141,9 +139,6 @@ repeat: } spin_unlock(&pool_lock); - if (fill_pool() && !obj && !retry++) - goto repeat; - return obj; } @@ -261,6 +256,8 @@ __debug_object_init(void *addr, struct debug_obj_descr *descr, int onstack) struct debug_obj *obj; unsigned long flags; + fill_pool(); + db = get_bucket((unsigned long) addr); spin_lock_irqsave(&db->lock, flags); -- cgit v1.2.3-59-g8ed1b From ad118c54a3587b2c69a769d0ba37d4d8dce4559d Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Fri, 27 Jun 2008 18:04:48 +0200 Subject: stacktrace: add saved stack traces to backtrace self-test This patch adds saved stack-traces to the backtrace suite of self-tests. Note that we don't depend on or unconditionally enable CONFIG_STACKTRACE because not all architectures may have it (and we still want to enable the other tests for those architectures). Cc: Arjan van de Ven Signed-off-by: Vegard Nossum Cc: Arjan van de Ven Cc: Andrew Morton Signed-off-by: Ingo Molnar --- kernel/backtracetest.c | 30 +++++++++++++++++++++++++++++- lib/Kconfig.debug | 3 +++ 2 files changed, 32 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/kernel/backtracetest.c b/kernel/backtracetest.c index d1a7605c5b8f..50f7abd0813d 100644 --- a/kernel/backtracetest.c +++ b/kernel/backtracetest.c @@ -10,9 +10,10 @@ * of the License. 
 */ +#include #include -#include +#include static struct timer_list backtrace_timer; @@ -22,6 +23,31 @@ static void backtrace_test_timer(unsigned long data) printk("The following trace is a kernel self test and not a bug!\n"); dump_stack(); } + +#ifdef CONFIG_STACKTRACE +static void backtrace_test_saved(void) +{ + struct stack_trace trace; + unsigned long entries[8]; + + printk("Testing a saved backtrace.\n"); + printk("The following trace is a kernel self test and not a bug!\n"); + + trace.nr_entries = 0; + trace.max_entries = ARRAY_SIZE(entries); + trace.entries = entries; + trace.skip = 0; + + save_stack_trace(&trace); + print_stack_trace(&trace, 0); +} +#else +static void backtrace_test_saved(void) +{ + printk("Saved backtrace test skipped.\n"); +} +#endif + static int backtrace_regression_test(void) { printk("====[ backtrace testing ]===========\n"); @@ -29,6 +55,8 @@ static int backtrace_regression_test(void) printk("The following trace is a kernel self test and not a bug!\n"); dump_stack(); + backtrace_test_saved(); + init_timer(&backtrace_timer); backtrace_timer.function = backtrace_test_timer; mod_timer(&backtrace_timer, jiffies + 10); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 9c17fb9d1d5e..6263e2d851f1 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -562,6 +562,9 @@ config BACKTRACE_SELF_TEST for distributions or general kernels, but only for kernel developers working on architecture code. + Note that if you want to also test saved backtraces, you will + have to enable STACKTRACE as well. + Say N if you are unsure. config LKDTM -- cgit v1.2.3-59-g8ed1b From aebb6a849cfe7d89bcacaaecc20a480dfc1180e7 Mon Sep 17 00:00:00 2001 From: Joonwoo Park Date: Mon, 30 Jun 2008 12:42:23 -0700 Subject: textsearch: fix Boyer-Moore text search bug The current logic has a bug: it cannot find a matching pattern if the pattern matches from the first character of the target string. For example: pattern=abc, string=abcdefg pattern=a, string=abcdefg The search algorithm should return 0 in those cases. Signed-off-by: Joonwoo Park Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- lib/ts_bm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/ts_bm.c b/lib/ts_bm.c index d90822c378a4..4a7fce72898e 100644 --- a/lib/ts_bm.c +++ b/lib/ts_bm.c @@ -63,7 +63,7 @@ static unsigned int bm_find(struct ts_config *conf, struct ts_state *state) struct ts_bm *bm = ts_config_priv(conf); unsigned int i, text_len, consumed = state->offset; const u8 *text; - int shift = bm->patlen, bs; + int shift = bm->patlen - 1, bs; for (;;) { text_len = conf->get_next_block(consumed, &text, conf, state); -- cgit v1.2.3-59-g8ed1b From cde53535991fbb5c34a1566f25955297c1487b8d Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Fri, 4 Jul 2008 09:59:22 -0700 Subject: Christoph has moved Remove all clameter@sgi.com addresses from the kernel tree since they will become invalid on June 27th. Change my maintainer email address for the slab allocators to cl@linux-foundation.org (which will be the new email address for the future).
Signed-off-by: Christoph Lameter Signed-off-by: Christoph Lameter Cc: Pekka Enberg Cc: Stephen Rothwell Cc: Matt Mackall Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/vm/slabinfo.c | 4 ++-- Documentation/vm/slub.txt | 2 +- MAINTAINERS | 2 +- include/asm-generic/atomic.h | 2 +- include/linux/slab.h | 2 +- include/linux/slub_def.h | 2 +- kernel/workqueue.c | 2 +- lib/radix-tree.c | 2 +- mm/allocpercpu.c | 2 +- mm/migrate.c | 2 +- mm/slub.c | 2 +- mm/sparse-vmemmap.c | 2 +- 12 files changed, 13 insertions(+), 13 deletions(-) (limited to 'lib') diff --git a/Documentation/vm/slabinfo.c b/Documentation/vm/slabinfo.c index e4230ed16ee7..df3227605d59 100644 --- a/Documentation/vm/slabinfo.c +++ b/Documentation/vm/slabinfo.c @@ -1,7 +1,7 @@ /* * Slabinfo: Tool to get reports about slabs * - * (C) 2007 sgi, Christoph Lameter + * (C) 2007 sgi, Christoph Lameter * * Compile by: * @@ -99,7 +99,7 @@ void fatal(const char *x, ...) void usage(void) { - printf("slabinfo 5/7/2007. (c) 2007 sgi. clameter@sgi.com\n\n" + printf("slabinfo 5/7/2007. (c) 2007 sgi.\n\n" "slabinfo [-ahnpvtsz] [-d debugopts] [slab-regexp]\n" "-a|--aliases Show aliases\n" "-A|--activity Most active slabs first\n" diff --git a/Documentation/vm/slub.txt b/Documentation/vm/slub.txt index 7c13f22a0c9e..bb1f5c6e28b3 100644 --- a/Documentation/vm/slub.txt +++ b/Documentation/vm/slub.txt @@ -266,4 +266,4 @@ of other objects. slub_debug=FZ,dentry -Christoph Lameter, , May 30, 2007 +Christoph Lameter, May 30, 2007 diff --git a/MAINTAINERS b/MAINTAINERS index 460e699fd280..13b7b19692e6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3672,7 +3672,7 @@ S: Maintained SLAB ALLOCATOR P: Christoph Lameter -M: clameter@sgi.com +M: cl@linux-foundation.org P: Pekka Enberg M: penberg@cs.helsinki.fi P: Matt Mackall diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h index 85fd0aa27a8c..4ec0a296bdec 100644 --- a/include/asm-generic/atomic.h +++ b/include/asm-generic/atomic.h @@ -2,7 +2,7 @@ #define _ASM_GENERIC_ATOMIC_H /* * Copyright (C) 2005 Silicon Graphics, Inc. - * Christoph Lameter + * Christoph Lameter * * Allows to provide arch independent atomic definitions without the need to * edit all arch specific atomic.h files. diff --git a/include/linux/slab.h b/include/linux/slab.h index c2ad35016599..9aa90a6f20e0 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -1,7 +1,7 @@ /* * Written by Mark Hemment, 1996 (markhe@nextd.demon.co.uk). * - * (C) SGI 2006, Christoph Lameter + * (C) SGI 2006, Christoph Lameter * Cleaned up and restructured to ease the addition of alternative * implementations of SLAB allocators. */ diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index cef6f8fddd7d..d117ea2825a9 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -4,7 +4,7 @@ /* * SLUB : A Slab allocator without object queues. * - * (C) 2007 SGI, Christoph Lameter + * (C) 2007 SGI, Christoph Lameter */ #include #include diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 29fc39f1029c..ce7799540c91 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -13,7 +13,7 @@ * Kai Petzke * Theodore Ts'o * - * Made to use alloc_percpu by Christoph Lameter . + * Made to use alloc_percpu by Christoph Lameter. 
 */ #include diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 169a2f8dabcc..56ec21a7f73d 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -1,7 +1,7 @@ /* * Copyright (C) 2001 Momchil Velikov * Portions Copyright (C) 2001 Christoph Hellwig - * Copyright (C) 2005 SGI, Christoph Lameter + * Copyright (C) 2005 SGI, Christoph Lameter * Copyright (C) 2006 Nick Piggin * * This program is free software; you can redistribute it and/or diff --git a/mm/allocpercpu.c b/mm/allocpercpu.c index f4026bae6eed..05f2b4009ccc 100644 --- a/mm/allocpercpu.c +++ b/mm/allocpercpu.c @@ -1,7 +1,7 @@ /* * linux/mm/allocpercpu.c * - * Separated from slab.c August 11, 2006 Christoph Lameter + * Separated from slab.c August 11, 2006 Christoph Lameter */ #include #include diff --git a/mm/migrate.c b/mm/migrate.c index 112bcaeaa104..55bd355d170d 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -9,7 +9,7 @@ * IWAMOTO Toshihiro * Hirokazu Takahashi * Dave Hansen - * Christoph Lameter + * Christoph Lameter */ #include diff --git a/mm/slub.c b/mm/slub.c index 2c9a62d1f429..1a427c0ae83b 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -5,7 +5,7 @@ * The allocator synchronizes using per slab locks and only * uses a centralized lock to manage a pool of partial slabs. * - * (C) 2007 SGI, Christoph Lameter + * (C) 2007 SGI, Christoph Lameter */ #include diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index 99c4f36eb8a3..a91b5f8fcaf6 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -1,7 +1,7 @@ /* * Virtual Memory Map support * - * (C) 2007 sgi. Christoph Lameter . + * (C) 2007 sgi. Christoph Lameter. * * Virtual memory maps allow VM primitives pfn_to_page, page_to_pfn, * virt_to_page, page_address() to be implemented as a base offset -- cgit v1.2.3-59-g8ed1b From da9eac8990dc614ab4756f2a3d84870b675f1f1e Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Fri, 4 Jul 2008 09:59:36 -0700 Subject: lib: taint kernel in common report_bug() WARN path. Commit 95b570c9cef3b12356454c7112571b7e406b4b51 ("Taint kernel after WARN_ON(condition)") introduced a TAINT_WARN that was implemented for all architectures using the generic warn_on_slowpath(), which excluded any architecture that set HAVE_ARCH_WARN_ON. As all of the architectures that implement their own WARN_ON() go through the report_bug() path (specifically handling BUG_TRAP_TYPE_WARN), taint the kernel there as well for consistency. Tested on avr32 and sh. Also relevant for s390, parisc, and powerpc. Signed-off-by: Haavard Skinnemoen Signed-off-by: Paul Mundt Acked-by: Kyle McMartin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/bug.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib') diff --git a/lib/bug.c b/lib/bug.c index 530f38f55787..bfeafd60ee9f 100644 --- a/lib/bug.c +++ b/lib/bug.c @@ -37,6 +37,7 @@ */ #include #include +#include #include #include @@ -149,6 +150,7 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs) (void *)bugaddr); show_regs(regs); + add_taint(TAINT_WARN); return BUG_TRAP_TYPE_WARN; } -- cgit v1.2.3-59-g8ed1b From 0f9bfa569d46f2346a53a940b2b9e49a38635732 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 6 Jul 2008 16:06:25 -0700 Subject: vsprintf: split out '%s' handling logic The actual code is the same, just split out into a helper function. This makes it easier to read, and allows for future sharing of the string code.
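For reference (examples only, not code from this patch), the cases this '%s' logic covers are the usual width/precision/left-justify combinations:

    /* Illustration only: standard '%s' behaviour the helper implements. */
    char buf[32];

    snprintf(buf, sizeof(buf), "<%10.4s>", "kernel");   /* "<      kern>": width 10, precision 4 */
    snprintf(buf, sizeof(buf), "<%-10.4s>", "kernel");  /* "<kern      >": LEFT flag */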
Signed-off-by: Linus Torvalds --- lib/vsprintf.c | 57 +++++++++++++++++++++++++++++++-------------------------- 1 file changed, 31 insertions(+), 26 deletions(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 6021757a4496..926c7e00e2dc 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -482,6 +482,35 @@ static char *number(char *buf, char *end, unsigned long long num, int base, int return buf; } +static char *string(char *buf, char *end, char *s, int field_width, int precision, int flags) +{ + int len, i; + + if ((unsigned long)s < PAGE_SIZE) + s = ""; + + len = strnlen(s, precision); + + if (!(flags & LEFT)) { + while (len < field_width--) { + if (buf < end) + *buf = ' '; + ++buf; + } + } + for (i = 0; i < len; ++i) { + if (buf < end) + *buf = *s; + ++buf; ++s; + } + while (len < field_width--) { + if (buf < end) + *buf = ' '; + ++buf; + } + return buf; +} + /** * vsnprintf - Format a string and place it in a buffer * @buf: The buffer to place the result into @@ -502,11 +531,9 @@ static char *number(char *buf, char *end, unsigned long long num, int base, int */ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) { - int len; unsigned long long num; - int i, base; + int base; char *str, *end, c; - const char *s; int flags; /* flags to number() */ @@ -622,29 +649,7 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) continue; case 's': - s = va_arg(args, char *); - if ((unsigned long)s < PAGE_SIZE) - s = ""; - - len = strnlen(s, precision); - - if (!(flags & LEFT)) { - while (len < field_width--) { - if (str < end) - *str = ' '; - ++str; - } - } - for (i = 0; i < len; ++i) { - if (str < end) - *str = *s; - ++str; ++s; - } - while (len < field_width--) { - if (str < end) - *str = ' '; - ++str; - } + str = string(str, end, va_arg(args, char *), field_width, precision, flags); continue; case 'p': -- cgit v1.2.3-59-g8ed1b From 78a8bf69b32980879975f7e31d30386c50bfe851 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 6 Jul 2008 16:16:15 -0700 Subject: vsprintf: split out '%p' handling logic The actual code is the same, just split out into a helper function. This makes it easier to read, and allows for simple future extension of %p handling. Signed-off-by: Linus Torvalds --- lib/vsprintf.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 926c7e00e2dc..f569feb7662e 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -511,6 +511,16 @@ static char *string(char *buf, char *end, char *s, int field_width, int precisio return buf; } +static char *pointer(char *buf, char *end, void *ptr, int field_width, int precision, int flags) +{ + flags |= SMALL; + if (field_width == -1) { + field_width = 2*sizeof(void *); + flags |= ZEROPAD; + } + return number(buf, end, (unsigned long) ptr, 16, field_width, precision, flags); +} + /** * vsnprintf - Format a string and place it in a buffer * @buf: The buffer to place the result into @@ -653,17 +663,9 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) continue; case 'p': - flags |= SMALL; - if (field_width == -1) { - field_width = 2*sizeof(void *); - flags |= ZEROPAD; - } - str = number(str, end, - (unsigned long) va_arg(args, void *), - 16, field_width, precision, flags); + str = pointer(str, end, va_arg(args, void *), field_width, precision, flags); continue; - case 'n': /* FIXME: * What does C99 say about the overflow case here? 
 */ -- cgit v1.2.3-59-g8ed1b From 4d8a743cdd2690c0bc8d1b8cbd02cffb1ead849f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 6 Jul 2008 16:24:57 -0700 Subject: vsprintf: add infrastructure support for extended '%p' specifiers This expands the kernel '%p' handling with an arbitrary alphanumeric specifier extension string immediately following the '%p'. Right now it's just being ignored, but the next commit will start adding some specific pointer type extensions. NOTE! The reason the extension is appended to the '%p' is to allow minimal gcc type checking: gcc will still see the '%p' and will check that the argument passed in is indeed a pointer, and yet will not complain about the extended information that gcc doesn't understand (on the other hand, it also won't actually check that the pointer type and the extension are compatible). Alphanumeric characters were chosen because there is no sane existing use for a string format with a hex pointer representation immediately followed by alphanumerics (which is what such a format string would have traditionally resulted in). Signed-off-by: Linus Torvalds --- lib/vsprintf.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index f569feb7662e..5d6f0718b6d9 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -511,7 +511,14 @@ static char *string(char *buf, char *end, char *s, int field_width, int precisio return buf; } -static char *pointer(char *buf, char *end, void *ptr, int field_width, int precision, int flags) +/* + * Show a '%p' thing. A kernel extension is that the '%p' is followed + * by an extra set of alphanumeric characters that are extended format + * specifiers. + * + * Right now don't actually handle any such, but we will.. + */ +static char *pointer(const char *fmt, char *buf, char *end, void *ptr, int field_width, int precision, int flags) { flags |= SMALL; if (field_width == -1) { @@ -663,7 +670,12 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) continue; case 'p': - str = pointer(str, end, va_arg(args, void *), field_width, precision, flags); + str = pointer(fmt+1, str, end, + va_arg(args, void *), + field_width, precision, flags); + /* Skip all alphanumeric pointer suffixes */ + while (isalnum(fmt[1])) + fmt++; continue; case 'n': -- cgit v1.2.3-59-g8ed1b From 0fe1ef24f7bd0020f29ffe287dfdb9ead33ca0b2 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 6 Jul 2008 16:43:12 -0700 Subject: vsprintf: add support for '%pS' and '%pF' pointer formats They print out a pointer in symbolic format, if possible (ie using symbolic KALLSYMS information). The '%pS' format is for regular direct pointers (which can point to data or code and that you find on the stack during backtraces etc), while '%pF' is for C function pointer types. On most architectures, the two mean exactly the same thing, but some architectures use an indirect pointer for C function pointers, where the function pointer points to a function descriptor (which in turn contains the actual pointer to the code). The '%pF' code automatically does the appropriate function descriptor dereference on such architectures.
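Usage then looks like this (illustrative snippet, not from this patch; my_handler is a made-up function pointer variable):

    /* Illustration only. */
    printk(KERN_DEBUG "entered via %pS\n", __builtin_return_address(0));
    printk(KERN_DEBUG "dispatching to %pF\n", my_handler);  /* descriptor dereferenced on ia64/ppc64 */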
Signed-off-by: Linus Torvalds --- lib/vsprintf.c | 41 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 5d6f0718b6d9..1dc2d1d18fa8 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -22,6 +22,8 @@ #include #include #include +#include +#include #include /* for PAGE_SIZE */ #include @@ -511,15 +513,52 @@ static char *string(char *buf, char *end, char *s, int field_width, int precisio return buf; } +static inline void *dereference_function_descriptor(void *ptr) +{ +#if defined(CONFIG_IA64) || defined(CONFIG_PPC64) + void *p; + if (!probe_kernel_address(ptr, p)) + ptr = p; +#endif + return ptr; +} + +static char *symbol_string(char *buf, char *end, void *ptr, int field_width, int precision, int flags) +{ + unsigned long value = (unsigned long) ptr; +#ifdef CONFIG_KALLSYMS + char sym[KSYM_SYMBOL_LEN]; + sprint_symbol(sym, value); + return string(buf, end, sym, field_width, precision, flags); +#else + field_width = 2*sizeof(void *); + flags |= SPECIAL | SMALL | ZEROPAD; + return number(buf, end, value, 16, field_width, precision, flags); +#endif +} + /* * Show a '%p' thing. A kernel extension is that the '%p' is followed * by an extra set of alphanumeric characters that are extended format * specifiers. * - * Right now don't actually handle any such, but we will.. + * Right now we just handle 'F' (for symbolic Function descriptor pointers) + * and 'S' (for Symbolic direct pointers), but this can easily be + * extended in the future (network address types etc). + * + * The difference between 'S' and 'F' is that on ia64 and ppc64 function + * pointers are really function descriptors, which contain a pointer to the + * real address. */ static char *pointer(const char *fmt, char *buf, char *end, void *ptr, int field_width, int precision, int flags) { + switch (*fmt) { + case 'F': + ptr = dereference_function_descriptor(ptr); + /* Fallthrough */ + case 'S': + return symbol_string(buf, end, ptr, field_width, precision, flags); + } flags |= SMALL; if (field_width == -1) { field_width = 2*sizeof(void *); -- cgit v1.2.3-59-g8ed1b From e8ced39d5e8911c662d4d69a342b9d053eaaac4e Mon Sep 17 00:00:00 2001 From: Mingming Cao Date: Fri, 11 Jul 2008 19:27:31 -0400 Subject: percpu_counter: new function percpu_counter_sum_and_set Delayed allocation needs to check free blocks at every write time. percpu_counter_read_positive() is not quite accurate, and delayed allocation needs more accurate accounting, but calling percpu_counter_sum_positive() frequently is quite expensive. This patch adds a new function that updates the central counter while summing the per-cpu counters, to increase the accuracy of subsequent percpu_counter_read() calls and reduce the need to call the expensive percpu_counter_sum().
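Illustrative use (mirroring the ext4 hunk below): read the cheap approximation first, and only fall back to the exact sum, which now also refreshes the central count, when the result is too close to the limit:

    /* Sketch based on ext4_has_free_blocks() in the diff below. */
    s64 free_blocks;

    free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
    if (free_blocks - root_blocks < FBC_BATCH)
            free_blocks = percpu_counter_sum_and_set(&sbi->s_freeblocks_counter);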
Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/balloc.c | 2 +- include/linux/percpu_counter.h | 12 +++++++++--- lib/percpu_counter.c | 7 ++++++- 3 files changed, 16 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 25f63d8c1b3d..6369bacf0dcb 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -1621,7 +1621,7 @@ ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi, #ifdef CONFIG_SMP if (free_blocks - root_blocks < FBC_BATCH) free_blocks = - percpu_counter_sum_positive(&sbi->s_freeblocks_counter); + percpu_counter_sum_and_set(&sbi->s_freeblocks_counter); #endif if (free_blocks - root_blocks < nblocks) return free_blocks - root_blocks; diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index 9007ccdfc112..208388835357 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -35,7 +35,7 @@ int percpu_counter_init_irq(struct percpu_counter *fbc, s64 amount); void percpu_counter_destroy(struct percpu_counter *fbc); void percpu_counter_set(struct percpu_counter *fbc, s64 amount); void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch); -s64 __percpu_counter_sum(struct percpu_counter *fbc); +s64 __percpu_counter_sum(struct percpu_counter *fbc, int set); static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) { @@ -44,13 +44,19 @@ static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc) { - s64 ret = __percpu_counter_sum(fbc); + s64 ret = __percpu_counter_sum(fbc, 0); return ret < 0 ? 0 : ret; } +static inline s64 percpu_counter_sum_and_set(struct percpu_counter *fbc) +{ + return __percpu_counter_sum(fbc, 1); +} + + static inline s64 percpu_counter_sum(struct percpu_counter *fbc) { - return __percpu_counter_sum(fbc); + return __percpu_counter_sum(fbc, 0); } static inline s64 percpu_counter_read(struct percpu_counter *fbc) diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index 119174494cb5..4a8ba4bf5f6f 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -52,7 +52,7 @@ EXPORT_SYMBOL(__percpu_counter_add); * Add up all the per-cpu counts, return the result. This is a more accurate * but much slower version of percpu_counter_read_positive() */ -s64 __percpu_counter_sum(struct percpu_counter *fbc) +s64 __percpu_counter_sum(struct percpu_counter *fbc, int set) { s64 ret; int cpu; @@ -62,7 +62,12 @@ s64 __percpu_counter_sum(struct percpu_counter *fbc) for_each_online_cpu(cpu) { s32 *pcount = per_cpu_ptr(fbc->counters, cpu); ret += *pcount; + if (set) + *pcount = 0; } + if (set) + fbc->count = ret; + spin_unlock(&fbc->lock); return ret; } -- cgit v1.2.3-59-g8ed1b