From 4a7a16dc061e4c57bf288150f51bd4c2ace33723 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 12 Jun 2009 20:42:08 -0400 Subject: ACPI: move declaration acpi_early_init() to acpi.h Signed-off-by: Len Brown --- init/main.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'init/main.c') diff --git a/init/main.c b/init/main.c index d721dad05dd7..f1b9f0fdb1b4 100644 --- a/init/main.c +++ b/init/main.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -86,11 +87,6 @@ extern void sbus_init(void); extern void prio_tree_init(void); extern void radix_tree_init(void); extern void free_initmem(void); -#ifdef CONFIG_ACPI -extern void acpi_early_init(void); -#else -static inline void acpi_early_init(void) { } -#endif #ifndef CONFIG_DEBUG_RODATA static inline void mark_rodata_ro(void) { } #endif -- cgit v1.2.3-59-g8ed1b From b99b87f70c7785ab1e253c6220f4b0b57ce3a7f7 Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Wed, 17 Jun 2009 16:28:03 -0700 Subject: kernel: constructor support Call constructors (gcc-generated initcall-like functions) during kernel start and module load. Constructors are e.g. used for gcov data initialization. Disable constructor support for usermode Linux to prevent conflicts with host glibc. Signed-off-by: Peter Oberparleiter Acked-by: Rusty Russell Acked-by: WANG Cong Cc: Sam Ravnborg Cc: Jeff Dike Cc: Andi Kleen Cc: Huang Ying Cc: Li Wei Cc: Michael Ellerman Cc: Ingo Molnar Cc: Heiko Carstens Cc: Martin Schwidefsky Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/sections.h | 3 +++ include/asm-generic/vmlinux.lds.h | 9 +++++++++ include/linux/init.h | 3 +++ include/linux/module.h | 6 ++++++ init/Kconfig | 5 +++++ init/main.c | 12 ++++++++++++ kernel/module.c | 16 ++++++++++++++++ 7 files changed, 54 insertions(+) (limited to 'init/main.c') diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index 4ce48e878530..d083561337f2 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -14,6 +14,9 @@ extern char __kprobes_text_start[], __kprobes_text_end[]; extern char __initdata_begin[], __initdata_end[]; extern char __start_rodata[], __end_rodata[]; +/* Start and end of .ctors section - used for constructor calls. */ +extern char __ctors_start[], __ctors_end[]; + /* function descriptor handling (if any). Override * in asm/sections.h */ #ifndef dereference_function_descriptor diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 6bdba10fef4a..55413e568f07 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -440,12 +440,21 @@ INIT_TASK \ } +#ifdef CONFIG_CONSTRUCTORS +#define KERNEL_CTORS() VMLINUX_SYMBOL(__ctors_start) = .; \ + *(.ctors) \ + VMLINUX_SYMBOL(__ctors_end) = .; +#else +#define KERNEL_CTORS() +#endif + /* init and exit section handling */ #define INIT_DATA \ *(.init.data) \ DEV_DISCARD(init.data) \ CPU_DISCARD(init.data) \ MEM_DISCARD(init.data) \ + KERNEL_CTORS() \ *(.init.rodata) \ DEV_DISCARD(init.rodata) \ CPU_DISCARD(init.rodata) \ diff --git a/include/linux/init.h b/include/linux/init.h index 8c2c9989626d..13b633ed695e 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -134,6 +134,9 @@ typedef void (*exitcall_t)(void); extern initcall_t __con_initcall_start[], __con_initcall_end[]; extern initcall_t __security_initcall_start[], __security_initcall_end[]; +/* Used for contructor calls. */ +typedef void (*ctor_fn_t)(void); + /* Defined in init/main.c */ extern int do_one_initcall(initcall_t fn); extern char __initdata boot_command_line[]; diff --git a/include/linux/module.h b/include/linux/module.h index 505f20dcc1c7..098bdb7bfacf 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -363,6 +363,12 @@ struct module local_t ref; #endif #endif + +#ifdef CONFIG_CONSTRUCTORS + /* Constructor functions. */ + ctor_fn_t *ctors; + unsigned int num_ctors; +#endif }; #ifndef MODULE_ARCH_INIT #define MODULE_ARCH_INIT {} diff --git a/init/Kconfig b/init/Kconfig index c4b3c6d51a72..1ce05a4cb5f6 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -16,6 +16,11 @@ config DEFCONFIG_LIST default "$ARCH_DEFCONFIG" default "arch/$ARCH/defconfig" +config CONSTRUCTORS + bool + depends on !UML + default y + menu "General setup" config EXPERIMENTAL diff --git a/init/main.c b/init/main.c index 0e7aedeaa05f..1a65fdd06318 100644 --- a/init/main.c +++ b/init/main.c @@ -720,6 +720,17 @@ asmlinkage void __init start_kernel(void) rest_init(); } +/* Call all constructor functions linked into the kernel. */ +static void __init do_ctors(void) +{ +#ifdef CONFIG_CONSTRUCTORS + ctor_fn_t *call = (ctor_fn_t *) __ctors_start; + + for (; call < (ctor_fn_t *) __ctors_end; call++) + (*call)(); +#endif +} + int initcall_debug; core_param(initcall_debug, initcall_debug, bool, 0644); @@ -800,6 +811,7 @@ static void __init do_basic_setup(void) usermodehelper_init(); driver_init(); init_irq_proc(); + do_ctors(); do_initcalls(); } diff --git a/kernel/module.c b/kernel/module.c index 215aaab09e91..38928fcaff2b 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2216,6 +2216,10 @@ static noinline struct module *load_module(void __user *umod, mod->unused_gpl_crcs = section_addr(hdr, sechdrs, secstrings, "__kcrctab_unused_gpl"); #endif +#ifdef CONFIG_CONSTRUCTORS + mod->ctors = section_objs(hdr, sechdrs, secstrings, ".ctors", + sizeof(*mod->ctors), &mod->num_ctors); +#endif #ifdef CONFIG_MARKERS mod->markers = section_objs(hdr, sechdrs, secstrings, "__markers", @@ -2389,6 +2393,17 @@ static noinline struct module *load_module(void __user *umod, goto free_hdr; } +/* Call module constructors. */ +static void do_mod_ctors(struct module *mod) +{ +#ifdef CONFIG_CONSTRUCTORS + unsigned long i; + + for (i = 0; i < mod->num_ctors; i++) + mod->ctors[i](); +#endif +} + /* This is where the real work happens */ SYSCALL_DEFINE3(init_module, void __user *, umod, unsigned long, len, const char __user *, uargs) @@ -2417,6 +2432,7 @@ SYSCALL_DEFINE3(init_module, void __user *, umod, blocking_notifier_call_chain(&module_notify_list, MODULE_STATE_COMING, mod); + do_mod_ctors(mod); /* Start the module */ if (mod->init != NULL) ret = do_one_initcall(mod->init); -- cgit v1.2.3-59-g8ed1b From dcce284a259373f9e5570f2e33f79eca84fcf565 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 18 Jun 2009 13:24:12 +1000 Subject: mm: Extend gfp masking to the page allocator The page allocator also needs the masking of gfp flags during boot, so this moves it out of slab/slub and uses it with the page allocator as well. Signed-off-by: Benjamin Herrenschmidt Acked-by: Pekka Enberg Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 9 ++++++++- init/main.c | 4 ++++ mm/page_alloc.c | 3 +++ mm/slab.c | 15 ++------------- mm/slub.c | 12 +----------- 5 files changed, 18 insertions(+), 25 deletions(-) (limited to 'init/main.c') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index cfdb35d71bca..7c777a0da17a 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -99,7 +99,7 @@ struct vm_area_struct; __GFP_NORETRY|__GFP_NOMEMALLOC) /* Control slab gfp mask during early boot */ -#define SLAB_GFP_BOOT_MASK __GFP_BITS_MASK & ~(__GFP_WAIT|__GFP_IO|__GFP_FS) +#define GFP_BOOT_MASK __GFP_BITS_MASK & ~(__GFP_WAIT|__GFP_IO|__GFP_FS) /* Control allocation constraints */ #define GFP_CONSTRAINT_MASK (__GFP_HARDWALL|__GFP_THISNODE) @@ -348,4 +348,11 @@ static inline void oom_killer_enable(void) oom_killer_disabled = false; } +extern gfp_t gfp_allowed_mask; + +static inline void set_gfp_allowed_mask(gfp_t mask) +{ + gfp_allowed_mask = mask; +} + #endif /* __LINUX_GFP_H */ diff --git a/init/main.c b/init/main.c index 1a65fdd06318..09131ec090c1 100644 --- a/init/main.c +++ b/init/main.c @@ -642,6 +642,10 @@ asmlinkage void __init start_kernel(void) "enabled early\n"); early_boot_irqs_on(); local_irq_enable(); + + /* Interrupts are enabled now so all GFP allocations are safe. */ + set_gfp_allowed_mask(__GFP_BITS_MASK); + kmem_cache_init_late(); /* diff --git a/mm/page_alloc.c b/mm/page_alloc.c index a5f3c278c573..6f0753fe694c 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -73,6 +73,7 @@ unsigned long totalram_pages __read_mostly; unsigned long totalreserve_pages __read_mostly; unsigned long highest_memmap_pfn __read_mostly; int percpu_pagelist_fraction; +gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK; #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE int pageblock_order __read_mostly; @@ -1863,6 +1864,8 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, struct page *page; int migratetype = allocflags_to_migratetype(gfp_mask); + gfp_mask &= gfp_allowed_mask; + lockdep_trace_alloc(gfp_mask); might_sleep_if(gfp_mask & __GFP_WAIT); diff --git a/mm/slab.c b/mm/slab.c index d08692303f6e..e74a16e4ced6 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -304,12 +304,6 @@ struct kmem_list3 { int free_touched; /* updated without locking */ }; -/* - * The slab allocator is initialized with interrupts disabled. Therefore, make - * sure early boot allocations don't accidentally enable interrupts. - */ -static gfp_t slab_gfp_mask __read_mostly = SLAB_GFP_BOOT_MASK; - /* * Need this for bootstrapping a per node allocator. */ @@ -1559,11 +1553,6 @@ void __init kmem_cache_init_late(void) { struct kmem_cache *cachep; - /* - * Interrupts are enabled now so all GFP allocations are safe. - */ - slab_gfp_mask = __GFP_BITS_MASK; - /* 6) resize the head arrays to their final sizes */ mutex_lock(&cache_chain_mutex); list_for_each_entry(cachep, &cache_chain, next) @@ -3307,7 +3296,7 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, unsigned long save_flags; void *ptr; - flags &= slab_gfp_mask; + flags &= gfp_allowed_mask; lockdep_trace_alloc(flags); @@ -3392,7 +3381,7 @@ __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller) unsigned long save_flags; void *objp; - flags &= slab_gfp_mask; + flags &= gfp_allowed_mask; lockdep_trace_alloc(flags); diff --git a/mm/slub.c b/mm/slub.c index 4c6449310a0e..ce62b770e2fc 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -179,12 +179,6 @@ static enum { SYSFS /* Sysfs up */ } slab_state = DOWN; -/* - * The slab allocator is initialized with interrupts disabled. Therefore, make - * sure early boot allocations don't accidentally enable interrupts. - */ -static gfp_t slab_gfp_mask __read_mostly = SLAB_GFP_BOOT_MASK; - /* A list of all slab caches on the system */ static DECLARE_RWSEM(slub_lock); static LIST_HEAD(slab_caches); @@ -1692,7 +1686,7 @@ static __always_inline void *slab_alloc(struct kmem_cache *s, unsigned long flags; unsigned int objsize; - gfpflags &= slab_gfp_mask; + gfpflags &= gfp_allowed_mask; lockdep_trace_alloc(gfpflags); might_sleep_if(gfpflags & __GFP_WAIT); @@ -3220,10 +3214,6 @@ void __init kmem_cache_init(void) void __init kmem_cache_init_late(void) { - /* - * Interrupts are enabled now so all GFP allocations are safe. - */ - slab_gfp_mask = __GFP_BITS_MASK; } /* -- cgit v1.2.3-59-g8ed1b From 31950eb66ff47c946fd9c65c2f8c94b6b7ba13fc Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 22 Jun 2009 21:18:12 -0700 Subject: mm/init: cpu_hotplug_init() must be initialized before SLAB SLAB uses get/put_online_cpus() which use a mutex which is itself only initialized when cpu_hotplug_init() is called. Currently we hang suring boot in SLAB due to doing that too late. Reported by James Bottomley and Sachin Sant (and possibly others). Debugged by Benjamin Herrenschmidt. This just removes the dynamic initialization of the data structures, and replaces it with a static one, avoiding this dependency entirely, and removing one unnecessary special initcall. Tested-by: Sachin Sant Tested-by: James Bottomley Tested-by: Benjamin Herrenschmidt Signed-off-by: Linus Torvalds --- include/linux/cpu.h | 5 ----- init/main.c | 1 - kernel/cpu.c | 13 +++++-------- 3 files changed, 5 insertions(+), 14 deletions(-) (limited to 'init/main.c') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 2643d848df90..4d668e05d458 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -69,7 +69,6 @@ static inline void unregister_cpu_notifier(struct notifier_block *nb) int cpu_up(unsigned int cpu); void notify_cpu_starting(unsigned int cpu); -extern void cpu_hotplug_init(void); extern void cpu_maps_update_begin(void); extern void cpu_maps_update_done(void); @@ -84,10 +83,6 @@ static inline void unregister_cpu_notifier(struct notifier_block *nb) { } -static inline void cpu_hotplug_init(void) -{ -} - static inline void cpu_maps_update_begin(void) { } diff --git a/init/main.c b/init/main.c index 09131ec090c1..4870dfeb9ee5 100644 --- a/init/main.c +++ b/init/main.c @@ -678,7 +678,6 @@ asmlinkage void __init start_kernel(void) #endif page_cgroup_init(); enable_debug_pagealloc(); - cpu_hotplug_init(); kmemtrace_init(); kmemleak_init(); debug_objects_mem_init(); diff --git a/kernel/cpu.c b/kernel/cpu.c index 395b6974dc8d..8ce10043e4ac 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -34,14 +34,11 @@ static struct { * an ongoing cpu hotplug operation. */ int refcount; -} cpu_hotplug; - -void __init cpu_hotplug_init(void) -{ - cpu_hotplug.active_writer = NULL; - mutex_init(&cpu_hotplug.lock); - cpu_hotplug.refcount = 0; -} +} cpu_hotplug = { + .active_writer = NULL, + .lock = __MUTEX_INITIALIZER(cpu_hotplug.lock), + .refcount = 0, +}; #ifdef CONFIG_HOTPLUG_CPU -- cgit v1.2.3-59-g8ed1b From d6647bdf98a0de19963de8d5d9698d469ed72097 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 21 Jul 2009 17:11:50 +0900 Subject: init: set nr_cpu_ids before setup_per_cpu_areas() nr_cpu_ids is dependent only on cpu_possible_map and setup_per_cpu_areas() already depends on cpu_possible_map and will use nr_cpu_ids. Initialize nr_cpu_ids before setting up percpu areas. Signed-off-by: Tejun Heo --- init/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'init/main.c') diff --git a/init/main.c b/init/main.c index 2c5ade79eb81..2d9d6bdfe7c9 100644 --- a/init/main.c +++ b/init/main.c @@ -584,8 +584,8 @@ asmlinkage void __init start_kernel(void) setup_arch(&command_line); mm_init_owner(&init_mm, &init_task); setup_command_line(command_line); - setup_per_cpu_areas(); setup_nr_cpu_ids(); + setup_per_cpu_areas(); smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */ build_all_zonelists(); -- cgit v1.2.3-59-g8ed1b From 4a683bf94b8a10e2bb0da07aec3ac0a55e5de61f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 21 Aug 2009 12:53:36 +0200 Subject: tracing: Fix too large stack usage in do_one_initcall() One of my testboxes triggered this nasty stack overflow crash during SCSI probing: [ 5.874004] sd 0:0:0:0: [sda] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA [ 5.875004] device: 'sda': device_add [ 5.878004] BUG: unable to handle kernel NULL pointer dereference at 00000a0c [ 5.878004] IP: [] print_context_stack+0x81/0x110 [ 5.878004] *pde = 00000000 [ 5.878004] Thread overran stack, or stack corrupted [ 5.878004] Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC [ 5.878004] last sysfs file: [ 5.878004] [ 5.878004] Pid: 1, comm: swapper Not tainted (2.6.31-rc6-tip-01272-g9919e28-dirty #5685) [ 5.878004] EIP: 0060:[] EFLAGS: 00010083 CPU: 0 [ 5.878004] EIP is at print_context_stack+0x81/0x110 [ 5.878004] EAX: cf8a3000 EBX: cf8a3fe4 ECX: 00000049 EDX: 00000000 [ 5.878004] ESI: b1cfce84 EDI: 00000000 EBP: cf8a3018 ESP: cf8a2ff4 [ 5.878004] DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068 [ 5.878004] Process swapper (pid: 1, ti=cf8a2000 task=cf8a8000 task.ti=cf8a3000) [ 5.878004] Stack: [ 5.878004] b1004867 fffff000 cf8a3ffc [ 5.878004] Call Trace: [ 5.878004] [] ? kernel_thread_helper+0x7/0x10 [ 5.878004] BUG: unable to handle kernel NULL pointer dereference at 00000a0c [ 5.878004] IP: [] print_context_stack+0x81/0x110 [ 5.878004] *pde = 00000000 [ 5.878004] Thread overran stack, or stack corrupted [ 5.878004] Oops: 0000 [#2] PREEMPT SMP DEBUG_PAGEALLOC The oops did not reveal any more details about the real stack that we have and the system got into an infinite loop of recursive pagefaults. So i booted with CONFIG_STACK_TRACER=y and the 'stacktrace' boot parameter. The box did not crash (timings/conditions probably changed a tiny bit to trigger the catastrophic crash), but the /debug/tracing/stack_trace file was rather revealing: Depth Size Location (72 entries) ----- ---- -------- 0) 3704 52 __change_page_attr+0xb8/0x290 1) 3652 24 __change_page_attr_set_clr+0x43/0x90 2) 3628 60 kernel_map_pages+0x108/0x120 3) 3568 40 prep_new_page+0x7d/0x130 4) 3528 84 get_page_from_freelist+0x106/0x420 5) 3444 116 __alloc_pages_nodemask+0xd7/0x550 6) 3328 36 allocate_slab+0xb1/0x100 7) 3292 36 new_slab+0x1c/0x160 8) 3256 36 __slab_alloc+0x133/0x2b0 9) 3220 4 kmem_cache_alloc+0x1bb/0x1d0 10) 3216 108 create_object+0x28/0x250 11) 3108 40 kmemleak_alloc+0x81/0xc0 12) 3068 24 kmem_cache_alloc+0x162/0x1d0 13) 3044 52 scsi_pool_alloc_command+0x29/0x70 14) 2992 20 scsi_host_alloc_command+0x22/0x70 15) 2972 24 __scsi_get_command+0x1b/0x90 16) 2948 28 scsi_get_command+0x35/0x90 17) 2920 24 scsi_setup_blk_pc_cmnd+0xd4/0x100 18) 2896 128 sd_prep_fn+0x332/0xa70 19) 2768 36 blk_peek_request+0xe7/0x1d0 20) 2732 56 scsi_request_fn+0x54/0x520 21) 2676 12 __generic_unplug_device+0x2b/0x40 22) 2664 24 blk_execute_rq_nowait+0x59/0x80 23) 2640 172 blk_execute_rq+0x6b/0xb0 24) 2468 32 scsi_execute+0xe0/0x140 25) 2436 64 scsi_execute_req+0x152/0x160 26) 2372 60 scsi_vpd_inquiry+0x6c/0x90 27) 2312 44 scsi_get_vpd_page+0x112/0x160 28) 2268 52 sd_revalidate_disk+0x1df/0x320 29) 2216 92 rescan_partitions+0x98/0x330 30) 2124 52 __blkdev_get+0x309/0x350 31) 2072 8 blkdev_get+0xf/0x20 32) 2064 44 register_disk+0xff/0x120 33) 2020 36 add_disk+0x6e/0xb0 34) 1984 44 sd_probe_async+0xfb/0x1d0 35) 1940 44 __async_schedule+0xf4/0x1b0 36) 1896 8 async_schedule+0x12/0x20 37) 1888 60 sd_probe+0x305/0x360 38) 1828 44 really_probe+0x63/0x170 39) 1784 36 driver_probe_device+0x5d/0x60 40) 1748 16 __device_attach+0x49/0x50 41) 1732 32 bus_for_each_drv+0x5b/0x80 42) 1700 24 device_attach+0x6b/0x70 43) 1676 16 bus_attach_device+0x47/0x60 44) 1660 76 device_add+0x33d/0x400 45) 1584 52 scsi_sysfs_add_sdev+0x6a/0x2c0 46) 1532 108 scsi_add_lun+0x44b/0x460 47) 1424 116 scsi_probe_and_add_lun+0x182/0x4e0 48) 1308 36 __scsi_add_device+0xd9/0xe0 49) 1272 44 ata_scsi_scan_host+0x10b/0x190 50) 1228 24 async_port_probe+0x96/0xd0 51) 1204 44 __async_schedule+0xf4/0x1b0 52) 1160 8 async_schedule+0x12/0x20 53) 1152 48 ata_host_register+0x171/0x1d0 54) 1104 60 ata_pci_sff_activate_host+0xf3/0x230 55) 1044 44 ata_pci_sff_init_one+0xea/0x100 56) 1000 48 amd_init_one+0xb2/0x190 57) 952 8 local_pci_probe+0x13/0x20 58) 944 32 pci_device_probe+0x68/0x90 59) 912 44 really_probe+0x63/0x170 60) 868 36 driver_probe_device+0x5d/0x60 61) 832 20 __driver_attach+0x89/0xa0 62) 812 32 bus_for_each_dev+0x5b/0x80 63) 780 12 driver_attach+0x1e/0x20 64) 768 72 bus_add_driver+0x14b/0x2d0 65) 696 36 driver_register+0x6e/0x150 66) 660 20 __pci_register_driver+0x53/0xc0 67) 640 8 amd_init+0x14/0x16 68) 632 572 do_one_initcall+0x2b/0x1d0 69) 60 12 do_basic_setup+0x56/0x6a 70) 48 20 kernel_init+0x84/0xce 71) 28 28 kernel_thread_helper+0x7/0x10 There's a lot of fat functions on that stack trace, but the largest of all is do_one_initcall(). This is due to the boot trace entry variables being on the stack. Fixing this is relatively easy, initcalls are fundamentally serialized, so we can move the local variables to file scope. Note that this large stack footprint was present for a couple of months already - what pushed my system over the edge was the addition of kmemleak to the call-chain: 6) 3328 36 allocate_slab+0xb1/0x100 7) 3292 36 new_slab+0x1c/0x160 8) 3256 36 __slab_alloc+0x133/0x2b0 9) 3220 4 kmem_cache_alloc+0x1bb/0x1d0 10) 3216 108 create_object+0x28/0x250 11) 3108 40 kmemleak_alloc+0x81/0xc0 12) 3068 24 kmem_cache_alloc+0x162/0x1d0 13) 3044 52 scsi_pool_alloc_command+0x29/0x70 This pushes the total to ~3800 bytes, only a tiny bit more was needed to corrupt the on-kernel-stack thread_info. The fix reduces the stack footprint from 572 bytes to 28 bytes. Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Frederic Weisbecker Cc: Steven Rostedt Cc: Catalin Marinas Cc: Jens Axboe Cc: Linus Torvalds Cc: LKML-Reference: Signed-off-by: Ingo Molnar --- init/main.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'init/main.c') diff --git a/init/main.c b/init/main.c index 2c5ade79eb81..98e679e3494a 100644 --- a/init/main.c +++ b/init/main.c @@ -733,13 +733,14 @@ static void __init do_ctors(void) int initcall_debug; core_param(initcall_debug, initcall_debug, bool, 0644); +static char msgbuf[64]; +static struct boot_trace_call call; +static struct boot_trace_ret ret; + int do_one_initcall(initcall_t fn) { int count = preempt_count(); ktime_t calltime, delta, rettime; - char msgbuf[64]; - struct boot_trace_call call; - struct boot_trace_ret ret; if (initcall_debug) { call.caller = task_pid_nr(current); -- cgit v1.2.3-59-g8ed1b