From f6adb9a6aec0ec9540e15f354e5cdec88b2aea33 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 23 Mar 2012 15:01:49 -0700 Subject: um: don't restore current->blocked on error If we fail to set up the signal stack frame then we don't need to restore current->blocked because it is not modified by setup_signal_stack_*. Acked-by: Oleg Nesterov Acked-by: Richard Weinberger Tested-by: Richard Weinberger Signed-off-by: Matt Fleming Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/kernel/signal.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c index e8b889d3bce7..8382e0b91e8b 100644 --- a/arch/um/kernel/signal.c +++ b/arch/um/kernel/signal.c @@ -65,13 +65,9 @@ static int handle_signal(struct pt_regs *regs, unsigned long signr, #endif err = setup_signal_stack_si(sp, signr, ka, regs, info, oldset); - if (err) { - spin_lock_irq(&current->sighand->siglock); - current->blocked = *oldset; - recalc_sigpending(); - spin_unlock_irq(&current->sighand->siglock); + if (err) force_sigsegv(signr, current); - } else { + else { spin_lock_irq(&current->sighand->siglock); sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask); -- cgit v1.2.3-59-g8ed1b From d982d5955e9033015a2cc119aa7c0a878e275964 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 23 Mar 2012 15:01:50 -0700 Subject: um: use set_current_blocked() and block_sigmask() As described in commit e6fa16ab9c1e ("signal: sigprocmask() should do retarget_shared_pending()") the modification of current->blocked is incorrect as we need to check whether the signal we're about to block is pending in the shared queue. Also, use the new helper function introduced in commit 5e6292c0f28f ("signal: add block_sigmask() for adding sigmask to current->blocked") which centralises the code for updating current->blocked after successfully delivering a signal and reduces the amount of duplicate code across architectures. In the past some architectures got this code wrong, so using this helper function should stop that from happening again.
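For reference, block_sigmask() behaves like the open-coded sequence being removed in this series: it merges the handler's sa_mask into the blocked set, adds the delivered signal unless SA_NODEFER is set, and applies the result through set_current_blocked(), which also retargets shared pending signals. A rough sketch of the equivalent logic (based on the lines deleted below, not necessarily the exact upstream implementation):

        void block_sigmask(struct k_sigaction *ka, int signr)
        {
                sigset_t blocked;

                /* Start from the current mask plus the handler's sa_mask. */
                sigorsets(&blocked, &current->blocked, &ka->sa.sa_mask);

                /* Also block the signal being delivered, unless SA_NODEFER. */
                if (!(ka->sa.sa_flags & SA_NODEFER))
                        sigaddset(&blocked, signr);

                /*
                 * set_current_blocked() takes the siglock, retargets shared
                 * pending signals and recalculates sigpending, replacing the
                 * open-coded lock/recalc_sigpending() sequence.
                 */
                set_current_blocked(&blocked);
        }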
Acked-by: Oleg Nesterov Cc: Richard Weinberger Signed-off-by: Matt Fleming Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/kernel/signal.c | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c index 8382e0b91e8b..fb12f4c5e649 100644 --- a/arch/um/kernel/signal.c +++ b/arch/um/kernel/signal.c @@ -67,15 +67,8 @@ static int handle_signal(struct pt_regs *regs, unsigned long signr, if (err) force_sigsegv(signr, current); - else { - spin_lock_irq(&current->sighand->siglock); - sigorsets(&current->blocked, &current->blocked, - &ka->sa.sa_mask); - if (!(ka->sa.sa_flags & SA_NODEFER)) - sigaddset(&current->blocked, signr); - recalc_sigpending(); - spin_unlock_irq(&current->sighand->siglock); - } + else + block_sigmask(ka, signr); return err; } @@ -158,12 +151,11 @@ int do_signal(void) */ long sys_sigsuspend(int history0, int history1, old_sigset_t mask) { + sigset_t blocked; + mask &= _BLOCKABLE; - spin_lock_irq(&current->sighand->siglock); - current->saved_sigmask = current->blocked; - siginitset(&current->blocked, mask); - recalc_sigpending(); - spin_unlock_irq(&current->sighand->siglock); + siginitset(&blocked, mask); + set_current_blocked(&blocked); current->state = TASK_INTERRUPTIBLE; schedule(); -- cgit v1.2.3-59-g8ed1b From d314d74c695f967e10598467a326f41c78ed1e20 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 23 Mar 2012 15:01:51 -0700 Subject: nmi watchdog: do not use cpp symbol in Kconfig ARCH_HAS_NMI_WATCHDOG is a macro defined by arch, but config HARDLOCKUP_DETECTOR depends on it. This is wrong; ARCH_HAS_NMI_WATCHDOG has to be a Kconfig symbol, and the architectures that need it should select it explicitly. Signed-off-by: WANG Cong Acked-by: Don Zickus Acked-by: Mike Frysinger Cc: David Howells Cc: David Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/Kconfig | 3 +++ arch/blackfin/Kconfig | 1 + arch/blackfin/include/asm/irq.h | 4 ---- arch/mn10300/Kconfig | 1 + arch/mn10300/include/asm/reset-regs.h | 4 ---- arch/sparc/Kconfig | 1 + arch/sparc/include/asm/irq_64.h | 1 - include/linux/nmi.h | 2 +- lib/Kconfig.debug | 2 +- 9 files changed, 8 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/Kconfig b/arch/Kconfig index 5b448a74d0f7..a6f14f622d13 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -120,6 +120,9 @@ config HAVE_KRETPROBES config HAVE_OPTPROBES bool + +config HAVE_NMI_WATCHDOG + bool # # An arch should select this if it provides all these things: # diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig index abe5a9e85148..c1269a1085e1 100644 --- a/arch/blackfin/Kconfig +++ b/arch/blackfin/Kconfig @@ -36,6 +36,7 @@ config BLACKFIN select GENERIC_ATOMIC64 select GENERIC_IRQ_PROBE select IRQ_PER_CPU if SMP + select HAVE_NMI_WATCHDOG if NMI_WATCHDOG config GENERIC_CSUM def_bool y diff --git a/arch/blackfin/include/asm/irq.h b/arch/blackfin/include/asm/irq.h index 12f4060a31b0..89de539ed010 100644 --- a/arch/blackfin/include/asm/irq.h +++ b/arch/blackfin/include/asm/irq.h @@ -38,8 +38,4 @@ #include -#ifdef CONFIG_NMI_WATCHDOG -# define ARCH_HAS_NMI_WATCHDOG -#endif - #endif /* _BFIN_IRQ_H_ */ diff --git a/arch/mn10300/Kconfig b/arch/mn10300/Kconfig index 8f1c40d5817e..3aa3de017159 100644 --- a/arch/mn10300/Kconfig +++ b/arch/mn10300/Kconfig @@ -5,6 +5,7 @@ config MN10300 select GENERIC_IRQ_SHOW select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_KGDB + select HAVE_NMI_WATCHDOG if MN10300_WD_TIMER config AM33_2 def_bool n diff --git a/arch/mn10300/include/asm/reset-regs.h
b/arch/mn10300/include/asm/reset-regs.h index 10c7502a113f..8ca2a42d365b 100644 --- a/arch/mn10300/include/asm/reset-regs.h +++ b/arch/mn10300/include/asm/reset-regs.h @@ -17,10 +17,6 @@ #ifdef __KERNEL__ -#ifdef CONFIG_MN10300_WD_TIMER -#define ARCH_HAS_NMI_WATCHDOG /* See include/linux/nmi.h */ -#endif - /* * watchdog timer registers */ diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index ca5580e4d813..1666de84d477 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -29,6 +29,7 @@ config SPARC select GENERIC_IRQ_SHOW select USE_GENERIC_SMP_HELPERS if SMP select GENERIC_PCI_IOMAP + select HAVE_NMI_WATCHDOG if SPARC64 config SPARC32 def_bool !64BIT diff --git a/arch/sparc/include/asm/irq_64.h b/arch/sparc/include/asm/irq_64.h index 16dcae6d56e7..abf6afe82ca8 100644 --- a/arch/sparc/include/asm/irq_64.h +++ b/arch/sparc/include/asm/irq_64.h @@ -95,7 +95,6 @@ void arch_trigger_all_cpu_backtrace(void); extern void *hardirq_stack[NR_CPUS]; extern void *softirq_stack[NR_CPUS]; #define __ARCH_HAS_DO_SOFTIRQ -#define ARCH_HAS_NMI_WATCHDOG #define NO_IRQ 0xffffffff diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 2d304efc89df..db50840e6355 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -14,7 +14,7 @@ * may be used to reset the timeout - for code which intentionally * disables interrupts for a long time. This call is stateless. */ -#if defined(ARCH_HAS_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR) +#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR) #include extern void touch_nmi_watchdog(void); #else diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 05037dc9bde7..391003f7ab46 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -184,7 +184,7 @@ config LOCKUP_DETECTOR config HARDLOCKUP_DETECTOR def_bool LOCKUP_DETECTOR && PERF_EVENTS && HAVE_PERF_EVENTS_NMI && \ - !ARCH_HAS_NMI_WATCHDOG + !HAVE_NMI_WATCHDOG config BOOTPARAM_HARDLOCKUP_PANIC bool "Panic (Reboot) On Hard Lockups" -- cgit v1.2.3-59-g8ed1b From 03ff3efb64c8a64cb8cdf35e36bead5c78eb3024 Mon Sep 17 00:00:00 2001 From: Javi Merino Date: Fri, 23 Mar 2012 15:01:52 -0700 Subject: Remove remaining bits of io_remap_page_range() Commit 33bf56106d9b ("feature removal of io_remap_page_range()") removed io_remap_page_range(), but it is still included in some arch header files. It has no in-tree users. Signed-off-by: Javi Merino Cc: Russell King Cc: Aurelien Jacquiot Cc: Michal Simek Cc: Jonas Bonn Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/include/asm/pgtable-nommu.h | 1 - arch/c6x/include/asm/pgtable.h | 3 --- arch/microblaze/include/asm/pgtable.h | 2 -- arch/openrisc/include/asm/pgtable.h | 1 - 4 files changed, 7 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/pgtable-nommu.h b/arch/arm/include/asm/pgtable-nommu.h index ffc0e85775b4..7ec60d6075bf 100644 --- a/arch/arm/include/asm/pgtable-nommu.h +++ b/arch/arm/include/asm/pgtable-nommu.h @@ -79,7 +79,6 @@ extern unsigned int kobjsize(const void *objp); * No page table caches to initialise. 
*/ #define pgtable_cache_init() do { } while (0) -#define io_remap_page_range remap_page_range #define io_remap_pfn_range remap_pfn_range diff --git a/arch/c6x/include/asm/pgtable.h b/arch/c6x/include/asm/pgtable.h index 68c8af4f1f97..38a4312eb2cb 100644 --- a/arch/c6x/include/asm/pgtable.h +++ b/arch/c6x/include/asm/pgtable.h @@ -73,9 +73,6 @@ extern unsigned long empty_zero_page; #define pgtable_cache_init() do { } while (0) #define io_remap_pfn_range remap_pfn_range -#define io_remap_page_range(vma, vaddr, paddr, size, prot) \ - remap_pfn_range(vma, vaddr, (paddr) >> PAGE_SHIFT, size, prot) - #include #endif /* _ASM_C6X_PGTABLE_H */ diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h index b2af42311a12..44dc67aa0277 100644 --- a/arch/microblaze/include/asm/pgtable.h +++ b/arch/microblaze/include/asm/pgtable.h @@ -543,8 +543,6 @@ extern unsigned long iopa(unsigned long addr); /* Needs to be defined here and not in linux/mm.h, as it is arch dependent */ #define kern_addr_valid(addr) (1) -#define io_remap_page_range remap_page_range - /* * No page table caches to initialise */ diff --git a/arch/openrisc/include/asm/pgtable.h b/arch/openrisc/include/asm/pgtable.h index 043505d7f684..14c900cfd30a 100644 --- a/arch/openrisc/include/asm/pgtable.h +++ b/arch/openrisc/include/asm/pgtable.h @@ -455,7 +455,6 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, * No page table caches to initialise */ #define pgtable_cache_init() do { } while (0) -#define io_remap_page_range remap_page_range typedef pte_t *pte_addr_t; -- cgit v1.2.3-59-g8ed1b From 307b1cd7ecd7f3dc5ce3d3860957f034f0abe4df Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Fri, 23 Mar 2012 15:02:03 -0700 Subject: bitops: rename for_each_set_bit_cont() in favor of analogous list.h function This renames for_each_set_bit_cont() to for_each_set_bit_from() because it is analogous to list_for_each_entry_from() in list.h rather than list_for_each_entry_continue(). This doesn't remove for_each_set_bit_cont() for now. Signed-off-by: Akinobu Mita Cc: Robert Richter Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. 
Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/cpu/perf_event.c | 4 ++-- include/linux/bitops.h | 5 ++++- tools/perf/util/include/linux/bitops.h | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 0a18d16cb58d..fa2900c0e398 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -643,14 +643,14 @@ static bool __perf_sched_find_counter(struct perf_sched *sched) /* Prefer fixed purpose counters */ if (x86_pmu.num_counters_fixed) { idx = X86_PMC_IDX_FIXED; - for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_MAX) { + for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_MAX) { if (!__test_and_set_bit(idx, sched->state.used)) goto done; } } /* Grab the first unused counter starting with idx */ idx = sched->state.counter; - for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_FIXED) { + for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_FIXED) { if (!__test_and_set_bit(idx, sched->state.used)) goto done; } diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 94300fe46cce..a78e358f0c17 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -27,11 +27,14 @@ extern unsigned long __sw_hweight64(__u64 w); (bit) = find_next_bit((addr), (size), (bit) + 1)) /* same as for_each_set_bit() but use bit as value to start with */ -#define for_each_set_bit_cont(bit, addr, size) \ +#define for_each_set_bit_from(bit, addr, size) \ for ((bit) = find_next_bit((addr), (size), (bit)); \ (bit) < (size); \ (bit) = find_next_bit((addr), (size), (bit) + 1)) +#define for_each_set_bit_cont(bit, addr, size) \ + for_each_set_bit_from(bit, addr, size) + static __inline__ int get_bitmask_order(unsigned int count) { int order; diff --git a/tools/perf/util/include/linux/bitops.h b/tools/perf/util/include/linux/bitops.h index 62cdee78db7b..f1584833bd22 100644 --- a/tools/perf/util/include/linux/bitops.h +++ b/tools/perf/util/include/linux/bitops.h @@ -15,7 +15,7 @@ (bit) = find_next_bit((addr), (size), (bit) + 1)) /* same as for_each_set_bit() but use bit as value to start with */ -#define for_each_set_bit_cont(bit, addr, size) \ +#define for_each_set_bit_from(bit, addr, size) \ for ((bit) = find_next_bit((addr), (size), (bit)); \ (bit) < (size); \ (bit) = find_next_bit((addr), (size), (bit) + 1)) -- cgit v1.2.3-59-g8ed1b From 0b2f4d4d76a09f02fa37bfa57909483448fac771 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Fri, 23 Mar 2012 15:02:06 -0700 Subject: x86: use for_each_clear_bit_from() Use for_each_clear_bit_from() to iterate over all the cleared bits in a memory region. Signed-off-by: Akinobu Mita Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/irqinit.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 313fb5cddbce..43e2b1cff0a7 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -306,10 +306,10 @@ void __init native_init_IRQ(void) * us. (some of these will be overridden and become * 'special' SMP interrupts) */ - for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) { + i = FIRST_EXTERNAL_VECTOR; + for_each_clear_bit_from(i, used_vectors, NR_VECTORS) { /* IA32_SYSCALL_VECTOR could be used in trap_init already.
*/ - if (!test_bit(i, used_vectors)) - set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]); + set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]); } if (!acpi_ioapic && !of_ioapic) -- cgit v1.2.3-59-g8ed1b From 43aca3246cb7f736b20c11da9ce932a124a2a85a Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 23 Mar 2012 15:02:43 -0700 Subject: Hexagon: use set_current_blocked() and block_sigmask() As described in e6fa16ab9c1e ("signal: sigprocmask() should do retarget_shared_pending()") the modification of current->blocked is incorrect as we need to check whether the signal we're about to block is pending in the shared queue. Also, use the new helper function introduced in commit 5e6292c0f28f ("signal: add block_sigmask() for adding sigmask to current->blocked") which centralises the code for updating current->blocked after successfully delivering a signal and reduces the amount of duplicate code across architectures. In the past some architectures got this code wrong, so using this helper function should stop that from happening again. Acked-by: Oleg Nesterov Acked-by: Richard Kuo Signed-off-by: Matt Fleming Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/hexagon/kernel/signal.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/hexagon/kernel/signal.c b/arch/hexagon/kernel/signal.c index b45be3181193..ecbab3457606 100644 --- a/arch/hexagon/kernel/signal.c +++ b/arch/hexagon/kernel/signal.c @@ -192,12 +192,7 @@ static int handle_signal(int sig, siginfo_t *info, struct k_sigaction *ka, if (rc) return rc; - spin_lock_irq(&current->sighand->siglock); - sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask); - if (!(ka->sa.sa_flags & SA_NODEFER)) - sigaddset(&current->blocked, sig); - recalc_sigpending(); - spin_unlock_irq(&current->sighand->siglock); + block_sigmask(ka, sig); return 0; } @@ -305,10 +300,7 @@ asmlinkage int sys_rt_sigreturn(void) goto badframe; sigdelsetmask(&blocked, ~_BLOCKABLE); - spin_lock_irq(&current->sighand->siglock); - current->blocked = blocked; - recalc_sigpending(); - spin_unlock_irq(&current->sighand->siglock); + set_current_blocked(&blocked); if (restore_sigcontext(regs, &frame->uc.uc_mcontext)) goto badframe; -- cgit v1.2.3-59-g8ed1b From 909af768e88867016f427264ae39d27a57b6a8ed Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Fri, 23 Mar 2012 15:02:51 -0700 Subject: coredump: remove VM_ALWAYSDUMP flag The motivation for this patchset was that I was looking at a way for a qemu-kvm process to exclude the guest memory from its core dump, which can be quite large. There are already a number of filter flags in /proc/<pid>/coredump_filter, however, these allow one to specify 'types' of kernel memory, not specific address ranges (which is needed in this case). Since there are no more vma flags available, the first patch eliminates the need for the 'VM_ALWAYSDUMP' flag. The flag is used internally by the kernel to mark vdso and vsyscall pages. However, it is simple enough to check if a vma covers a vdso or vsyscall page without the need for this flag. The second patch then replaces the 'VM_ALWAYSDUMP' flag with a new 'VM_NODUMP' flag, which can be set by userspace using new madvise flags: 'MADV_DONTDUMP', and unset via 'MADV_DODUMP'. The core dump filters continue to work the same as before unless 'MADV_DONTDUMP' is set on the region. The qemu code which implements this feature is at: http://people.redhat.com/~jbaron/qemu-dump/qemu-dump.patch In my testing the qemu core dump shrunk from 383MB -> 13MB with this patch. 
I also believe that the 'MADV_DONTDUMP' flag might be useful for security-sensitive apps, which might want to select which areas are dumped. This patch: The VM_ALWAYSDUMP flag is currently used by the coredump code to indicate that a vma is part of a vsyscall or vdso section. However, we can determine if a vma is in one of these sections by checking it against the gate_vma and checking for a non-NULL return value from arch_vma_name(), thus freeing a valuable vma bit. Signed-off-by: Jason Baron Acked-by: Roland McGrath Cc: Chris Metcalf Cc: Avi Kivity Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/kernel/process.c | 3 +-- arch/hexagon/kernel/vdso.c | 3 +-- arch/mips/kernel/vdso.c | 3 +-- arch/powerpc/kernel/vdso.c | 10 ++-------- arch/s390/kernel/vdso.c | 10 ++-------- arch/sh/kernel/vsyscall/vsyscall.c | 3 +-- arch/tile/mm/elf.c | 8 +------- arch/unicore32/kernel/process.c | 2 +- arch/x86/um/mem_32.c | 8 -------- arch/x86/um/vdso/vma.c | 3 +-- arch/x86/vdso/vdso32-setup.c | 17 ++--------------- arch/x86/vdso/vma.c | 3 +-- fs/binfmt_elf.c | 27 +++++++++++++++++++++++++-- include/linux/mm.h | 1 - mm/memory.c | 8 +------- 15 files changed, 40 insertions(+), 69 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index c2ae3cd331fe..219e4efee1a6 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -533,8 +533,7 @@ int vectors_user_mapping(void) struct mm_struct *mm = current->mm; return install_special_mapping(mm, 0xffff0000, PAGE_SIZE, VM_READ | VM_EXEC | - VM_MAYREAD | VM_MAYEXEC | - VM_ALWAYSDUMP | VM_RESERVED, + VM_MAYREAD | VM_MAYEXEC | VM_RESERVED, NULL); } diff --git a/arch/hexagon/kernel/vdso.c b/arch/hexagon/kernel/vdso.c index 16277c33308a..f212a453b527 100644 --- a/arch/hexagon/kernel/vdso.c +++ b/arch/hexagon/kernel/vdso.c @@ -78,8 +78,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) /* MAYWRITE to allow gdb to COW and set breakpoints. */ ret = install_special_mapping(mm, vdso_base, PAGE_SIZE, VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| - VM_ALWAYSDUMP, + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, &vdso_page); if (ret) diff --git a/arch/mips/kernel/vdso.c b/arch/mips/kernel/vdso.c index e5cdfd603f8f..0f1af58b036a 100644 --- a/arch/mips/kernel/vdso.c +++ b/arch/mips/kernel/vdso.c @@ -88,8 +88,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) ret = install_special_mapping(mm, addr, PAGE_SIZE, VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| - VM_ALWAYSDUMP, + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, &vdso_page); if (ret) diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 7d14bb697d40..d36ee1055f88 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -263,17 +263,11 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) * the "data" page of the vDSO or you'll stop getting kernel updates * and your nice userland gettimeofday will be totally dead. * It's fine to use that for setting breakpoints in the vDSO code - * pages though - * - * Make sure the vDSO gets into every core dump. - * Dumping its contents makes post-mortem fully interpretable later - * without matching up the same kernel and hardware config to see - * what PC values meant. + * pages though. 
*/ rc = install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT, VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| - VM_ALWAYSDUMP, + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, vdso_pagelist); if (rc) { current->mm->context.vdso_base = 0; diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index e704a9965f90..9c80138206b0 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -241,17 +241,11 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) * on the "data" page of the vDSO or you'll stop getting kernel * updates and your nice userland gettimeofday will be totally dead. * It's fine to use that for setting breakpoints in the vDSO code - * pages though - * - * Make sure the vDSO gets into every core dump. - * Dumping its contents makes post-mortem fully interpretable later - * without matching up the same kernel and hardware config to see - * what PC values meant. + * pages though. */ rc = install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT, VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| - VM_ALWAYSDUMP, + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, vdso_pagelist); if (rc) current->mm->context.vdso_base = 0; diff --git a/arch/sh/kernel/vsyscall/vsyscall.c b/arch/sh/kernel/vsyscall/vsyscall.c index 1d6d51a1ce79..5ca579720a09 100644 --- a/arch/sh/kernel/vsyscall/vsyscall.c +++ b/arch/sh/kernel/vsyscall/vsyscall.c @@ -73,8 +73,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) ret = install_special_mapping(mm, addr, PAGE_SIZE, VM_READ | VM_EXEC | - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC | - VM_ALWAYSDUMP, + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC, syscall_pages); if (unlikely(ret)) goto up_fail; diff --git a/arch/tile/mm/elf.c b/arch/tile/mm/elf.c index 55e58e93bfc5..1a00fb64fc88 100644 --- a/arch/tile/mm/elf.c +++ b/arch/tile/mm/elf.c @@ -117,17 +117,11 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, /* * MAYWRITE to allow gdb to COW and set breakpoints - * - * Make sure the vDSO gets into every core dump. Dumping its - * contents makes post-mortem fully interpretable later - * without matching up the same kernel and hardware config to - * see what PC values meant. */ vdso_base = VDSO_BASE; retval = install_special_mapping(mm, vdso_base, PAGE_SIZE, VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| - VM_ALWAYSDUMP, + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, vdso_pages); #ifndef __tilegx__ diff --git a/arch/unicore32/kernel/process.c b/arch/unicore32/kernel/process.c index 52edc2b62873..432b4291f37b 100644 --- a/arch/unicore32/kernel/process.c +++ b/arch/unicore32/kernel/process.c @@ -381,7 +381,7 @@ int vectors_user_mapping(void) return install_special_mapping(mm, 0xffff0000, PAGE_SIZE, VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC | - VM_ALWAYSDUMP | VM_RESERVED, + VM_RESERVED, NULL); } diff --git a/arch/x86/um/mem_32.c b/arch/x86/um/mem_32.c index 639900a6fde9..f40281e5d6a2 100644 --- a/arch/x86/um/mem_32.c +++ b/arch/x86/um/mem_32.c @@ -23,14 +23,6 @@ static int __init gate_vma_init(void) gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC; gate_vma.vm_page_prot = __P101; - /* - * Make sure the vDSO gets into every core dump. - * Dumping its contents makes post-mortem fully interpretable later - * without matching up the same kernel and hardware config to see - * what PC values meant. 
- */ - gate_vma.vm_flags |= VM_ALWAYSDUMP; - return 0; } __initcall(gate_vma_init); diff --git a/arch/x86/um/vdso/vma.c b/arch/x86/um/vdso/vma.c index 91f4ec9a0a56..af91901babb8 100644 --- a/arch/x86/um/vdso/vma.c +++ b/arch/x86/um/vdso/vma.c @@ -64,8 +64,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) err = install_special_mapping(mm, um_vdso_addr, PAGE_SIZE, VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| - VM_ALWAYSDUMP, + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, vdsop); up_write(&mm->mmap_sem); diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c index 468d591dde31..a944020fa859 100644 --- a/arch/x86/vdso/vdso32-setup.c +++ b/arch/x86/vdso/vdso32-setup.c @@ -250,13 +250,7 @@ static int __init gate_vma_init(void) gate_vma.vm_end = FIXADDR_USER_END; gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC; gate_vma.vm_page_prot = __P101; - /* - * Make sure the vDSO gets into every core dump. - * Dumping its contents makes post-mortem fully interpretable later - * without matching up the same kernel and hardware config to see - * what PC values meant. - */ - gate_vma.vm_flags |= VM_ALWAYSDUMP; + return 0; } @@ -343,17 +337,10 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) if (compat_uses_vma || !compat) { /* * MAYWRITE to allow gdb to COW and set breakpoints - * - * Make sure the vDSO gets into every core dump. - * Dumping its contents makes post-mortem fully - * interpretable later without matching up the same - * kernel and hardware config to see what PC values - * meant. */ ret = install_special_mapping(mm, addr, PAGE_SIZE, VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| - VM_ALWAYSDUMP, + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, vdso32_pages); if (ret) diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c index 153407c35b75..17e18279649f 100644 --- a/arch/x86/vdso/vma.c +++ b/arch/x86/vdso/vma.c @@ -124,8 +124,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) ret = install_special_mapping(mm, addr, vdso_size, VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| - VM_ALWAYSDUMP, + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, vdso_pages); if (ret) { current->mm->context.vdso = NULL; diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 81878b78c9d4..b64be5b5ac21 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1092,6 +1092,29 @@ out: * Jeremy Fitzhardinge */ +/* + * The purpose of always_dump_vma() is to make sure that special kernel mappings + * that are useful for post-mortem analysis are included in every core dump. + * In that way we ensure that the core dump is fully interpretable later + * without matching up the same kernel and hardware config to see what PC values + * meant. These special mappings include - vDSO, vsyscall, and other + * architecture specific mappings + */ +static bool always_dump_vma(struct vm_area_struct *vma) +{ + /* Any vsyscall mappings? */ + if (vma == get_gate_vma(vma->vm_mm)) + return true; + /* + * arch_vma_name() returns non-NULL for special architecture mappings, + * such as vDSO sections. + */ + if (arch_vma_name(vma)) + return true; + + return false; +} + /* * Decide what to dump of a segment, part, all or none. */ @@ -1100,8 +1123,8 @@ static unsigned long vma_dump_size(struct vm_area_struct *vma, { #define FILTER(type) (mm_flags & (1UL << MMF_DUMP_##type)) - /* The vma can be set up to tell us the answer directly. 
*/ - if (vma->vm_flags & VM_ALWAYSDUMP) + /* always dump the vdso and vsyscall sections */ + if (always_dump_vma(vma)) goto whole; /* Hugetlb memory check */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 7330742e7973..2de2ddba51d4 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -111,7 +111,6 @@ extern unsigned int kobjsize(const void *objp); #define VM_HUGEPAGE 0x01000000 /* MADV_HUGEPAGE marked this vma */ #endif #define VM_INSERTPAGE 0x02000000 /* The vma has had "vm_insert_page()" done on it */ -#define VM_ALWAYSDUMP 0x04000000 /* Always include in core dumps */ #define VM_CAN_NONLINEAR 0x08000000 /* Has ->fault & does nonlinear pages */ #define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */ diff --git a/mm/memory.c b/mm/memory.c index 3416b6e018d6..6105f475fa86 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3623,13 +3623,7 @@ static int __init gate_vma_init(void) gate_vma.vm_end = FIXADDR_USER_END; gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC; gate_vma.vm_page_prot = __P101; - /* - * Make sure the vDSO gets into every core dump. - * Dumping its contents makes post-mortem fully interpretable later - * without matching up the same kernel and hardware config to see - * what PC values meant. - */ - gate_vma.vm_flags |= VM_ALWAYSDUMP; + return 0; } __initcall(gate_vma_init); -- cgit v1.2.3-59-g8ed1b From accb61fe7bb0f5c2a4102239e4981650f9048519 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Fri, 23 Mar 2012 15:02:51 -0700 Subject: coredump: add VM_NODUMP, MADV_NODUMP, MADV_CLEAR_NODUMP Since we no longer need the VM_ALWAYSDUMP flag, let's use the freed bit for the 'VM_NODUMP' flag. The idea is to add a new madvise() flag: MADV_DONTDUMP, which can be set by applications to specifically request memory regions which should not dump core. The specific application I have in mind is qemu: we can add a flag there that wouldn't dump all of guest memory when qemu dumps core. This flag might also be useful for security-sensitive apps that want to absolutely make sure that parts of memory are not dumped. To clear the flag use: MADV_DODUMP. [akpm@linux-foundation.org: s/MADV_NODUMP/MADV_DONTDUMP/, s/MADV_CLEAR_NODUMP/MADV_DODUMP/, per Roland] [akpm@linux-foundation.org: fix up the architectures which broke] Signed-off-by: Jason Baron Acked-by: Roland McGrath Cc: Chris Metcalf Cc: Avi Kivity Cc: Ralf Baechle Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Cc: "James E.J. 
Bottomley" Cc: Helge Deller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/mman.h | 4 ++++ arch/mips/include/asm/mman.h | 4 ++++ arch/parisc/include/asm/mman.h | 4 ++++ arch/xtensa/include/asm/mman.h | 4 ++++ fs/binfmt_elf.c | 3 +++ include/asm-generic/mman-common.h | 4 ++++ include/linux/mm.h | 1 + mm/madvise.c | 8 ++++++++ 8 files changed, 32 insertions(+) (limited to 'arch') diff --git a/arch/alpha/include/asm/mman.h b/arch/alpha/include/asm/mman.h index 72db984f8781..cbeb3616a28e 100644 --- a/arch/alpha/include/asm/mman.h +++ b/arch/alpha/include/asm/mman.h @@ -56,6 +56,10 @@ #define MADV_HUGEPAGE 14 /* Worth backing with hugepages */ #define MADV_NOHUGEPAGE 15 /* Not worth backing with hugepages */ +#define MADV_DONTDUMP 16 /* Explicity exclude from the core dump, + overrides the coredump filter bits */ +#define MADV_DODUMP 17 /* Clear the MADV_NODUMP flag */ + /* compatibility flags */ #define MAP_FILE 0 diff --git a/arch/mips/include/asm/mman.h b/arch/mips/include/asm/mman.h index 785b4ea4ec3f..46d3da0d4b92 100644 --- a/arch/mips/include/asm/mman.h +++ b/arch/mips/include/asm/mman.h @@ -80,6 +80,10 @@ #define MADV_HUGEPAGE 14 /* Worth backing with hugepages */ #define MADV_NOHUGEPAGE 15 /* Not worth backing with hugepages */ +#define MADV_DONTDUMP 16 /* Explicity exclude from the core dump, + overrides the coredump filter bits */ +#define MADV_DODUMP 17 /* Clear the MADV_NODUMP flag */ + /* compatibility flags */ #define MAP_FILE 0 diff --git a/arch/parisc/include/asm/mman.h b/arch/parisc/include/asm/mman.h index f5b7bf5fba68..12219ebce869 100644 --- a/arch/parisc/include/asm/mman.h +++ b/arch/parisc/include/asm/mman.h @@ -62,6 +62,10 @@ #define MADV_HUGEPAGE 67 /* Worth backing with hugepages */ #define MADV_NOHUGEPAGE 68 /* Not worth backing with hugepages */ +#define MADV_DONTDUMP 69 /* Explicity exclude from the core dump, + overrides the coredump filter bits */ +#define MADV_DODUMP 70 /* Clear the MADV_NODUMP flag */ + /* compatibility flags */ #define MAP_FILE 0 #define MAP_VARIABLE 0 diff --git a/arch/xtensa/include/asm/mman.h b/arch/xtensa/include/asm/mman.h index 30789010733d..25bc6c1309c3 100644 --- a/arch/xtensa/include/asm/mman.h +++ b/arch/xtensa/include/asm/mman.h @@ -86,6 +86,10 @@ #define MADV_HUGEPAGE 14 /* Worth backing with hugepages */ #define MADV_NOHUGEPAGE 15 /* Not worth backing with hugepages */ +#define MADV_DONTDUMP 16 /* Explicity exclude from the core dump, + overrides the coredump filter bits */ +#define MADV_DODUMP 17 /* Clear the MADV_NODUMP flag */ + /* compatibility flags */ #define MAP_FILE 0 diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index b64be5b5ac21..504b6eee50a9 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1127,6 +1127,9 @@ static unsigned long vma_dump_size(struct vm_area_struct *vma, if (always_dump_vma(vma)) goto whole; + if (vma->vm_flags & VM_NODUMP) + return 0; + /* Hugetlb memory check */ if (vma->vm_flags & VM_HUGETLB) { if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED)) diff --git a/include/asm-generic/mman-common.h b/include/asm-generic/mman-common.h index 787abbb6d867..d030d2c2647a 100644 --- a/include/asm-generic/mman-common.h +++ b/include/asm-generic/mman-common.h @@ -48,6 +48,10 @@ #define MADV_HUGEPAGE 14 /* Worth backing with hugepages */ #define MADV_NOHUGEPAGE 15 /* Not worth backing with hugepages */ +#define MADV_DONTDUMP 16 /* Explicity exclude from the core dump, + overrides the coredump filter bits */ +#define MADV_DODUMP 17 /* Clear the MADV_NODUMP flag */ + 
/* compatibility flags */ #define MAP_FILE 0 diff --git a/include/linux/mm.h b/include/linux/mm.h index 2de2ddba51d4..a6fabdfd34c5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -111,6 +111,7 @@ extern unsigned int kobjsize(const void *objp); #define VM_HUGEPAGE 0x01000000 /* MADV_HUGEPAGE marked this vma */ #endif #define VM_INSERTPAGE 0x02000000 /* The vma has had "vm_insert_page()" done on it */ +#define VM_NODUMP 0x04000000 /* Do not include in the core dump */ #define VM_CAN_NONLINEAR 0x08000000 /* Has ->fault & does nonlinear pages */ #define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */ diff --git a/mm/madvise.c b/mm/madvise.c index f5ab745672b7..1ccbba5b6674 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -65,6 +65,12 @@ static long madvise_behavior(struct vm_area_struct * vma, } new_flags &= ~VM_DONTCOPY; break; + case MADV_DONTDUMP: + new_flags |= VM_NODUMP; + break; + case MADV_DODUMP: + new_flags &= ~VM_NODUMP; + break; case MADV_MERGEABLE: case MADV_UNMERGEABLE: error = ksm_madvise(vma, start, end, behavior, &new_flags); @@ -293,6 +299,8 @@ madvise_behavior_valid(int behavior) case MADV_HUGEPAGE: case MADV_NOHUGEPAGE: #endif + case MADV_DONTDUMP: + case MADV_DODUMP: return 1; default: -- cgit v1.2.3-59-g8ed1b
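To illustrate the userspace side of these new flags, here is a minimal, hypothetical C sketch of how a process such as qemu could exclude a large anonymous region (for example guest RAM) from its core dump. It assumes a libc whose sys/mman.h already exposes MADV_DONTDUMP and MADV_DODUMP; on older headers the generic values from asm-generic/mman-common.h can be defined by hand, and on kernels without this patch the madvise() calls simply fail with EINVAL.

        #include <stdio.h>
        #include <sys/mman.h>

        #ifndef MADV_DONTDUMP
        #define MADV_DONTDUMP 16        /* generic value from asm-generic/mman-common.h */
        #endif
        #ifndef MADV_DODUMP
        #define MADV_DODUMP   17
        #endif

        int main(void)
        {
                size_t len = 256UL * 1024 * 1024;       /* stand-in for a large guest RAM buffer */
                void *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
                                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

                if (buf == MAP_FAILED) {
                        perror("mmap");
                        return 1;
                }

                /* Sets VM_NODUMP on the vma: the region is skipped in any core dump. */
                if (madvise(buf, len, MADV_DONTDUMP))
                        perror("madvise(MADV_DONTDUMP)");

                /* ... fill and use the region ... */

                /* Clears VM_NODUMP again if the contents become worth dumping. */
                if (madvise(buf, len, MADV_DODUMP))
                        perror("madvise(MADV_DODUMP)");

                munmap(buf, len);
                return 0;
        }

The existing /proc/<pid>/coredump_filter bits still apply to everything else; MADV_DONTDUMP only overrides them for the marked range, matching the "overrides the coredump filter bits" comment added to the mman headers above.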