From f6adb9a6aec0ec9540e15f354e5cdec88b2aea33 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 23 Mar 2012 15:01:49 -0700 Subject: um: don't restore current->blocked on error If we fail to set up the signal stack frame then we don't need to restore current->blocked because it is not modified by setup_signal_stack_*. Acked-by: Oleg Nesterov Acked-by: Richard Weinberger Tested-by: Richard Weinberger Signed-off-by: Matt Fleming Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/kernel/signal.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c index e8b889d3bce7..8382e0b91e8b 100644 --- a/arch/um/kernel/signal.c +++ b/arch/um/kernel/signal.c @@ -65,13 +65,9 @@ static int handle_signal(struct pt_regs *regs, unsigned long signr, #endif err = setup_signal_stack_si(sp, signr, ka, regs, info, oldset); - if (err) { - spin_lock_irq(&current->sighand->siglock); - current->blocked = *oldset; - recalc_sigpending(); - spin_unlock_irq(&current->sighand->siglock); + if (err) force_sigsegv(signr, current); - } else { + else { spin_lock_irq(&current->sighand->siglock); sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask); -- cgit v1.2.3-59-g8ed1b From d982d5955e9033015a2cc119aa7c0a878e275964 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 23 Mar 2012 15:01:50 -0700 Subject: um: use set_current_blocked() and block_sigmask() As described in commit e6fa16ab9c1e ("signal: sigprocmask() should do retarget_shared_pending()") the modification of current->blocked is incorrect as we need to check whether the signal we're about to block is pending in the shared queue. Also, use the new helper function introduced in commit 5e6292c0f28f ("signal: add block_sigmask() for adding sigmask to current->blocked") which centralises the code for updating current->blocked after successfully delivering a signal and reduces the amount of duplicate code across architectures. In the past some architectures got this code wrong, so using this helper function should stop that from happening again.
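For reference, block_sigmask() behaves like the open-coded sequence being removed in this series: it merges the handler's sa_mask into the blocked set, adds the delivered signal unless SA_NODEFER is set, and applies the result through set_current_blocked(), which also retargets shared pending signals. A rough sketch of the equivalent logic (based on the lines deleted below, not necessarily the exact upstream implementation):

        void block_sigmask(struct k_sigaction *ka, int signr)
        {
                sigset_t blocked;

                /* Start from the current mask plus the handler's sa_mask. */
                sigorsets(&blocked, &current->blocked, &ka->sa.sa_mask);

                /* Also block the signal being delivered, unless SA_NODEFER. */
                if (!(ka->sa.sa_flags & SA_NODEFER))
                        sigaddset(&blocked, signr);

                /*
                 * set_current_blocked() takes the siglock, retargets shared
                 * pending signals and recalculates sigpending, replacing the
                 * open-coded lock/recalc_sigpending() sequence.
                 */
                set_current_blocked(&blocked);
        }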
Acked-by: Oleg Nesterov Cc: Richard Weinberger Signed-off-by: Matt Fleming Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/kernel/signal.c | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c index 8382e0b91e8b..fb12f4c5e649 100644 --- a/arch/um/kernel/signal.c +++ b/arch/um/kernel/signal.c @@ -67,15 +67,8 @@ static int handle_signal(struct pt_regs *regs, unsigned long signr, if (err) force_sigsegv(signr, current); - else { - spin_lock_irq(&current->sighand->siglock); - sigorsets(&current->blocked, &current->blocked, - &ka->sa.sa_mask); - if (!(ka->sa.sa_flags & SA_NODEFER)) - sigaddset(&current->blocked, signr); - recalc_sigpending(); - spin_unlock_irq(&current->sighand->siglock); - } + else + block_sigmask(ka, signr); return err; } @@ -158,12 +151,11 @@ int do_signal(void) */ long sys_sigsuspend(int history0, int history1, old_sigset_t mask) { + sigset_t blocked; + mask &= _BLOCKABLE; - spin_lock_irq(&current->sighand->siglock); - current->saved_sigmask = current->blocked; - siginitset(&current->blocked, mask); - recalc_sigpending(); - spin_unlock_irq(&current->sighand->siglock); + siginitset(&blocked, mask); + set_current_blocked(&blocked); current->state = TASK_INTERRUPTIBLE; schedule(); -- cgit v1.2.3-59-g8ed1b From d314d74c695f967e10598467a326f41c78ed1e20 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 23 Mar 2012 15:01:51 -0700 Subject: nmi watchdog: do not use cpp symbol in Kconfig ARCH_HAS_NMI_WATCHDOG is a macro defined by arch, but config HARDLOCKUP_DETECTOR depends on it. This is wrong; ARCH_HAS_NMI_WATCHDOG has to be a Kconfig symbol, and the architectures that need it should select it explicitly. Signed-off-by: WANG Cong Acked-by: Don Zickus Acked-by: Mike Frysinger Cc: David Howells Cc: David Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/Kconfig | 3 +++ arch/blackfin/Kconfig | 1 + arch/blackfin/include/asm/irq.h | 4 ---- arch/mn10300/Kconfig | 1 + arch/mn10300/include/asm/reset-regs.h | 4 ---- arch/sparc/Kconfig | 1 + arch/sparc/include/asm/irq_64.h | 1 - include/linux/nmi.h | 2 +- lib/Kconfig.debug | 2 +- 9 files changed, 8 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/Kconfig b/arch/Kconfig index 5b448a74d0f7..a6f14f622d13 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -120,6 +120,9 @@ config HAVE_KRETPROBES config HAVE_OPTPROBES bool + +config HAVE_NMI_WATCHDOG + bool # # An arch should select this if it provides all these things: # diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig index abe5a9e85148..c1269a1085e1 100644 --- a/arch/blackfin/Kconfig +++ b/arch/blackfin/Kconfig @@ -36,6 +36,7 @@ config BLACKFIN select GENERIC_ATOMIC64 select GENERIC_IRQ_PROBE select IRQ_PER_CPU if SMP + select HAVE_NMI_WATCHDOG if NMI_WATCHDOG config GENERIC_CSUM def_bool y diff --git a/arch/blackfin/include/asm/irq.h b/arch/blackfin/include/asm/irq.h index 12f4060a31b0..89de539ed010 100644 --- a/arch/blackfin/include/asm/irq.h +++ b/arch/blackfin/include/asm/irq.h @@ -38,8 +38,4 @@ #include -#ifdef CONFIG_NMI_WATCHDOG -# define ARCH_HAS_NMI_WATCHDOG -#endif - #endif /* _BFIN_IRQ_H_ */ diff --git a/arch/mn10300/Kconfig b/arch/mn10300/Kconfig index 8f1c40d5817e..3aa3de017159 100644 --- a/arch/mn10300/Kconfig +++ b/arch/mn10300/Kconfig @@ -5,6 +5,7 @@ config MN10300 select GENERIC_IRQ_SHOW select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_KGDB + select HAVE_NMI_WATCHDOG if MN10300_WD_TIMER config AM33_2 def_bool n diff --git a/arch/mn10300/include/asm/reset-regs.h
b/arch/mn10300/include/asm/reset-regs.h index 10c7502a113f..8ca2a42d365b 100644 --- a/arch/mn10300/include/asm/reset-regs.h +++ b/arch/mn10300/include/asm/reset-regs.h @@ -17,10 +17,6 @@ #ifdef __KERNEL__ -#ifdef CONFIG_MN10300_WD_TIMER -#define ARCH_HAS_NMI_WATCHDOG /* See include/linux/nmi.h */ -#endif - /* * watchdog timer registers */ diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index ca5580e4d813..1666de84d477 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -29,6 +29,7 @@ config SPARC select GENERIC_IRQ_SHOW select USE_GENERIC_SMP_HELPERS if SMP select GENERIC_PCI_IOMAP + select HAVE_NMI_WATCHDOG if SPARC64 config SPARC32 def_bool !64BIT diff --git a/arch/sparc/include/asm/irq_64.h b/arch/sparc/include/asm/irq_64.h index 16dcae6d56e7..abf6afe82ca8 100644 --- a/arch/sparc/include/asm/irq_64.h +++ b/arch/sparc/include/asm/irq_64.h @@ -95,7 +95,6 @@ void arch_trigger_all_cpu_backtrace(void); extern void *hardirq_stack[NR_CPUS]; extern void *softirq_stack[NR_CPUS]; #define __ARCH_HAS_DO_SOFTIRQ -#define ARCH_HAS_NMI_WATCHDOG #define NO_IRQ 0xffffffff diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 2d304efc89df..db50840e6355 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -14,7 +14,7 @@ * may be used to reset the timeout - for code which intentionally * disables interrupts for a long time. This call is stateless. */ -#if defined(ARCH_HAS_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR) +#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR) #include extern void touch_nmi_watchdog(void); #else diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 05037dc9bde7..391003f7ab46 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -184,7 +184,7 @@ config LOCKUP_DETECTOR config HARDLOCKUP_DETECTOR def_bool LOCKUP_DETECTOR && PERF_EVENTS && HAVE_PERF_EVENTS_NMI && \ - !ARCH_HAS_NMI_WATCHDOG + !HAVE_NMI_WATCHDOG config BOOTPARAM_HARDLOCKUP_PANIC bool "Panic (Reboot) On Hard Lockups" -- cgit v1.2.3-59-g8ed1b From 03ff3efb64c8a64cb8cdf35e36bead5c78eb3024 Mon Sep 17 00:00:00 2001 From: Javi Merino Date: Fri, 23 Mar 2012 15:01:52 -0700 Subject: Remove remaining bits of io_remap_page_range() Commit 33bf56106d9b ("feature removal of io_remap_page_range()") removed io_remap_page_range(), but it is still included in some arch header files. It has no in-tree users. Signed-off-by: Javi Merino Cc: Russell King Cc: Aurelien Jacquiot Cc: Michal Simek Cc: Jonas Bonn Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/include/asm/pgtable-nommu.h | 1 - arch/c6x/include/asm/pgtable.h | 3 --- arch/microblaze/include/asm/pgtable.h | 2 -- arch/openrisc/include/asm/pgtable.h | 1 - 4 files changed, 7 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/pgtable-nommu.h b/arch/arm/include/asm/pgtable-nommu.h index ffc0e85775b4..7ec60d6075bf 100644 --- a/arch/arm/include/asm/pgtable-nommu.h +++ b/arch/arm/include/asm/pgtable-nommu.h @@ -79,7 +79,6 @@ extern unsigned int kobjsize(const void *objp); * No page table caches to initialise. 
*/ #define pgtable_cache_init() do { } while (0) -#define io_remap_page_range remap_page_range #define io_remap_pfn_range remap_pfn_range diff --git a/arch/c6x/include/asm/pgtable.h b/arch/c6x/include/asm/pgtable.h index 68c8af4f1f97..38a4312eb2cb 100644 --- a/arch/c6x/include/asm/pgtable.h +++ b/arch/c6x/include/asm/pgtable.h @@ -73,9 +73,6 @@ extern unsigned long empty_zero_page; #define pgtable_cache_init() do { } while (0) #define io_remap_pfn_range remap_pfn_range -#define io_remap_page_range(vma, vaddr, paddr, size, prot) \ - remap_pfn_range(vma, vaddr, (paddr) >> PAGE_SHIFT, size, prot) - #include #endif /* _ASM_C6X_PGTABLE_H */ diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h index b2af42311a12..44dc67aa0277 100644 --- a/arch/microblaze/include/asm/pgtable.h +++ b/arch/microblaze/include/asm/pgtable.h @@ -543,8 +543,6 @@ extern unsigned long iopa(unsigned long addr); /* Needs to be defined here and not in linux/mm.h, as it is arch dependent */ #define kern_addr_valid(addr) (1) -#define io_remap_page_range remap_page_range - /* * No page table caches to initialise */ diff --git a/arch/openrisc/include/asm/pgtable.h b/arch/openrisc/include/asm/pgtable.h index 043505d7f684..14c900cfd30a 100644 --- a/arch/openrisc/include/asm/pgtable.h +++ b/arch/openrisc/include/asm/pgtable.h @@ -455,7 +455,6 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, * No page table caches to initialise */ #define pgtable_cache_init() do { } while (0) -#define io_remap_page_range remap_page_range typedef pte_t *pte_addr_t; -- cgit v1.2.3-59-g8ed1b From 307b1cd7ecd7f3dc5ce3d3860957f034f0abe4df Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Fri, 23 Mar 2012 15:02:03 -0700 Subject: bitops: rename for_each_set_bit_cont() in favor of analogous list.h function This renames for_each_set_bit_cont() to for_each_set_bit_from() because it is analogous to list_for_each_entry_from() in list.h rather than list_for_each_entry_continue(). This doesn't remove for_each_set_bit_cont() for now. Signed-off-by: Akinobu Mita Cc: Robert Richter Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. 
Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/cpu/perf_event.c | 4 ++-- include/linux/bitops.h | 5 ++++- tools/perf/util/include/linux/bitops.h | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 0a18d16cb58d..fa2900c0e398 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -643,14 +643,14 @@ static bool __perf_sched_find_counter(struct perf_sched *sched) /* Prefer fixed purpose counters */ if (x86_pmu.num_counters_fixed) { idx = X86_PMC_IDX_FIXED; - for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_MAX) { + for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_MAX) { if (!__test_and_set_bit(idx, sched->state.used)) goto done; } } /* Grab the first unused counter starting with idx */ idx = sched->state.counter; - for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_FIXED) { + for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_FIXED) { if (!__test_and_set_bit(idx, sched->state.used)) goto done; } diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 94300fe46cce..a78e358f0c17 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -27,11 +27,14 @@ extern unsigned long __sw_hweight64(__u64 w); (bit) = find_next_bit((addr), (size), (bit) + 1)) /* same as for_each_set_bit() but use bit as value to start with */ -#define for_each_set_bit_cont(bit, addr, size) \ +#define for_each_set_bit_from(bit, addr, size) \ for ((bit) = find_next_bit((addr), (size), (bit)); \ (bit) < (size); \ (bit) = find_next_bit((addr), (size), (bit) + 1)) +#define for_each_set_bit_cont(bit, addr, size) \ + for_each_set_bit_from(bit, addr, size) + static __inline__ int get_bitmask_order(unsigned int count) { int order; diff --git a/tools/perf/util/include/linux/bitops.h b/tools/perf/util/include/linux/bitops.h index 62cdee78db7b..f1584833bd22 100644 --- a/tools/perf/util/include/linux/bitops.h +++ b/tools/perf/util/include/linux/bitops.h @@ -15,7 +15,7 @@ (bit) = find_next_bit((addr), (size), (bit) + 1)) /* same as for_each_set_bit() but use bit as value to start with */ -#define for_each_set_bit_cont(bit, addr, size) \ +#define for_each_set_bit_from(bit, addr, size) \ for ((bit) = find_next_bit((addr), (size), (bit)); \ (bit) < (size); \ (bit) = find_next_bit((addr), (size), (bit) + 1)) -- cgit v1.2.3-59-g8ed1b From 0b2f4d4d76a09f02fa37bfa57909483448fac771 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Fri, 23 Mar 2012 15:02:06 -0700 Subject: x86: use for_each_clear_bit_from() Use for_each_clear_bit_from() to iterate over all the cleared bits in a memory region. Signed-off-by: Akinobu Mita Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/irqinit.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 313fb5cddbce..43e2b1cff0a7 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -306,10 +306,10 @@ void __init native_init_IRQ(void) * us. (some of these will be overridden and become * 'special' SMP interrupts) */ - for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) { + i = FIRST_EXTERNAL_VECTOR; + for_each_clear_bit_from(i, used_vectors, NR_VECTORS) { /* IA32_SYSCALL_VECTOR could be used in trap_init already.
*/ - if (!test_bit(i, used_vectors)) - set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]); + set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]); } if (!acpi_ioapic && !of_ioapic) -- cgit v1.2.3-59-g8ed1b From 43aca3246cb7f736b20c11da9ce932a124a2a85a Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 23 Mar 2012 15:02:43 -0700 Subject: Hexagon: use set_current_blocked() and block_sigmask() As described in e6fa16ab9c1e ("signal: sigprocmask() should do retarget_shared_pending()") the modification of current->blocked is incorrect as we need to check whether the signal we're about to block is pending in the shared queue. Also, use the new helper function introduced in commit 5e6292c0f28f ("signal: add block_sigmask() for adding sigmask to current->blocked") which centralises the code for updating current->blocked after successfully delivering a signal and reduces the amount of duplicate code across architectures. In the past some architectures got this code wrong, so using this helper function should stop that from happening again. Acked-by: Oleg Nesterov Acked-by: Richard Kuo Signed-off-by: Matt Fleming Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/hexagon/kernel/signal.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/hexagon/kernel/signal.c b/arch/hexagon/kernel/signal.c index b45be3181193..ecbab3457606 100644 --- a/arch/hexagon/kernel/signal.c +++ b/arch/hexagon/kernel/signal.c @@ -192,12 +192,7 @@ static int handle_signal(int sig, siginfo_t *info, struct k_sigaction *ka, if (rc) return rc; - spin_lock_irq(&current->sighand->siglock); - sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask); - if (!(ka->sa.sa_flags & SA_NODEFER)) - sigaddset(&current->blocked, sig); - recalc_sigpending(); - spin_unlock_irq(&current->sighand->siglock); + block_sigmask(ka, sig); return 0; } @@ -305,10 +300,7 @@ asmlinkage int sys_rt_sigreturn(void) goto badframe; sigdelsetmask(&blocked, ~_BLOCKABLE); - spin_lock_irq(&current->sighand->siglock); - current->blocked = blocked; - recalc_sigpending(); - spin_unlock_irq(&current->sighand->siglock); + set_current_blocked(&blocked); if (restore_sigcontext(regs, &frame->uc.uc_mcontext)) goto badframe; -- cgit v1.2.3-59-g8ed1b From 909af768e88867016f427264ae39d27a57b6a8ed Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Fri, 23 Mar 2012 15:02:51 -0700 Subject: coredump: remove VM_ALWAYSDUMP flag The motivation for this patchset was that I was looking at a way for a qemu-kvm process to exclude the guest memory from its core dump, which can be quite large. There are already a number of filter flags in /proc/<pid>/coredump_filter, however, these allow one to specify 'types' of kernel memory, not specific address ranges (which is needed in this case). Since there are no more vma flags available, the first patch eliminates the need for the 'VM_ALWAYSDUMP' flag. The flag is used internally by the kernel to mark vdso and vsyscall pages. However, it is simple enough to check if a vma covers a vdso or vsyscall page without the need for this flag. The second patch then replaces the 'VM_ALWAYSDUMP' flag with a new 'VM_NODUMP' flag, which can be set by userspace using new madvise flags: 'MADV_DONTDUMP', and unset via 'MADV_DODUMP'. The core dump filters continue to work the same as before unless 'MADV_DONTDUMP' is set on the region. The qemu code which implements this feature is at: http://people.redhat.com/~jbaron/qemu-dump/qemu-dump.patch In my testing the qemu core dump shrunk from 383MB -> 13MB with this patch. 
I also believe that the 'MADV_DONTDUMP' flag might be useful for security-sensitive apps, which might want to select which areas are dumped. This patch: The VM_ALWAYSDUMP flag is currently used by the coredump code to indicate that a vma is part of a vsyscall or vdso section. However, we can determine if a vma is in one of these sections by checking it against the gate_vma and checking for a non-NULL return value from arch_vma_name(), thus freeing a valuable vma bit. Signed-off-by: Jason Baron Acked-by: Roland McGrath Cc: Chris Metcalf Cc: Avi Kivity Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/kernel/process.c | 3 +-- arch/hexagon/kernel/vdso.c | 3 +-- arch/mips/kernel/vdso.c | 3 +-- arch/powerpc/kernel/vdso.c | 10 ++-------- arch/s390/kernel/vdso.c | 10 ++-------- arch/sh/kernel/vsyscall/vsyscall.c | 3 +-- arch/tile/mm/elf.c | 8 +------- arch/unicore32/kernel/process.c | 2 +- arch/x86/um/mem_32.c | 8 -------- arch/x86/um/vdso/vma.c | 3 +-- arch/x86/vdso/vdso32-setup.c | 17 ++--------------- arch/x86/vdso/vma.c | 3 +-- fs/binfmt_elf.c | 27 +++++++++++++++++++++++++-- include/linux/mm.h | 1 - mm/memory.c | 8 +------- 15 files changed, 40 insertions(+), 69 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index c2ae3cd331fe..219e4efee1a6 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -533,8 +533,7 @@ int vectors_user_mapping(void) struct mm_struct *mm = current->mm; return install_special_mapping(mm, 0xffff0000, PAGE_SIZE, VM_READ | VM_EXEC | - VM_MAYREAD | VM_MAYEXEC | - VM_ALWAYSDUMP | VM_RESERVED, + VM_MAYREAD | VM_MAYEXEC | VM_RESERVED, NULL); } diff --git a/arch/hexagon/kernel/vdso.c b/arch/hexagon/kernel/vdso.c index 16277c33308a..f212a453b527 100644 --- a/arch/hexagon/kernel/vdso.c +++ b/arch/hexagon/kernel/vdso.c @@ -78,8 +78,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) /* MAYWRITE to allow gdb to COW and set breakpoints. */ ret = install_special_mapping(mm, vdso_base, PAGE_SIZE, VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| - VM_ALWAYSDUMP, + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, &vdso_page); if (ret) diff --git a/arch/mips/kernel/vdso.c b/arch/mips/kernel/vdso.c index e5cdfd603f8f..0f1af58b036a 100644 --- a/arch/mips/kernel/vdso.c +++ b/arch/mips/kernel/vdso.c @@ -88,8 +88,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) ret = install_special_mapping(mm, addr, PAGE_SIZE, VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| - VM_ALWAYSDUMP, + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, &vdso_page); if (ret) diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 7d14bb697d40..d36ee1055f88 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -263,17 +263,11 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) * the "data" page of the vDSO or you'll stop getting kernel updates * and your nice userland gettimeofday will be totally dead. * It's fine to use that for setting breakpoints in the vDSO code - * pages though - * - * Make sure the vDSO gets into every core dump. - * Dumping its contents makes post-mortem fully interpretable later - * without matching up the same kernel and hardware config to see - * what PC values meant. + * pages though. 
*/ rc = install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT, VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| - VM_ALWAYSDUMP, + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, vdso_pagelist); if (rc) { current->mm->context.vdso_base = 0; diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index e704a9965f90..9c80138206b0 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -241,17 +241,11 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) * on the "data" page of the vDSO or you'll stop getting kernel * updates and your nice userland gettimeofday will be totally dead. * It's fine to use that for setting breakpoints in the vDSO code - * pages though - * - * Make sure the vDSO gets into every core dump. - * Dumping its contents makes post-mortem fully interpretable later - * without matching up the same kernel and hardware config to see - * what PC values meant. + * pages though. */ rc = install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT, VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| - VM_ALWAYSDUMP, + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, vdso_pagelist); if (rc) current->mm->context.vdso_base = 0; diff --git a/arch/sh/kernel/vsyscall/vsyscall.c b/arch/sh/kernel/vsyscall/vsyscall.c index 1d6d51a1ce79..5ca579720a09 100644 --- a/arch/sh/kernel/vsyscall/vsyscall.c +++ b/arch/sh/kernel/vsyscall/vsyscall.c @@ -73,8 +73,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) ret = install_special_mapping(mm, addr, PAGE_SIZE, VM_READ | VM_EXEC | - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC | - VM_ALWAYSDUMP, + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC, syscall_pages); if (unlikely(ret)) goto up_fail; diff --git a/arch/tile/mm/elf.c b/arch/tile/mm/elf.c index 55e58e93bfc5..1a00fb64fc88 100644 --- a/arch/tile/mm/elf.c +++ b/arch/tile/mm/elf.c @@ -117,17 +117,11 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, /* * MAYWRITE to allow gdb to COW and set breakpoints - * - * Make sure the vDSO gets into every core dump. Dumping its - * contents makes post-mortem fully interpretable later - * without matching up the same kernel and hardware config to - * see what PC values meant. */ vdso_base = VDSO_BASE; retval = install_special_mapping(mm, vdso_base, PAGE_SIZE, VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| - VM_ALWAYSDUMP, + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, vdso_pages); #ifndef __tilegx__ diff --git a/arch/unicore32/kernel/process.c b/arch/unicore32/kernel/process.c index 52edc2b62873..432b4291f37b 100644 --- a/arch/unicore32/kernel/process.c +++ b/arch/unicore32/kernel/process.c @@ -381,7 +381,7 @@ int vectors_user_mapping(void) return install_special_mapping(mm, 0xffff0000, PAGE_SIZE, VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC | - VM_ALWAYSDUMP | VM_RESERVED, + VM_RESERVED, NULL); } diff --git a/arch/x86/um/mem_32.c b/arch/x86/um/mem_32.c index 639900a6fde9..f40281e5d6a2 100644 --- a/arch/x86/um/mem_32.c +++ b/arch/x86/um/mem_32.c @@ -23,14 +23,6 @@ static int __init gate_vma_init(void) gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC; gate_vma.vm_page_prot = __P101; - /* - * Make sure the vDSO gets into every core dump. - * Dumping its contents makes post-mortem fully interpretable later - * without matching up the same kernel and hardware config to see - * what PC values meant. 
- */ - gate_vma.vm_flags |= VM_ALWAYSDUMP; - return 0; } __initcall(gate_vma_init); diff --git a/arch/x86/um/vdso/vma.c b/arch/x86/um/vdso/vma.c index 91f4ec9a0a56..af91901babb8 100644 --- a/arch/x86/um/vdso/vma.c +++ b/arch/x86/um/vdso/vma.c @@ -64,8 +64,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) err = install_special_mapping(mm, um_vdso_addr, PAGE_SIZE, VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| - VM_ALWAYSDUMP, + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, vdsop); up_write(&mm->mmap_sem); diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c index 468d591dde31..a944020fa859 100644 --- a/arch/x86/vdso/vdso32-setup.c +++ b/arch/x86/vdso/vdso32-setup.c @@ -250,13 +250,7 @@ static int __init gate_vma_init(void) gate_vma.vm_end = FIXADDR_USER_END; gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC; gate_vma.vm_page_prot = __P101; - /* - * Make sure the vDSO gets into every core dump. - * Dumping its contents makes post-mortem fully interpretable later - * without matching up the same kernel and hardware config to see - * what PC values meant. - */ - gate_vma.vm_flags |= VM_ALWAYSDUMP; + return 0; } @@ -343,17 +337,10 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) if (compat_uses_vma || !compat) { /* * MAYWRITE to allow gdb to COW and set breakpoints - * - * Make sure the vDSO gets into every core dump. - * Dumping its contents makes post-mortem fully - * interpretable later without matching up the same - * kernel and hardware config to see what PC values - * meant. */ ret = install_special_mapping(mm, addr, PAGE_SIZE, VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| - VM_ALWAYSDUMP, + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, vdso32_pages); if (ret) diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c index 153407c35b75..17e18279649f 100644 --- a/arch/x86/vdso/vma.c +++ b/arch/x86/vdso/vma.c @@ -124,8 +124,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) ret = install_special_mapping(mm, addr, vdso_size, VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| - VM_ALWAYSDUMP, + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, vdso_pages); if (ret) { current->mm->context.vdso = NULL; diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 81878b78c9d4..b64be5b5ac21 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1092,6 +1092,29 @@ out: * Jeremy Fitzhardinge */ +/* + * The purpose of always_dump_vma() is to make sure that special kernel mappings + * that are useful for post-mortem analysis are included in every core dump. + * In that way we ensure that the core dump is fully interpretable later + * without matching up the same kernel and hardware config to see what PC values + * meant. These special mappings include - vDSO, vsyscall, and other + * architecture specific mappings + */ +static bool always_dump_vma(struct vm_area_struct *vma) +{ + /* Any vsyscall mappings? */ + if (vma == get_gate_vma(vma->vm_mm)) + return true; + /* + * arch_vma_name() returns non-NULL for special architecture mappings, + * such as vDSO sections. + */ + if (arch_vma_name(vma)) + return true; + + return false; +} + /* * Decide what to dump of a segment, part, all or none. */ @@ -1100,8 +1123,8 @@ static unsigned long vma_dump_size(struct vm_area_struct *vma, { #define FILTER(type) (mm_flags & (1UL << MMF_DUMP_##type)) - /* The vma can be set up to tell us the answer directly. 
*/ - if (vma->vm_flags & VM_ALWAYSDUMP) + /* always dump the vdso and vsyscall sections */ + if (always_dump_vma(vma)) goto whole; /* Hugetlb memory check */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 7330742e7973..2de2ddba51d4 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -111,7 +111,6 @@ extern unsigned int kobjsize(const void *objp); #define VM_HUGEPAGE 0x01000000 /* MADV_HUGEPAGE marked this vma */ #endif #define VM_INSERTPAGE 0x02000000 /* The vma has had "vm_insert_page()" done on it */ -#define VM_ALWAYSDUMP 0x04000000 /* Always include in core dumps */ #define VM_CAN_NONLINEAR 0x08000000 /* Has ->fault & does nonlinear pages */ #define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */ diff --git a/mm/memory.c b/mm/memory.c index 3416b6e018d6..6105f475fa86 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3623,13 +3623,7 @@ static int __init gate_vma_init(void) gate_vma.vm_end = FIXADDR_USER_END; gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC; gate_vma.vm_page_prot = __P101; - /* - * Make sure the vDSO gets into every core dump. - * Dumping its contents makes post-mortem fully interpretable later - * without matching up the same kernel and hardware config to see - * what PC values meant. - */ - gate_vma.vm_flags |= VM_ALWAYSDUMP; + return 0; } __initcall(gate_vma_init); -- cgit v1.2.3-59-g8ed1b From accb61fe7bb0f5c2a4102239e4981650f9048519 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Fri, 23 Mar 2012 15:02:51 -0700 Subject: coredump: add VM_NODUMP, MADV_NODUMP, MADV_CLEAR_NODUMP Since we no longer need the VM_ALWAYSDUMP flag, let's use the freed bit for the 'VM_NODUMP' flag. The idea is to add a new madvise() flag: MADV_DONTDUMP, which can be set by applications to specifically request memory regions which should not dump core. The specific application I have in mind is qemu: we can add a flag there that wouldn't dump all of guest memory when qemu dumps core. This flag might also be useful for security-sensitive apps that want to absolutely make sure that parts of memory are not dumped. To clear the flag use: MADV_DODUMP. [akpm@linux-foundation.org: s/MADV_NODUMP/MADV_DONTDUMP/, s/MADV_CLEAR_NODUMP/MADV_DODUMP/, per Roland] [akpm@linux-foundation.org: fix up the architectures which broke] Signed-off-by: Jason Baron Acked-by: Roland McGrath Cc: Chris Metcalf Cc: Avi Kivity Cc: Ralf Baechle Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Cc: "James E.J. 
Bottomley" Cc: Helge Deller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/mman.h | 4 ++++ arch/mips/include/asm/mman.h | 4 ++++ arch/parisc/include/asm/mman.h | 4 ++++ arch/xtensa/include/asm/mman.h | 4 ++++ fs/binfmt_elf.c | 3 +++ include/asm-generic/mman-common.h | 4 ++++ include/linux/mm.h | 1 + mm/madvise.c | 8 ++++++++ 8 files changed, 32 insertions(+) (limited to 'arch') diff --git a/arch/alpha/include/asm/mman.h b/arch/alpha/include/asm/mman.h index 72db984f8781..cbeb3616a28e 100644 --- a/arch/alpha/include/asm/mman.h +++ b/arch/alpha/include/asm/mman.h @@ -56,6 +56,10 @@ #define MADV_HUGEPAGE 14 /* Worth backing with hugepages */ #define MADV_NOHUGEPAGE 15 /* Not worth backing with hugepages */ +#define MADV_DONTDUMP 16 /* Explicity exclude from the core dump, + overrides the coredump filter bits */ +#define MADV_DODUMP 17 /* Clear the MADV_NODUMP flag */ + /* compatibility flags */ #define MAP_FILE 0 diff --git a/arch/mips/include/asm/mman.h b/arch/mips/include/asm/mman.h index 785b4ea4ec3f..46d3da0d4b92 100644 --- a/arch/mips/include/asm/mman.h +++ b/arch/mips/include/asm/mman.h @@ -80,6 +80,10 @@ #define MADV_HUGEPAGE 14 /* Worth backing with hugepages */ #define MADV_NOHUGEPAGE 15 /* Not worth backing with hugepages */ +#define MADV_DONTDUMP 16 /* Explicity exclude from the core dump, + overrides the coredump filter bits */ +#define MADV_DODUMP 17 /* Clear the MADV_NODUMP flag */ + /* compatibility flags */ #define MAP_FILE 0 diff --git a/arch/parisc/include/asm/mman.h b/arch/parisc/include/asm/mman.h index f5b7bf5fba68..12219ebce869 100644 --- a/arch/parisc/include/asm/mman.h +++ b/arch/parisc/include/asm/mman.h @@ -62,6 +62,10 @@ #define MADV_HUGEPAGE 67 /* Worth backing with hugepages */ #define MADV_NOHUGEPAGE 68 /* Not worth backing with hugepages */ +#define MADV_DONTDUMP 69 /* Explicity exclude from the core dump, + overrides the coredump filter bits */ +#define MADV_DODUMP 70 /* Clear the MADV_NODUMP flag */ + /* compatibility flags */ #define MAP_FILE 0 #define MAP_VARIABLE 0 diff --git a/arch/xtensa/include/asm/mman.h b/arch/xtensa/include/asm/mman.h index 30789010733d..25bc6c1309c3 100644 --- a/arch/xtensa/include/asm/mman.h +++ b/arch/xtensa/include/asm/mman.h @@ -86,6 +86,10 @@ #define MADV_HUGEPAGE 14 /* Worth backing with hugepages */ #define MADV_NOHUGEPAGE 15 /* Not worth backing with hugepages */ +#define MADV_DONTDUMP 16 /* Explicity exclude from the core dump, + overrides the coredump filter bits */ +#define MADV_DODUMP 17 /* Clear the MADV_NODUMP flag */ + /* compatibility flags */ #define MAP_FILE 0 diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index b64be5b5ac21..504b6eee50a9 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1127,6 +1127,9 @@ static unsigned long vma_dump_size(struct vm_area_struct *vma, if (always_dump_vma(vma)) goto whole; + if (vma->vm_flags & VM_NODUMP) + return 0; + /* Hugetlb memory check */ if (vma->vm_flags & VM_HUGETLB) { if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED)) diff --git a/include/asm-generic/mman-common.h b/include/asm-generic/mman-common.h index 787abbb6d867..d030d2c2647a 100644 --- a/include/asm-generic/mman-common.h +++ b/include/asm-generic/mman-common.h @@ -48,6 +48,10 @@ #define MADV_HUGEPAGE 14 /* Worth backing with hugepages */ #define MADV_NOHUGEPAGE 15 /* Not worth backing with hugepages */ +#define MADV_DONTDUMP 16 /* Explicity exclude from the core dump, + overrides the coredump filter bits */ +#define MADV_DODUMP 17 /* Clear the MADV_NODUMP flag */ + 
/* compatibility flags */ #define MAP_FILE 0 diff --git a/include/linux/mm.h b/include/linux/mm.h index 2de2ddba51d4..a6fabdfd34c5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -111,6 +111,7 @@ extern unsigned int kobjsize(const void *objp); #define VM_HUGEPAGE 0x01000000 /* MADV_HUGEPAGE marked this vma */ #endif #define VM_INSERTPAGE 0x02000000 /* The vma has had "vm_insert_page()" done on it */ +#define VM_NODUMP 0x04000000 /* Do not include in the core dump */ #define VM_CAN_NONLINEAR 0x08000000 /* Has ->fault & does nonlinear pages */ #define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */ diff --git a/mm/madvise.c b/mm/madvise.c index f5ab745672b7..1ccbba5b6674 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -65,6 +65,12 @@ static long madvise_behavior(struct vm_area_struct * vma, } new_flags &= ~VM_DONTCOPY; break; + case MADV_DONTDUMP: + new_flags |= VM_NODUMP; + break; + case MADV_DODUMP: + new_flags &= ~VM_NODUMP; + break; case MADV_MERGEABLE: case MADV_UNMERGEABLE: error = ksm_madvise(vma, start, end, behavior, &new_flags); @@ -293,6 +299,8 @@ madvise_behavior_valid(int behavior) case MADV_HUGEPAGE: case MADV_NOHUGEPAGE: #endif + case MADV_DONTDUMP: + case MADV_DODUMP: return 1; default: -- cgit v1.2.3-59-g8ed1b
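To illustrate the userspace side of these new flags, here is a minimal, hypothetical C sketch of how a process such as qemu could exclude a large anonymous region (for example guest RAM) from its core dump. It assumes a libc whose sys/mman.h already exposes MADV_DONTDUMP and MADV_DODUMP; on older headers the generic values from asm-generic/mman-common.h can be defined by hand, and on kernels without this patch the madvise() calls simply fail with EINVAL.

        #include <stdio.h>
        #include <sys/mman.h>

        #ifndef MADV_DONTDUMP
        #define MADV_DONTDUMP 16        /* generic value from asm-generic/mman-common.h */
        #endif
        #ifndef MADV_DODUMP
        #define MADV_DODUMP   17
        #endif

        int main(void)
        {
                size_t len = 256UL * 1024 * 1024;       /* stand-in for a large guest RAM buffer */
                void *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
                                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

                if (buf == MAP_FAILED) {
                        perror("mmap");
                        return 1;
                }

                /* Sets VM_NODUMP on the vma: the region is skipped in any core dump. */
                if (madvise(buf, len, MADV_DONTDUMP))
                        perror("madvise(MADV_DONTDUMP)");

                /* ... fill and use the region ... */

                /* Clears VM_NODUMP again if the contents become worth dumping. */
                if (madvise(buf, len, MADV_DODUMP))
                        perror("madvise(MADV_DODUMP)");

                munmap(buf, len);
                return 0;
        }

The existing /proc/<pid>/coredump_filter bits still apply to everything else; MADV_DONTDUMP only overrides them for the marked range, matching the "overrides the coredump filter bits" comment added to the mman headers above.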