From 451637e454f0b41689cd07cdc3fa53388c22890d Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Thu, 5 Nov 2015 18:44:24 -0800 Subject: kernel/watchdog.c: is_hardlockup can be boolean Make is_hardlockup return bool to improve readability due to this particular function only using either one or zero as its return value. No functional change. Signed-off-by: Yaowei Bai Reviewed-by: Aaron Tomlin Acked-by: Don Zickus Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/watchdog.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 64ed1c37bd1f..568ba64b22d6 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -263,15 +263,15 @@ void touch_softlockup_watchdog_sync(void) #ifdef CONFIG_HARDLOCKUP_DETECTOR /* watchdog detector functions */ -static int is_hardlockup(void) +static bool is_hardlockup(void) { unsigned long hrint = __this_cpu_read(hrtimer_interrupts); if (__this_cpu_read(hrtimer_interrupts_saved) == hrint) - return 1; + return true; __this_cpu_write(hrtimer_interrupts_saved, hrint); - return 0; + return false; } #endif -- cgit v1.2.3-59-g8ed1b From d283c640cee6472852b95036ddd512c2ba0c1139 Mon Sep 17 00:00:00 2001 From: Ulrich Obergfell Date: Thu, 5 Nov 2015 18:44:27 -0800 Subject: watchdog: fix error handling in proc_watchdog_thresh() The original watchdog_park_threads() function that was introduced by commit 81a4beef91ba ("watchdog: introduce watchdog_park_threads() and watchdog_unpark_threads()") takes a very simple approach to handle errors returned by kthread_park(): It attempts to roll back all watchdog threads to the unparked state. However, this may be undesired behaviour from the perspective of the caller which may want to handle errors as appropriate in its specific context. Currently, there are two possible call chains: - watchdog suspend/resume interface lockup_detector_suspend watchdog_park_threads - write to parameters in /proc/sys/kernel proc_watchdog_update watchdog_enable_all_cpus update_watchdog_all_cpus watchdog_park_threads Instead of 'blindly' attempting to unpark the watchdog threads if a kthread_park() call fails, the new approach is to disable the lockup detectors in the above call chains. Failure becomes visible to the user as follows: - error messages from lockup_detector_suspend() or watchdog_enable_all_cpus() - the state that can be read from /proc/sys/kernel/watchdog_enabled - the 'write' system call in the latter call chain returns an error I did not experience kthread_park() failures in practice, I used some instrumentation to fake error returns from kthread_park() in order to test the patches. This patch (of 5): Restore the previous value of watchdog_thresh _and_ sample_period if proc_watchdog_update() returns an error. The variables must be consistent to avoid false positives of the lockup detectors. Signed-off-by: Ulrich Obergfell Reviewed-by: Aaron Tomlin Acked-by: Don Zickus Cc: Ulrich Obergfell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/watchdog.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 568ba64b22d6..284f0e62a927 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -914,13 +914,14 @@ int proc_watchdog_thresh(struct ctl_table *table, int write, goto out; /* - * Update the sample period. - * Restore 'watchdog_thresh' on failure. + * Update the sample period. Restore on failure. 
*/ set_sample_period(); err = proc_watchdog_update(); - if (err) + if (err) { watchdog_thresh = old; + set_sample_period(); + } out: mutex_unlock(&watchdog_proc_mutex); return err; -- cgit v1.2.3-59-g8ed1b From 58cf690a09987c9a56933df05c0369d691d6224d Mon Sep 17 00:00:00 2001 From: Ulrich Obergfell Date: Thu, 5 Nov 2015 18:44:30 -0800 Subject: watchdog: move watchdog_disable_all_cpus() outside of ifdef Move watchdog_disable_all_cpus() outside of the ifdef so that it is available if CONFIG_SYSCTL is not defined. This is preparation for "watchdog: implement error handling in update_watchdog_all_cpus() and callers". Signed-off-by: Ulrich Obergfell Reviewed-by: Aaron Tomlin Acked-by: Don Zickus Cc: Ulrich Obergfell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/watchdog.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 284f0e62a927..f0f8a78512a5 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -347,6 +347,9 @@ static void watchdog_interrupt_count(void) static int watchdog_nmi_enable(unsigned int cpu); static void watchdog_nmi_disable(unsigned int cpu); +static int watchdog_enable_all_cpus(void); +static void watchdog_disable_all_cpus(void); + /* watchdog kicker functions */ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) { @@ -756,9 +759,6 @@ static int watchdog_enable_all_cpus(void) return err; } -/* prepare/enable/disable routines */ -/* sysctl functions */ -#ifdef CONFIG_SYSCTL static void watchdog_disable_all_cpus(void) { if (watchdog_running) { @@ -767,6 +767,8 @@ static void watchdog_disable_all_cpus(void) } } +#ifdef CONFIG_SYSCTL + /* * Update the run state of the lockup detectors. */ -- cgit v1.2.3-59-g8ed1b From b43cb43cb85b91d79d9f0719ff581e8cb6dfbb8f Mon Sep 17 00:00:00 2001 From: Ulrich Obergfell Date: Thu, 5 Nov 2015 18:44:33 -0800 Subject: watchdog: implement error handling in update_watchdog_all_cpus() and callers update_watchdog_all_cpus() now passes errors from watchdog_park_threads() up to functions in the call chain. This allows watchdog_enable_all_cpus() and proc_watchdog_update() to handle such errors too. Signed-off-by: Ulrich Obergfell Reviewed-by: Aaron Tomlin Acked-by: Don Zickus Cc: Ulrich Obergfell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/watchdog.c | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/watchdog.c b/kernel/watchdog.c index f0f8a78512a5..704f93317666 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -731,10 +731,17 @@ void lockup_detector_resume(void) mutex_unlock(&watchdog_proc_mutex); } -static void update_watchdog_all_cpus(void) +static int update_watchdog_all_cpus(void) { - watchdog_park_threads(); + int ret; + + ret = watchdog_park_threads(); + if (ret) + return ret; + watchdog_unpark_threads(); + + return 0; } static int watchdog_enable_all_cpus(void) @@ -753,9 +760,17 @@ static int watchdog_enable_all_cpus(void) * Enable/disable the lockup detectors or * change the sample period 'on the fly'. 
*/ - update_watchdog_all_cpus(); + err = update_watchdog_all_cpus(); + + if (err) { + watchdog_disable_all_cpus(); + pr_err("Failed to update lockup detectors, disabled\n"); + } } + if (err) + watchdog_enabled = 0; + return err; } @@ -851,12 +866,13 @@ static int proc_watchdog_common(int which, struct ctl_table *table, int write, } while (cmpxchg(&watchdog_enabled, old, new) != old); /* - * Update the run state of the lockup detectors. - * Restore 'watchdog_enabled' on failure. + * Update the run state of the lockup detectors. There is _no_ + * need to check the value returned by proc_watchdog_update() + * and to restore the previous value of 'watchdog_enabled' as + * both lockup detectors are disabled if proc_watchdog_update() + * returns an error. */ err = proc_watchdog_update(); - if (err) - watchdog_enabled = old; } out: mutex_unlock(&watchdog_proc_mutex); -- cgit v1.2.3-59-g8ed1b From c993590c6ae6273681d9fb2a8d26dce03bf9d96c Mon Sep 17 00:00:00 2001 From: Ulrich Obergfell Date: Thu, 5 Nov 2015 18:44:36 -0800 Subject: watchdog: implement error handling in lockup_detector_suspend() lockup_detector_suspend() now handles errors from watchdog_park_threads(). Signed-off-by: Ulrich Obergfell Reviewed-by: Aaron Tomlin Acked-by: Don Zickus Cc: Ulrich Obergfell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/watchdog.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'kernel') diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 704f93317666..e8b19db9c14a 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -707,6 +707,11 @@ int lockup_detector_suspend(void) if (ret == 0) watchdog_suspended++; + else { + watchdog_disable_all_cpus(); + pr_err("Failed to suspend lockup detectors, disabled\n"); + watchdog_enabled = 0; + } mutex_unlock(&watchdog_proc_mutex); -- cgit v1.2.3-59-g8ed1b From ee7fed540563b27e1028bec0b509921496c91bf9 Mon Sep 17 00:00:00 2001 From: Ulrich Obergfell Date: Thu, 5 Nov 2015 18:44:39 -0800 Subject: watchdog: do not unpark threads in watchdog_park_threads() on error If kthread_park() returns an error, watchdog_park_threads() should not blindly 'roll back' the already parked threads to the unparked state. Instead leave it up to the callers to handle such errors appropriately in their context. For example, it is redundant to unpark the threads if the lockup detectors will soon be disabled by the callers anyway. Signed-off-by: Ulrich Obergfell Reviewed-by: Aaron Tomlin Acked-by: Don Zickus Cc: Ulrich Obergfell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/watchdog.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/watchdog.c b/kernel/watchdog.c index e8b19db9c14a..452e4ed507e5 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -654,6 +654,12 @@ static struct smp_hotplug_thread watchdog_threads = { /* * park all watchdog threads that are specified in 'watchdog_cpumask' + * + * This function returns an error if kthread_park() of a watchdog thread + * fails. In this situation, the watchdog threads of some CPUs can already + * be parked and the watchdog threads of other CPUs can still be runnable. + * Callers are expected to handle this special condition as appropriate in + * their context. 
*/ static int watchdog_park_threads(void) { @@ -665,10 +671,6 @@ static int watchdog_park_threads(void) if (ret) break; } - if (ret) { - for_each_watchdog_cpu(cpu) - kthread_unpark(per_cpu(softlockup_watchdog, cpu)); - } put_online_cpus(); return ret; -- cgit v1.2.3-59-g8ed1b From 55537871ef666b4153fd1ef8782e4a13fee142cc Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Thu, 5 Nov 2015 18:44:41 -0800 Subject: kernel/watchdog.c: perform all-CPU backtrace in case of hard lockup In many cases of hardlockup reports, it's actually not possible to know why it triggered, because the CPU that got stuck is usually waiting on a resource (with IRQs disabled) in posession of some other CPU is holding. IOW, we are often looking at the stacktrace of the victim and not the actual offender. Introduce sysctl / cmdline parameter that makes it possible to have hardlockup detector perform all-CPU backtrace. Signed-off-by: Jiri Kosina Reviewed-by: Aaron Tomlin Cc: Ulrich Obergfell Acked-by: Don Zickus Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/kernel-parameters.txt | 5 +++++ Documentation/sysctl/kernel.txt | 12 ++++++++++++ include/linux/nmi.h | 1 + kernel/sysctl.c | 9 +++++++++ kernel/watchdog.c | 33 ++++++++++++++++++++++++++++----- 5 files changed, 55 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 6263a2da3e2f..0231f4508abe 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1269,6 +1269,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted. Format: such that (rxsize & ~0x1fffc0) == 0. Default: 1024 + hardlockup_all_cpu_backtrace= + [KNL] Should the hard-lockup detector generate + backtraces on all cpus. + Format: + hashdist= [KNL,NUMA] Large hashes allocated during boot are distributed across NUMA nodes. Defaults on for 64-bit NUMA, off otherwise. diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index 6fccb69c03e7..af70d1541d3a 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -33,6 +33,7 @@ show up in /proc/sys/kernel: - domainname - hostname - hotplug +- hardlockup_all_cpu_backtrace - hung_task_panic - hung_task_check_count - hung_task_timeout_secs @@ -292,6 +293,17 @@ Information Service) or YP (Yellow Pages) domainname. These two domain names are in general different. For a detailed discussion see the hostname(1) man page. +============================================================== +hardlockup_all_cpu_backtrace: + +This value controls the hard lockup detector behavior when a hard +lockup condition is detected as to whether or not to gather further +debug information. If enabled, arch-specific all-CPU stack dumping +will be initiated. + +0: do nothing. This is the default behavior. + +1: on detection capture more debug information. 
============================================================== hotplug: diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 78488e099ce7..7ec5b86735f3 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -73,6 +73,7 @@ extern int watchdog_user_enabled; extern int watchdog_thresh; extern unsigned long *watchdog_cpumask_bits; extern int sysctl_softlockup_all_cpu_backtrace; +extern int sysctl_hardlockup_all_cpu_backtrace; struct ctl_table; extern int proc_watchdog(struct ctl_table *, int , void __user *, size_t *, loff_t *); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 96c856b04081..1a5faa3e1521 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -898,6 +898,15 @@ static struct ctl_table kern_table[] = { .extra1 = &zero, .extra2 = &one, }, + { + .procname = "hardlockup_all_cpu_backtrace", + .data = &sysctl_hardlockup_all_cpu_backtrace, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, + }, #endif /* CONFIG_SMP */ #endif #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 452e4ed507e5..f6b32b8cbffe 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -57,8 +57,10 @@ int __read_mostly watchdog_thresh = 10; #ifdef CONFIG_SMP int __read_mostly sysctl_softlockup_all_cpu_backtrace; +int __read_mostly sysctl_hardlockup_all_cpu_backtrace; #else #define sysctl_softlockup_all_cpu_backtrace 0 +#define sysctl_hardlockup_all_cpu_backtrace 0 #endif static struct cpumask watchdog_cpumask __read_mostly; unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask); @@ -112,6 +114,7 @@ static unsigned long soft_lockup_nmi_warn; #ifdef CONFIG_HARDLOCKUP_DETECTOR static int hardlockup_panic = CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE; +static unsigned long hardlockup_allcpu_dumped; /* * We may not want to enable hard lockup detection by default in all cases, * for example when running the kernel as a guest on a hypervisor. 
In these @@ -173,6 +176,13 @@ static int __init softlockup_all_cpu_backtrace_setup(char *str) return 1; } __setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup); +static int __init hardlockup_all_cpu_backtrace_setup(char *str) +{ + sysctl_hardlockup_all_cpu_backtrace = + !!simple_strtol(str, NULL, 0); + return 1; +} +__setup("hardlockup_all_cpu_backtrace=", hardlockup_all_cpu_backtrace_setup); #endif /* @@ -318,17 +328,30 @@ static void watchdog_overflow_callback(struct perf_event *event, */ if (is_hardlockup()) { int this_cpu = smp_processor_id(); + struct pt_regs *regs = get_irq_regs(); /* only print hardlockups once */ if (__this_cpu_read(hard_watchdog_warn) == true) return; - if (hardlockup_panic) - panic("Watchdog detected hard LOCKUP on cpu %d", - this_cpu); + pr_emerg("Watchdog detected hard LOCKUP on cpu %d", this_cpu); + print_modules(); + print_irqtrace_events(current); + if (regs) + show_regs(regs); else - WARN(1, "Watchdog detected hard LOCKUP on cpu %d", - this_cpu); + dump_stack(); + + /* + * Perform all-CPU dump only once to avoid multiple hardlockups + * generating interleaving traces + */ + if (sysctl_hardlockup_all_cpu_backtrace && + !test_and_set_bit(0, &hardlockup_allcpu_dumped)) + trigger_allbutself_cpu_backtrace(); + + if (hardlockup_panic) + panic("Hard LOCKUP"); __this_cpu_write(hard_watchdog_warn, true); return; -- cgit v1.2.3-59-g8ed1b From ac1f591249d95372f3a5ab3828d4af5dfbf5efd3 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Thu, 5 Nov 2015 18:44:44 -0800 Subject: kernel/watchdog.c: add sysctl knob hardlockup_panic The only way to enable a hardlockup to panic the machine is to set 'nmi_watchdog=panic' on the kernel command line. This makes it awkward for end users and folks who want to run automate tests (like myself). Mimic the softlockup_panic knob and create a /proc/sys/kernel/hardlockup_panic knob. Signed-off-by: Don Zickus Cc: Ulrich Obergfell Acked-by: Jiri Kosina Reviewed-by: Aaron Tomlin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/lockup-watchdogs.txt | 5 +++-- include/linux/sched.h | 1 + kernel/sysctl.c | 11 +++++++++++ kernel/watchdog.c | 2 +- 4 files changed, 16 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/Documentation/lockup-watchdogs.txt b/Documentation/lockup-watchdogs.txt index 22dd6af2e4bd..4a6e33e1af61 100644 --- a/Documentation/lockup-watchdogs.txt +++ b/Documentation/lockup-watchdogs.txt @@ -20,8 +20,9 @@ kernel mode for more than 10 seconds (see "Implementation" below for details), without letting other interrupts have a chance to run. Similarly to the softlockup case, the current stack trace is displayed upon detection and the system will stay locked up unless the default -behavior is changed, which can be done through a compile time knob, -"BOOTPARAM_HARDLOCKUP_PANIC", and a kernel parameter, "nmi_watchdog" +behavior is changed, which can be done through a sysctl, +'hardlockup_panic', a compile time knob, "BOOTPARAM_HARDLOCKUP_PANIC", +and a kernel parameter, "nmi_watchdog" (see "Documentation/kernel-parameters.txt" for details). 
The panic option can be used in combination with panic_timeout (this diff --git a/include/linux/sched.h b/include/linux/sched.h index c115d617739d..5423b9c82fee 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -384,6 +384,7 @@ extern int proc_dowatchdog_thresh(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); extern unsigned int softlockup_panic; +extern unsigned int hardlockup_panic; void lockup_detector_init(void); #else static inline void touch_softlockup_watchdog(void) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 1a5faa3e1521..dc6858d6639e 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -888,6 +888,17 @@ static struct ctl_table kern_table[] = { .extra1 = &zero, .extra2 = &one, }, +#ifdef CONFIG_HARDLOCKUP_DETECTOR + { + .procname = "hardlockup_panic", + .data = &hardlockup_panic, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, + }, +#endif #ifdef CONFIG_SMP { .procname = "softlockup_all_cpu_backtrace", diff --git a/kernel/watchdog.c b/kernel/watchdog.c index f6b32b8cbffe..0a23125369f1 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -112,7 +112,7 @@ static unsigned long soft_lockup_nmi_warn; * Should we panic when a soft-lockup or hard-lockup occurs: */ #ifdef CONFIG_HARDLOCKUP_DETECTOR -static int hardlockup_panic = +unsigned int __read_mostly hardlockup_panic = CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE; static unsigned long hardlockup_allcpu_dumped; /* -- cgit v1.2.3-59-g8ed1b From ee89e71eb091d3ef8ca2be8bd4ec77ccfa91334c Mon Sep 17 00:00:00 2001 From: Ulrich Obergfell Date: Thu, 5 Nov 2015 18:44:47 -0800 Subject: kernel/watchdog.c: avoid race between lockup detector suspend/resume and CPU hotplug The lockup detector suspend/resume interface that was introduced by commit 8c073d27d7ad ("watchdog: introduce watchdog_suspend() and watchdog_resume()") does not protect itself against races with CPU hotplug. Hence, theoretically it is possible that a new watchdog thread is started on a hotplugged CPU while the lockup detector is suspended, and the thread could thus interfere unexpectedly with the code that requested to suspend the lockup detector. Avoid the race by calling get_online_cpus() in lockup_detector_suspend() put_online_cpus() in lockup_detector_resume() Signed-off-by: Ulrich Obergfell Acked-by: Don Zickus Reviewed-by: Aaron Tomlin Cc: Ulrich Obergfell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/watchdog.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel') diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 0a23125369f1..7357842da933 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -719,6 +719,7 @@ int lockup_detector_suspend(void) { int ret = 0; + get_online_cpus(); mutex_lock(&watchdog_proc_mutex); /* * Multiple suspend requests can be active in parallel (counted by @@ -759,6 +760,7 @@ void lockup_detector_resume(void) watchdog_unpark_threads(); mutex_unlock(&watchdog_proc_mutex); + put_online_cpus(); } static int update_watchdog_all_cpus(void) -- cgit v1.2.3-59-g8ed1b From 8614ddef82139d08234dbf681188f9bcddae9f03 Mon Sep 17 00:00:00 2001 From: Ulrich Obergfell Date: Thu, 5 Nov 2015 18:44:50 -0800 Subject: kernel/watchdog.c: avoid races between /proc handlers and CPU hotplug The handler functions for watchdog parameters in /proc/sys/kernel do not protect themselves against races with CPU hotplug. 
Hence, theoretically it is possible that a new watchdog thread is started on a hotplugged CPU while a parameter is being modified, and the thread could thus use a parameter value that is 'in transition'. For example, if 'watchdog_thresh' is being set to zero (note: this disables the lockup detectors) the thread would erroneously use the value zero as the sample period. To avoid such races and to keep the /proc handler code consistent, call {get|put}_online_cpus() in proc_watchdog_common() {get|put}_online_cpus() in proc_watchdog_thresh() {get|put}_online_cpus() in proc_watchdog_cpumask() Signed-off-by: Ulrich Obergfell Acked-by: Don Zickus Reviewed-by: Aaron Tomlin Cc: Ulrich Obergfell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/watchdog.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'kernel') diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 7357842da933..13fdda1a4c91 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -857,6 +857,7 @@ static int proc_watchdog_common(int which, struct ctl_table *table, int write, int err, old, new; int *watchdog_param = (int *)table->data; + get_online_cpus(); mutex_lock(&watchdog_proc_mutex); if (watchdog_suspended) { @@ -908,6 +909,7 @@ static int proc_watchdog_common(int which, struct ctl_table *table, int write, } out: mutex_unlock(&watchdog_proc_mutex); + put_online_cpus(); return err; } @@ -949,6 +951,7 @@ int proc_watchdog_thresh(struct ctl_table *table, int write, { int err, old; + get_online_cpus(); mutex_lock(&watchdog_proc_mutex); if (watchdog_suspended) { @@ -974,6 +977,7 @@ int proc_watchdog_thresh(struct ctl_table *table, int write, } out: mutex_unlock(&watchdog_proc_mutex); + put_online_cpus(); return err; } @@ -988,6 +992,7 @@ int proc_watchdog_cpumask(struct ctl_table *table, int write, { int err; + get_online_cpus(); mutex_lock(&watchdog_proc_mutex); if (watchdog_suspended) { @@ -1015,6 +1020,7 @@ int proc_watchdog_cpumask(struct ctl_table *table, int write, } out: mutex_unlock(&watchdog_proc_mutex); + put_online_cpus(); return err; } -- cgit v1.2.3-59-g8ed1b From a2a45b85ec45db4b041ea5d93b21033dbc3cc0fc Mon Sep 17 00:00:00 2001 From: Ulrich Obergfell Date: Thu, 5 Nov 2015 18:44:53 -0800 Subject: kernel/watchdog.c: remove {get|put}_online_cpus() from watchdog_{park|unpark}_threads() watchdog_{park|unpark}_threads() are now called in code paths that protect themselves against CPU hotplug, so {get|put}_online_cpus() calls are redundant and can be removed. Signed-off-by: Ulrich Obergfell Acked-by: Don Zickus Reviewed-by: Aaron Tomlin Cc: Ulrich Obergfell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/watchdog.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 13fdda1a4c91..84c4744d1977 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -683,33 +683,35 @@ static struct smp_hotplug_thread watchdog_threads = { * be parked and the watchdog threads of other CPUs can still be runnable. * Callers are expected to handle this special condition as appropriate in * their context. + * + * This function may only be called in a context that is protected against + * races with CPU hotplug - for example, via get_online_cpus(). 
*/ static int watchdog_park_threads(void) { int cpu, ret = 0; - get_online_cpus(); for_each_watchdog_cpu(cpu) { ret = kthread_park(per_cpu(softlockup_watchdog, cpu)); if (ret) break; } - put_online_cpus(); return ret; } /* * unpark all watchdog threads that are specified in 'watchdog_cpumask' + * + * This function may only be called in a context that is protected against + * races with CPU hotplug - for example, via get_online_cpus(). */ static void watchdog_unpark_threads(void) { int cpu; - get_online_cpus(); for_each_watchdog_cpu(cpu) kthread_unpark(per_cpu(softlockup_watchdog, cpu)); - put_online_cpus(); } /* -- cgit v1.2.3-59-g8ed1b From 39d2da2161d35de301ec5397ce9103c68b883054 Mon Sep 17 00:00:00 2001 From: Ulrich Obergfell Date: Thu, 5 Nov 2015 18:44:56 -0800 Subject: kernel/watchdog.c: fix race between proc_watchdog_thresh() and watchdog_timer_fn() Theoretically it is possible that the watchdog timer expires right at the time when a user sets 'watchdog_thresh' to zero (note: this disables the lockup detectors). In this scenario, the is_softlockup() function - which is called by the timer - could produce a false positive. Fix this by checking the current value of 'watchdog_thresh'. Signed-off-by: Ulrich Obergfell Acked-by: Don Zickus Reviewed-by: Aaron Tomlin Cc: Ulrich Obergfell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/watchdog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 84c4744d1977..18f34cf75f74 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -289,7 +289,7 @@ static int is_softlockup(unsigned long touch_ts) { unsigned long now = get_timestamp(); - if (watchdog_enabled & SOFT_WATCHDOG_ENABLED) { + if ((watchdog_enabled & SOFT_WATCHDOG_ENABLED) && watchdog_thresh){ /* Warn about unreasonable delays. */ if (time_after(now, touch_ts + get_softlockup_thresh())) return now - touch_ts; -- cgit v1.2.3-59-g8ed1b From da39da3a54fed88e29024f2f1f6cd7357cd03a44 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Thu, 5 Nov 2015 18:48:05 -0800 Subject: mm, oom: remove task_lock protecting comm printing The oom killer takes task_lock() in a couple of places solely to protect printing the task's comm. A process's comm, including current's comm, may change due to /proc/pid/comm or PR_SET_NAME. The comm will always be NULL-terminated, so the worst race scenario would only be during update. We can tolerate a comm being printed that is in the middle of an update to avoid taking the lock. Other locations in the kernel have already dropped task_lock() when printing comm, so this is consistent. 
Signed-off-by: David Rientjes Suggested-by: Oleg Nesterov Cc: Michal Hocko Cc: Vladimir Davydov Cc: Sergey Senozhatsky Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpuset.h | 4 ++-- kernel/cpuset.c | 14 +++++++------- mm/oom_kill.c | 8 +------- 3 files changed, 10 insertions(+), 16 deletions(-) (limited to 'kernel') diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 1b357997cac5..5a1311942358 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -93,7 +93,7 @@ extern int current_cpuset_is_being_rebound(void); extern void rebuild_sched_domains(void); -extern void cpuset_print_task_mems_allowed(struct task_struct *p); +extern void cpuset_print_current_mems_allowed(void); /* * read_mems_allowed_begin is required when making decisions involving @@ -219,7 +219,7 @@ static inline void rebuild_sched_domains(void) partition_sched_domains(1, NULL, NULL); } -static inline void cpuset_print_task_mems_allowed(struct task_struct *p) +static inline void cpuset_print_current_mems_allowed(void) { } diff --git a/kernel/cpuset.c b/kernel/cpuset.c index f0acff0f66c9..9ef59a37c190 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2598,22 +2598,22 @@ int cpuset_mems_allowed_intersects(const struct task_struct *tsk1, } /** - * cpuset_print_task_mems_allowed - prints task's cpuset and mems_allowed - * @tsk: pointer to task_struct of some task. + * cpuset_print_current_mems_allowed - prints current's cpuset and mems_allowed * - * Description: Prints @task's name, cpuset name, and cached copy of its + * Description: Prints current's name, cpuset name, and cached copy of its * mems_allowed to the kernel log. */ -void cpuset_print_task_mems_allowed(struct task_struct *tsk) +void cpuset_print_current_mems_allowed(void) { struct cgroup *cgrp; rcu_read_lock(); - cgrp = task_cs(tsk)->css.cgroup; - pr_info("%s cpuset=", tsk->comm); + cgrp = task_cs(current)->css.cgroup; + pr_info("%s cpuset=", current->comm); pr_cont_cgroup_name(cgrp); - pr_cont(" mems_allowed=%*pbl\n", nodemask_pr_args(&tsk->mems_allowed)); + pr_cont(" mems_allowed=%*pbl\n", + nodemask_pr_args(¤t->mems_allowed)); rcu_read_unlock(); } diff --git a/mm/oom_kill.c b/mm/oom_kill.c index c170d9f0a158..58f3d2748ced 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -377,13 +377,11 @@ static void dump_tasks(struct mem_cgroup *memcg, const nodemask_t *nodemask) static void dump_header(struct oom_control *oc, struct task_struct *p, struct mem_cgroup *memcg) { - task_lock(current); pr_warning("%s invoked oom-killer: gfp_mask=0x%x, order=%d, " "oom_score_adj=%hd\n", current->comm, oc->gfp_mask, oc->order, current->signal->oom_score_adj); - cpuset_print_task_mems_allowed(current); - task_unlock(current); + cpuset_print_current_mems_allowed(); dump_stack(); if (memcg) mem_cgroup_print_oom_info(memcg, p); @@ -509,10 +507,8 @@ void oom_kill_process(struct oom_control *oc, struct task_struct *p, if (__ratelimit(&oom_rs)) dump_header(oc, p, memcg); - task_lock(p); pr_err("%s: Kill process %d (%s) score %u or sacrifice child\n", message, task_pid_nr(p), p->comm, points); - task_unlock(p); /* * If any of p's children has a different mm and is eligible for kill, @@ -586,10 +582,8 @@ void oom_kill_process(struct oom_control *oc, struct task_struct *p, if (fatal_signal_pending(p)) continue; - task_lock(p); /* Protect ->comm from prctl() */ pr_info("Kill process %d (%s) sharing same memory\n", task_pid_nr(p), p->comm); - task_unlock(p); do_send_sig_info(SIGKILL, SEND_SIG_FORCED, p, 
true); } rcu_read_unlock(); -- cgit v1.2.3-59-g8ed1b From a8ca5d0ecbdde5cc3d7accacbd69968b0c98764e Mon Sep 17 00:00:00 2001 From: Eric B Munson Date: Thu, 5 Nov 2015 18:51:33 -0800 Subject: mm: mlock: add new mlock system call With the refactored mlock code, introduce a new system call for mlock. The new call will allow the user to specify what lock states are being added. mlock2 is trivial at the moment, but a follow on patch will add a new mlock state making it useful. Signed-off-by: Eric B Munson Acked-by: Michal Hocko Acked-by: Vlastimil Babka Cc: Heiko Carstens Cc: Geert Uytterhoeven Cc: Catalin Marinas Cc: Stephen Rothwell Cc: Guenter Roeck Cc: Jonathan Corbet Cc: Kirill A. Shutemov Cc: Michael Kerrisk Cc: Ralf Baechle Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/entry/syscalls/syscall_32.tbl | 1 + arch/x86/entry/syscalls/syscall_64.tbl | 1 + include/linux/syscalls.h | 2 ++ include/uapi/asm-generic/unistd.h | 4 +++- kernel/sys_ni.c | 1 + mm/mlock.c | 8 ++++++++ 6 files changed, 16 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index caa2c712d1e7..f17705e1332c 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -382,3 +382,4 @@ 373 i386 shutdown sys_shutdown 374 i386 userfaultfd sys_userfaultfd 375 i386 membarrier sys_membarrier +376 i386 mlock2 sys_mlock2 diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 278842fdf1f6..314a90bfc09c 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -331,6 +331,7 @@ 322 64 execveat stub_execveat 323 common userfaultfd sys_userfaultfd 324 common membarrier sys_membarrier +325 common mlock2 sys_mlock2 # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index a460e2ef2843..a156b82dd14c 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -887,4 +887,6 @@ asmlinkage long sys_execveat(int dfd, const char __user *filename, asmlinkage long sys_membarrier(int cmd, int flags); +asmlinkage long sys_mlock2(unsigned long start, size_t len, int flags); + #endif diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index ee124009e12a..1324b0292ec2 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -713,9 +713,11 @@ __SC_COMP(__NR_execveat, sys_execveat, compat_sys_execveat) __SYSCALL(__NR_userfaultfd, sys_userfaultfd) #define __NR_membarrier 283 __SYSCALL(__NR_membarrier, sys_membarrier) +#define __NR_mlock2 284 +__SYSCALL(__NR_mlock2, sys_mlock2) #undef __NR_syscalls -#define __NR_syscalls 284 +#define __NR_syscalls 285 /* * All syscalls below here should go away really, diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index a02decf15583..0623787ec67a 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -194,6 +194,7 @@ cond_syscall(sys_mlock); cond_syscall(sys_munlock); cond_syscall(sys_mlockall); cond_syscall(sys_munlockall); +cond_syscall(sys_mlock2); cond_syscall(sys_mincore); cond_syscall(sys_madvise); cond_syscall(sys_mremap); diff --git a/mm/mlock.c b/mm/mlock.c index fbd8c03f7b37..35dcf8fa7195 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -644,6 +644,14 @@ SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len) return do_mlock(start, len, VM_LOCKED); } +SYSCALL_DEFINE3(mlock2, unsigned long, start, size_t, 
len, int, flags) +{ + if (flags) + return -EINVAL; + + return do_mlock(start, len, VM_LOCKED); +} + SYSCALL_DEFINE2(munlock, unsigned long, start, size_t, len) { int ret; -- cgit v1.2.3-59-g8ed1b From de60f5f10c58d4f34b68622442c0e04180367f3f Mon Sep 17 00:00:00 2001 From: Eric B Munson Date: Thu, 5 Nov 2015 18:51:36 -0800 Subject: mm: introduce VM_LOCKONFAULT The cost of faulting in all memory to be locked can be very high when working with large mappings. If only portions of the mapping will be used this can incur a high penalty for locking. For the example of a large file, this is the usage pattern for a large statical language model (probably applies to other statical or graphical models as well). For the security example, any application transacting in data that cannot be swapped out (credit card data, medical records, etc). This patch introduces the ability to request that pages are not pre-faulted, but are placed on the unevictable LRU when they are finally faulted in. The VM_LOCKONFAULT flag will be used together with VM_LOCKED and has no effect when set without VM_LOCKED. Setting the VM_LOCKONFAULT flag for a VMA will cause pages faulted into that VMA to be added to the unevictable LRU when they are faulted or if they are already present, but will not cause any missing pages to be faulted in. Exposing this new lock state means that we cannot overload the meaning of the FOLL_POPULATE flag any longer. Prior to this patch it was used to mean that the VMA for a fault was locked. This means we need the new FOLL_MLOCK flag to communicate the locked state of a VMA. FOLL_POPULATE will now only control if the VMA should be populated and in the case of VM_LOCKONFAULT, it will not be set. Signed-off-by: Eric B Munson Acked-by: Kirill A. Shutemov Acked-by: Vlastimil Babka Cc: Michal Hocko Cc: Jonathan Corbet Cc: Catalin Marinas Cc: Geert Uytterhoeven Cc: Guenter Roeck Cc: Heiko Carstens Cc: Michael Kerrisk Cc: Ralf Baechle Cc: Shuah Khan Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 5 +++++ kernel/fork.c | 3 ++- mm/debug.c | 1 + mm/gup.c | 10 ++++++++-- mm/huge_memory.c | 2 +- mm/hugetlb.c | 4 ++-- mm/mlock.c | 2 +- mm/mmap.c | 2 +- 8 files changed, 21 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/include/linux/mm.h b/include/linux/mm.h index 3c258f8eb9ae..906c46a05707 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -139,6 +139,7 @@ extern unsigned int kobjsize(const void *objp); #define VM_DONTCOPY 0x00020000 /* Do not copy this vma on fork */ #define VM_DONTEXPAND 0x00040000 /* Cannot expand with mremap() */ +#define VM_LOCKONFAULT 0x00080000 /* Lock the pages covered when they are faulted in */ #define VM_ACCOUNT 0x00100000 /* Is a VM accounted object */ #define VM_NORESERVE 0x00200000 /* should the VM suppress accounting */ #define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */ @@ -202,6 +203,9 @@ extern unsigned int kobjsize(const void *objp); /* This mask defines which mm->def_flags a process can inherit its parent */ #define VM_INIT_DEF_MASK VM_NOHUGEPAGE +/* This mask is used to clear all the VMA flags used by mlock */ +#define VM_LOCKED_CLEAR_MASK (~(VM_LOCKED | VM_LOCKONFAULT)) + /* * mapping from the currently active vm_flags protection bits (the * low four bits) to a page protection mask.. 
@@ -2137,6 +2141,7 @@ static inline struct page *follow_page(struct vm_area_struct *vma, #define FOLL_NUMA 0x200 /* force NUMA hinting page fault */ #define FOLL_MIGRATION 0x400 /* wait for page to replace migration entry */ #define FOLL_TRIED 0x800 /* a retry, previous pass started an IO */ +#define FOLL_MLOCK 0x1000 /* lock present pages */ typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, void *data); diff --git a/kernel/fork.c b/kernel/fork.c index 6ac894244d39..a30fae45b486 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -454,7 +454,8 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) tmp->vm_mm = mm; if (anon_vma_fork(tmp, mpnt)) goto fail_nomem_anon_vma_fork; - tmp->vm_flags &= ~(VM_LOCKED|VM_UFFD_MISSING|VM_UFFD_WP); + tmp->vm_flags &= + ~(VM_LOCKED|VM_LOCKONFAULT|VM_UFFD_MISSING|VM_UFFD_WP); tmp->vm_next = tmp->vm_prev = NULL; tmp->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; file = tmp->vm_file; diff --git a/mm/debug.c b/mm/debug.c index 6c1b3ea61bfd..e784110fb51d 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -125,6 +125,7 @@ static const struct trace_print_flags vmaflags_names[] = { {VM_GROWSDOWN, "growsdown" }, {VM_PFNMAP, "pfnmap" }, {VM_DENYWRITE, "denywrite" }, + {VM_LOCKONFAULT, "lockonfault" }, {VM_LOCKED, "locked" }, {VM_IO, "io" }, {VM_SEQ_READ, "seqread" }, diff --git a/mm/gup.c b/mm/gup.c index a798293fc648..deafa2c91b36 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -129,7 +129,7 @@ retry: */ mark_page_accessed(page); } - if ((flags & FOLL_POPULATE) && (vma->vm_flags & VM_LOCKED)) { + if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) { /* * The preliminary mapping check is mainly to avoid the * pointless overhead of lock_page on the ZERO_PAGE @@ -299,6 +299,9 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma, unsigned int fault_flags = 0; int ret; + /* mlock all present pages, but do not fault in new pages */ + if ((*flags & (FOLL_POPULATE | FOLL_MLOCK)) == FOLL_MLOCK) + return -ENOENT; /* For mm_populate(), just skip the stack guard page. 
*/ if ((*flags & FOLL_POPULATE) && (stack_guard_page_start(vma, address) || @@ -890,7 +893,10 @@ long populate_vma_page_range(struct vm_area_struct *vma, VM_BUG_ON_VMA(end > vma->vm_end, vma); VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_sem), mm); - gup_flags = FOLL_TOUCH | FOLL_POPULATE; + gup_flags = FOLL_TOUCH | FOLL_POPULATE | FOLL_MLOCK; + if (vma->vm_flags & VM_LOCKONFAULT) + gup_flags &= ~FOLL_POPULATE; + /* * We want to touch writable mappings with a write fault in order * to break COW, except for shared mappings because these don't COW diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 3fd0311c3ba7..f5c08b46fef8 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1307,7 +1307,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, pmd, _pmd, 1)) update_mmu_cache_pmd(vma, addr, pmd); } - if ((flags & FOLL_POPULATE) && (vma->vm_flags & VM_LOCKED)) { + if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) { if (page->mapping && trylock_page(page)) { lru_add_drain(); if (page->mapping) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 241de2712b36..74ef0c6a25dd 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4137,8 +4137,8 @@ static unsigned long page_table_shareable(struct vm_area_struct *svma, unsigned long s_end = sbase + PUD_SIZE; /* Allow segments to share if only one is marked locked */ - unsigned long vm_flags = vma->vm_flags & ~VM_LOCKED; - unsigned long svm_flags = svma->vm_flags & ~VM_LOCKED; + unsigned long vm_flags = vma->vm_flags & VM_LOCKED_CLEAR_MASK; + unsigned long svm_flags = svma->vm_flags & VM_LOCKED_CLEAR_MASK; /* * match the virtual addresses, permission and the alignment of the diff --git a/mm/mlock.c b/mm/mlock.c index 35dcf8fa7195..ca3894113b97 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -422,7 +422,7 @@ static unsigned long __munlock_pagevec_fill(struct pagevec *pvec, void munlock_vma_pages_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - vma->vm_flags &= ~VM_LOCKED; + vma->vm_flags &= VM_LOCKED_CLEAR_MASK; while (start < end) { struct page *page = NULL; diff --git a/mm/mmap.c b/mm/mmap.c index 220effde8ea3..2ce04a649f6b 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1661,7 +1661,7 @@ out: vma == get_gate_vma(current->mm))) mm->locked_vm += (len >> PAGE_SHIFT); else - vma->vm_flags &= ~VM_LOCKED; + vma->vm_flags &= VM_LOCKED_CLEAR_MASK; } if (file) -- cgit v1.2.3-59-g8ed1b
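Editor's usage note (not part of the series): a minimal userspace sketch of the mlock2() call introduced by the "mm: mlock: add new mlock system call" patch above. Assumptions are hedged: an x86_64 kernel built with this series, no libc wrapper (so the raw syscall(2) interface and the 325 number from the syscall_64.tbl hunk are used directly), and flags passed as 0, since this patch rejects any other value and the MLOCK_ONFAULT flag only arrives in the follow-on work that builds on VM_LOCKONFAULT.

/* mlock2_demo.c - hypothetical test program, not part of the patch series */
#define _GNU_SOURCE
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef __NR_mlock2
#define __NR_mlock2 325		/* x86_64 number from the syscall_64.tbl hunk; other arches differ */
#endif

int main(void)
{
	size_t len = sysconf(_SC_PAGESIZE);
	void *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (buf == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/*
	 * At this point in the series the only accepted flags value is 0;
	 * anything else returns -EINVAL from sys_mlock2().
	 */
	if (syscall(__NR_mlock2, buf, len, 0) != 0) {
		perror("mlock2");	/* ENOSYS on kernels without the new call */
		munmap(buf, len);
		return 1;
	}

	memset(buf, 0, len);		/* touch the now-locked pages */

	munlock(buf, len);
	munmap(buf, len);
	return 0;
}

On kernels predating the series the call fails with ENOSYS, so callers probing for the new interface can fall back to plain mlock() in that case.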