From 60b61a6f42f36e4fbfbc0139b7e86ce1494d2d9b Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 9 Sep 2015 15:38:10 -0700 Subject: kmod: correct documentation of return status of request_module If request_module() successfully runs modprobe, but modprobe exits with a non-zero status, then the return value from request_module() will be that (positive) error status. So the return from request_module can be: negative errno zero for success positive exit code. Signed-off-by: NeilBrown Cc: Goldwyn Rodrigues Cc: Oleg Nesterov Cc: Tejun Heo Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/kmod.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'kernel/kmod.c') diff --git a/kernel/kmod.c b/kernel/kmod.c index 2777f40a9c7b..1734ba61ff23 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -114,10 +114,11 @@ out: * @...: arguments as specified in the format string * * Load a module using the user mode module loader. The function returns - * zero on success or a negative errno code on failure. Note that a - * successful module load does not mean the module did not then unload - * and exit on an error of its own. Callers must check that the service - * they requested is now available not blindly invoke it. + * zero on success or a negative errno code or positive exit code from + * "modprobe" on failure. Note that a successful module load does not mean + * the module did not then unload and exit on an error of its own. Callers + * must check that the service they requested is now available not blindly + * invoke it. * * If module auto-loading support is disabled then this function * becomes a no-operation. -- cgit v1.3-6-gb490 From b6b50a814d0ece9c1f98f2b3b5c2a251a5c9a211 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 9 Sep 2015 15:38:13 -0700 Subject: kmod: bunch of internal functions renames This patchset does a bunch of cleanups and converts khelper to use system unbound workqueues. The 3 first patches should be uncontroversial. The last 2 patches are debatable. Kmod creates kernel threads that perform userspace jobs and we want those to have a large affinity in order not to contend busy CPUs. This is (partly) why we use khelper which has a wide affinity that the kernel threads it create can inherit from. Now khelper is a dedicated workqueue that has singlethread properties which we aren't interested in. Hence those two debatable changes: _ We would like to use generic workqueues. System unbound workqueues are a very good candidate but they are not wide affine, only node affine. Now probably a node is enough to perform many parallel kmod jobs. _ We would like to remove the wait_for_helper kernel thread (UMH_WAIT_PROC handler) to use the workqueue. It means that if the workqueue blocks, and no other worker can take pending kmod request, we can be screwed. Now if we have 512 threads, this should be enough. This patch (of 5): Underscores on function names aren't much verbose to explain the purpose of a function. And kmod has interesting such flavours. Lets rename the following functions: * __call_usermodehelper -> call_usermodehelper_exec_work * ____call_usermodehelper -> call_usermodehelper_exec_async * wait_for_helper -> call_usermodehelper_exec_sync Signed-off-by: Frederic Weisbecker Cc: Rik van Riel Reviewed-by: Oleg Nesterov Cc: Christoph Lameter Cc: Tejun Heo Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/kmod.c | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) (limited to 'kernel/kmod.c') diff --git a/kernel/kmod.c b/kernel/kmod.c index 1734ba61ff23..2d83511e9610 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -214,7 +214,7 @@ static void umh_complete(struct subprocess_info *sub_info) /* * This is the task which runs the usermode application */ -static int ____call_usermodehelper(void *data) +static int call_usermodehelper_exec_async(void *data) { struct subprocess_info *sub_info = data; struct cred *new; @@ -259,7 +259,10 @@ static int ____call_usermodehelper(void *data) (const char __user *const __user *)sub_info->envp); out: sub_info->retval = retval; - /* wait_for_helper() will call umh_complete if UHM_WAIT_PROC. */ + /* + * call_usermodehelper_exec_sync() will call umh_complete + * if UHM_WAIT_PROC. + */ if (!(sub_info->wait & UMH_WAIT_PROC)) umh_complete(sub_info); if (!retval) @@ -268,14 +271,14 @@ out: } /* Keventd can't block, but this (a child) can. */ -static int wait_for_helper(void *data) +static int call_usermodehelper_exec_sync(void *data) { struct subprocess_info *sub_info = data; pid_t pid; /* If SIGCLD is ignored sys_wait4 won't populate the status. */ kernel_sigaction(SIGCHLD, SIG_DFL); - pid = kernel_thread(____call_usermodehelper, sub_info, SIGCHLD); + pid = kernel_thread(call_usermodehelper_exec_async, sub_info, SIGCHLD); if (pid < 0) { sub_info->retval = pid; } else { @@ -283,17 +286,18 @@ static int wait_for_helper(void *data) /* * Normally it is bogus to call wait4() from in-kernel because * wait4() wants to write the exit code to a userspace address. - * But wait_for_helper() always runs as keventd, and put_user() - * to a kernel address works OK for kernel threads, due to their - * having an mm_segment_t which spans the entire address space. + * But call_usermodehelper_exec_sync() always runs as keventd, + * and put_user() to a kernel address works OK for kernel + * threads, due to their having an mm_segment_t which spans the + * entire address space. * * Thus the __user pointer cast is valid here. */ sys_wait4(pid, (int __user *)&ret, 0, NULL); /* - * If ret is 0, either ____call_usermodehelper failed and the - * real error code is already in sub_info->retval or + * If ret is 0, either call_usermodehelper_exec_async failed and + * the real error code is already in sub_info->retval or * sub_info->retval is 0 anyway, so don't mess with it then. */ if (ret) @@ -305,17 +309,17 @@ static int wait_for_helper(void *data) } /* This is run by khelper thread */ -static void __call_usermodehelper(struct work_struct *work) +static void call_usermodehelper_exec_work(struct work_struct *work) { struct subprocess_info *sub_info = container_of(work, struct subprocess_info, work); pid_t pid; if (sub_info->wait & UMH_WAIT_PROC) - pid = kernel_thread(wait_for_helper, sub_info, + pid = kernel_thread(call_usermodehelper_exec_sync, sub_info, CLONE_FS | CLONE_FILES | SIGCHLD); else - pid = kernel_thread(____call_usermodehelper, sub_info, + pid = kernel_thread(call_usermodehelper_exec_async, sub_info, SIGCHLD); if (pid < 0) { @@ -510,7 +514,7 @@ struct subprocess_info *call_usermodehelper_setup(char *path, char **argv, if (!sub_info) goto out; - INIT_WORK(&sub_info->work, __call_usermodehelper); + INIT_WORK(&sub_info->work, call_usermodehelper_exec_work); sub_info->path = path; sub_info->argv = argv; sub_info->envp = envp; -- cgit v1.3-6-gb490 From d097c0240ae8085dd39aa6ca9bd9960969b2b38e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 9 Sep 2015 15:38:16 -0700 Subject: kmod: remove unecessary explicit wide CPU affinity setting Khelper is affine to all CPUs. Now since it creates the call_usermodehelper_exec_[a]sync() kernel threads, those inherit the wide affinity. As such explicitly forcing a wide affinity from those kernel threads is like a no-op. Just remove it. It's needless and it breaks CPU isolation users who rely on workqueue affinity tuning. Signed-off-by: Frederic Weisbecker Cc: Rik van Riel Reviewed-by: Oleg Nesterov Cc: Christoph Lameter Cc: Tejun Heo Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/kmod.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'kernel/kmod.c') diff --git a/kernel/kmod.c b/kernel/kmod.c index 2d83511e9610..d910b6378fb6 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -224,9 +224,6 @@ static int call_usermodehelper_exec_async(void *data) flush_signal_handlers(current, 1); spin_unlock_irq(¤t->sighand->siglock); - /* We can run anywhere, unlike our parent keventd(). */ - set_cpus_allowed_ptr(current, cpu_all_mask); - /* * Our parent is keventd, which runs with elevated scheduling priority. * Avoid propagating that into the userspace child. -- cgit v1.3-6-gb490 From b639e86bae431db3fbc9fae8d09a9bbf97b74711 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 9 Sep 2015 15:38:19 -0700 Subject: kmod: add up-to-date explanations on the purpose of each asynchronous levels There seem to be quite some confusions on the comments, likely due to changes that came after them. Now since it's very non obvious why we have 3 levels of asynchronous code to implement usermodehelpers, it's important to comment in detail the reason of this layout. Signed-off-by: Frederic Weisbecker Cc: Rik van Riel Reviewed-by: Oleg Nesterov Cc: Christoph Lameter Cc: Tejun Heo Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/kmod.c | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) (limited to 'kernel/kmod.c') diff --git a/kernel/kmod.c b/kernel/kmod.c index d910b6378fb6..81c67050c5aa 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -225,8 +225,8 @@ static int call_usermodehelper_exec_async(void *data) spin_unlock_irq(¤t->sighand->siglock); /* - * Our parent is keventd, which runs with elevated scheduling priority. - * Avoid propagating that into the userspace child. + * Our parent is khelper which runs with elevated scheduling + * priority. Avoid propagating that into the userspace child. */ set_user_nice(current, 0); @@ -267,7 +267,11 @@ out: do_exit(0); } -/* Keventd can't block, but this (a child) can. */ +/* + * Handles UMH_WAIT_PROC. Our parent khelper can't wait for usermodehelper + * completion without blocking every other pending requests. That's why + * we use a kernel thread dedicated for that purpose. + */ static int call_usermodehelper_exec_sync(void *data) { struct subprocess_info *sub_info = data; @@ -283,8 +287,8 @@ static int call_usermodehelper_exec_sync(void *data) /* * Normally it is bogus to call wait4() from in-kernel because * wait4() wants to write the exit code to a userspace address. - * But call_usermodehelper_exec_sync() always runs as keventd, - * and put_user() to a kernel address works OK for kernel + * But call_usermodehelper_exec_sync() always runs as kernel + * thread and put_user() to a kernel address works OK for kernel * threads, due to their having an mm_segment_t which spans the * entire address space. * @@ -305,7 +309,19 @@ static int call_usermodehelper_exec_sync(void *data) do_exit(0); } -/* This is run by khelper thread */ +/* + * This function doesn't strictly needs to be called asynchronously. But we + * need to create the usermodehelper kernel threads from a task that is affine + * to all CPUs (or nohz housekeeping ones) such that they inherit a widest + * affinity irrespective of call_usermodehelper() callers with possibly reduced + * affinity (eg: per-cpu workqueues). We don't want usermodehelper targets to + * contend any busy CPU. + * Khelper provides such wide affinity. + * + * Besides, khelper provides the privilege level that caller might not have to + * perform the usermodehelper request. + * + */ static void call_usermodehelper_exec_work(struct work_struct *work) { struct subprocess_info *sub_info = @@ -533,8 +549,8 @@ EXPORT_SYMBOL(call_usermodehelper_setup); * from interrupt context. * * Runs a user-space application. The application is started - * asynchronously if wait is not set, and runs as a child of keventd. - * (ie. it runs with full root capabilities). + * asynchronously if wait is not set, and runs as a child of khelper. + * (ie. it runs with full root capabilities and wide affinity). */ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait) { -- cgit v1.3-6-gb490 From 90f023030e26ce8f981b3e688cb79329d8d07cc3 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 9 Sep 2015 15:38:22 -0700 Subject: kmod: use system_unbound_wq instead of khelper We need to launch the usermodehelper kernel threads with the widest affinity and this is partly why we use khelper. This workqueue has unbound properties and thus a wide affinity inherited by all its children. Now khelper also has special properties that we aren't much interested in: ordered and singlethread. There is really no need about ordering as all we do is creating kernel threads. This can be done concurrently. And singlethread is a useless limitation as well. The workqueue engine already proposes generic unbound workqueues that don't share these useless properties and handle well parallel jobs. The only worrysome specific is their affinity to the node of the current CPU. It's fine for creating the usermodehelper kernel threads but those inherit this affinity for longer jobs such as requesting modules. This patch proposes to use these node affine unbound workqueues assuming that a node is sufficient to handle several parallel usermodehelper requests. Signed-off-by: Frederic Weisbecker Cc: Rik van Riel Reviewed-by: Oleg Nesterov Cc: Christoph Lameter Cc: Tejun Heo Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kmod.h | 2 -- init/main.c | 1 - kernel/kmod.c | 40 +++++++++++++++++----------------------- 3 files changed, 17 insertions(+), 26 deletions(-) (limited to 'kernel/kmod.c') diff --git a/include/linux/kmod.h b/include/linux/kmod.h index 0555cc66a15b..fcfd2bf14d3f 100644 --- a/include/linux/kmod.h +++ b/include/linux/kmod.h @@ -85,8 +85,6 @@ enum umh_disable_depth { UMH_DISABLED, }; -extern void usermodehelper_init(void); - extern int __usermodehelper_disable(enum umh_disable_depth depth); extern void __usermodehelper_set_disable_depth(enum umh_disable_depth depth); diff --git a/init/main.c b/init/main.c index 56506553d4d8..9e64d7097f1a 100644 --- a/init/main.c +++ b/init/main.c @@ -877,7 +877,6 @@ static void __init do_initcalls(void) static void __init do_basic_setup(void) { cpuset_init_smp(); - usermodehelper_init(); shmem_init(); driver_init(); init_irq_proc(); diff --git a/kernel/kmod.c b/kernel/kmod.c index 81c67050c5aa..d38b2dab99a7 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -45,8 +45,6 @@ extern int max_threads; -static struct workqueue_struct *khelper_wq; - #define CAP_BSET (void *)1 #define CAP_PI (void *)2 @@ -225,7 +223,7 @@ static int call_usermodehelper_exec_async(void *data) spin_unlock_irq(¤t->sighand->siglock); /* - * Our parent is khelper which runs with elevated scheduling + * Our parent (unbound workqueue) runs with elevated scheduling * priority. Avoid propagating that into the userspace child. */ set_user_nice(current, 0); @@ -268,9 +266,10 @@ out: } /* - * Handles UMH_WAIT_PROC. Our parent khelper can't wait for usermodehelper - * completion without blocking every other pending requests. That's why - * we use a kernel thread dedicated for that purpose. + * Handles UMH_WAIT_PROC. Our parent (unbound workqueue) might not be able to + * run enough instances to handle usermodehelper completions without blocking + * some other pending requests. That's why we use a kernel thread dedicated for + * that purpose. */ static int call_usermodehelper_exec_sync(void *data) { @@ -312,14 +311,15 @@ static int call_usermodehelper_exec_sync(void *data) /* * This function doesn't strictly needs to be called asynchronously. But we * need to create the usermodehelper kernel threads from a task that is affine - * to all CPUs (or nohz housekeeping ones) such that they inherit a widest - * affinity irrespective of call_usermodehelper() callers with possibly reduced - * affinity (eg: per-cpu workqueues). We don't want usermodehelper targets to - * contend any busy CPU. - * Khelper provides such wide affinity. + * to an optimized set of CPUs (or nohz housekeeping ones) such that they + * inherit a widest affinity irrespective of call_usermodehelper() callers with + * possibly reduced affinity (eg: per-cpu workqueues). We don't want + * usermodehelper targets to contend a busy CPU. + * + * Unbound workqueues provide such wide affinity. * - * Besides, khelper provides the privilege level that caller might not have to - * perform the usermodehelper request. + * Besides, workqueues provide the privilege level that caller might not have + * to perform the usermodehelper request. * */ static void call_usermodehelper_exec_work(struct work_struct *work) @@ -549,8 +549,8 @@ EXPORT_SYMBOL(call_usermodehelper_setup); * from interrupt context. * * Runs a user-space application. The application is started - * asynchronously if wait is not set, and runs as a child of khelper. - * (ie. it runs with full root capabilities and wide affinity). + * asynchronously if wait is not set, and runs as a child of system workqueues. + * (ie. it runs with full root capabilities and optimized affinity). */ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait) { @@ -562,7 +562,7 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait) return -EINVAL; } helper_lock(); - if (!khelper_wq || usermodehelper_disabled) { + if (usermodehelper_disabled) { retval = -EBUSY; goto out; } @@ -574,7 +574,7 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait) sub_info->complete = (wait == UMH_NO_WAIT) ? NULL : &done; sub_info->wait = wait; - queue_work(khelper_wq, &sub_info->work); + queue_work(system_unbound_wq, &sub_info->work); if (wait == UMH_NO_WAIT) /* task has freed sub_info */ goto unlock; @@ -704,9 +704,3 @@ struct ctl_table usermodehelper_table[] = { }, { } }; - -void __init usermodehelper_init(void) -{ - khelper_wq = create_singlethread_workqueue("khelper"); - BUG_ON(!khelper_wq); -} -- cgit v1.3-6-gb490 From bb304a5c6fc63d8506cd9741a3a5f35b73605625 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 9 Sep 2015 15:38:25 -0700 Subject: kmod: handle UMH_WAIT_PROC from system unbound workqueue The UMH_WAIT_PROC handler runs in its own thread in order to make sure that waiting for the exec kernel thread completion won't block other usermodehelper queued jobs. On older workqueue implementations, worklets couldn't sleep without blocking the rest of the queue. But now the workqueue subsystem handles that. Khelper still had the older limitation due to its singlethread properties but we replaced it to system unbound workqueues. Those are affine to the current node and can block up to some number of instances. They are a good candidate to handle UMH_WAIT_PROC assuming that we have enough system unbound workers to handle lots of parallel usermodehelper jobs. Signed-off-by: Frederic Weisbecker Cc: Rik van Riel Reviewed-by: Oleg Nesterov Cc: Christoph Lameter Cc: Tejun Heo Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/kmod.c | 44 ++++++++++++++++++++------------------------ 1 file changed, 20 insertions(+), 24 deletions(-) (limited to 'kernel/kmod.c') diff --git a/kernel/kmod.c b/kernel/kmod.c index d38b2dab99a7..da98d0593de2 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -265,15 +265,9 @@ out: do_exit(0); } -/* - * Handles UMH_WAIT_PROC. Our parent (unbound workqueue) might not be able to - * run enough instances to handle usermodehelper completions without blocking - * some other pending requests. That's why we use a kernel thread dedicated for - * that purpose. - */ -static int call_usermodehelper_exec_sync(void *data) +/* Handles UMH_WAIT_PROC. */ +static void call_usermodehelper_exec_sync(struct subprocess_info *sub_info) { - struct subprocess_info *sub_info = data; pid_t pid; /* If SIGCLD is ignored sys_wait4 won't populate the status. */ @@ -287,9 +281,9 @@ static int call_usermodehelper_exec_sync(void *data) * Normally it is bogus to call wait4() from in-kernel because * wait4() wants to write the exit code to a userspace address. * But call_usermodehelper_exec_sync() always runs as kernel - * thread and put_user() to a kernel address works OK for kernel - * threads, due to their having an mm_segment_t which spans the - * entire address space. + * thread (workqueue) and put_user() to a kernel address works + * OK for kernel threads, due to their having an mm_segment_t + * which spans the entire address space. * * Thus the __user pointer cast is valid here. */ @@ -304,19 +298,21 @@ static int call_usermodehelper_exec_sync(void *data) sub_info->retval = ret; } + /* Restore default kernel sig handler */ + kernel_sigaction(SIGCHLD, SIG_IGN); + umh_complete(sub_info); - do_exit(0); } /* - * This function doesn't strictly needs to be called asynchronously. But we - * need to create the usermodehelper kernel threads from a task that is affine + * We need to create the usermodehelper kernel thread from a task that is affine * to an optimized set of CPUs (or nohz housekeeping ones) such that they * inherit a widest affinity irrespective of call_usermodehelper() callers with * possibly reduced affinity (eg: per-cpu workqueues). We don't want * usermodehelper targets to contend a busy CPU. * - * Unbound workqueues provide such wide affinity. + * Unbound workqueues provide such wide affinity and allow to block on + * UMH_WAIT_PROC requests without blocking pending request (up to some limit). * * Besides, workqueues provide the privilege level that caller might not have * to perform the usermodehelper request. @@ -326,18 +322,18 @@ static void call_usermodehelper_exec_work(struct work_struct *work) { struct subprocess_info *sub_info = container_of(work, struct subprocess_info, work); - pid_t pid; - if (sub_info->wait & UMH_WAIT_PROC) - pid = kernel_thread(call_usermodehelper_exec_sync, sub_info, - CLONE_FS | CLONE_FILES | SIGCHLD); - else + if (sub_info->wait & UMH_WAIT_PROC) { + call_usermodehelper_exec_sync(sub_info); + } else { + pid_t pid; + pid = kernel_thread(call_usermodehelper_exec_async, sub_info, SIGCHLD); - - if (pid < 0) { - sub_info->retval = pid; - umh_complete(sub_info); + if (pid < 0) { + sub_info->retval = pid; + umh_complete(sub_info); + } } } -- cgit v1.3-6-gb490