aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/fork.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/fork.c')
-rw-r--r-- kernel/fork.c | 65
1 files changed, 57 insertions, 8 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index d896e9ca38b0..07cddff89c7b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -91,6 +91,7 @@
#include <linux/kcov.h>
#include <linux/livepatch.h>
#include <linux/thread_info.h>
+#include <linux/stackleak.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -223,9 +224,14 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
return s->addr;
}
+ /*
+ * Allocated stacks are cached and later reused by new threads,
+ * so memcg accounting is performed manually on assigning/releasing
+ * stacks to tasks. Drop __GFP_ACCOUNT.
+ */
stack = __vmalloc_node_range(THREAD_SIZE, THREAD_ALIGN,
VMALLOC_START, VMALLOC_END,
- THREADINFO_GFP,
+ THREADINFO_GFP & ~__GFP_ACCOUNT,
PAGE_KERNEL,
0, node, __builtin_return_address(0));
@@ -248,9 +254,19 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
static inline void free_thread_stack(struct task_struct *tsk)
{
#ifdef CONFIG_VMAP_STACK
- if (task_stack_vm_area(tsk)) {
+ struct vm_struct *vm = task_stack_vm_area(tsk);
+
+ if (vm) {
int i;
+ for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
+ mod_memcg_page_state(vm->pages[i],
+ MEMCG_KERNEL_STACK_KB,
+ -(int)(PAGE_SIZE / 1024));
+
+ memcg_kmem_uncharge(vm->pages[i], 0);
+ }
+
for (i = 0; i < NR_CACHED_STACKS; i++) {
if (this_cpu_cmpxchg(cached_stacks[i],
NULL, tsk->stack_vm_area) != NULL)
@@ -351,10 +367,6 @@ static void account_kernel_stack(struct task_struct *tsk, int account)
NR_KERNEL_STACK_KB,
PAGE_SIZE / 1024 * account);
}
-
- /* All stack pages belong to the same memcg. */
- mod_memcg_page_state(vm->pages[0], MEMCG_KERNEL_STACK_KB,
- account * (THREAD_SIZE / 1024));
} else {
/*
* All stack pages are in the same zone and belong to the
@@ -370,6 +382,35 @@ static void account_kernel_stack(struct task_struct *tsk, int account)
}
}
+static int memcg_charge_kernel_stack(struct task_struct *tsk)
+{
+#ifdef CONFIG_VMAP_STACK
+ struct vm_struct *vm = task_stack_vm_area(tsk);
+ int ret;
+
+ if (vm) {
+ int i;
+
+ for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
+ /*
+ * Charge the stack one page at a time and bump
+ * MEMCG_KERNEL_STACK_KB only for a page whose
+ * charge succeeded; the first failure is returned
+ * to the caller as-is.
+ *
+ * If memcg_kmem_charge() fails, page->mem_cgroup
+ * pointer is NULL, and both memcg_kmem_uncharge()
+ * and mod_memcg_page_state() in free_thread_stack()
+ * will ignore this page, so returning early with
+ * only some of the pages charged is safe.
+ */
+ ret = memcg_kmem_charge(vm->pages[i], GFP_KERNEL, 0);
+ if (ret)
+ return ret;
+
+ mod_memcg_page_state(vm->pages[i],
+ MEMCG_KERNEL_STACK_KB,
+ PAGE_SIZE / 1024);
+ }
+ }
+#endif /* CONFIG_VMAP_STACK */
+ return 0;
+}
+
static void release_task_stack(struct task_struct *tsk)
{
if (WARN_ON(tsk->state != TASK_DEAD))
@@ -550,8 +591,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
goto out;
}
/* a new mm has just been created */
- arch_dup_mmap(oldmm, mm);
- retval = 0;
+ retval = arch_dup_mmap(oldmm, mm);
out:
up_write(&mm->mmap_sem);
flush_tlb_mm(oldmm);
@@ -808,6 +848,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
if (!stack)
goto free_tsk;
+ if (memcg_charge_kernel_stack(tsk))
+ goto free_stack;
+
stack_vm_area = task_stack_vm_area(tsk);
err = arch_dup_task_struct(tsk, orig);
@@ -1780,6 +1823,10 @@ static __latent_entropy struct task_struct *copy_process(
p->default_timer_slack_ns = current->timer_slack_ns;
+#ifdef CONFIG_PSI
+ p->psi_flags = 0;
+#endif
+
task_io_accounting_init(&p->ioac);
acct_clear_integrals(p);
@@ -1880,6 +1927,8 @@ static __latent_entropy struct task_struct *copy_process(
if (retval)
goto bad_fork_cleanup_io;
+ stackleak_task_init(p);
+
if (pid != &init_struct_pid) {
pid = alloc_pid(p->nsproxy->pid_ns_for_children);
if (IS_ERR(pid)) {