From c26251f9f06d27d1941229d237aca44a0e7b4e42 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 26 Oct 2012 13:37:28 +0200 Subject: memcg: split mem_cgroup_force_empty into reclaiming and reparenting parts mem_cgroup_force_empty did two separate things depending on free_all parameter from the very beginning. It either reclaimed as many pages as possible and moved the rest to the parent or just moved charges to the parent. The first variant is used as memory.force_empty callback while the later is used from the mem_cgroup_pre_destroy. The whole games around gotos are far from being nice and there is no reason to keep those two functions inside one. Let's split them and also move the responsibility for css reference counting to their callers to make to code easier. This patch doesn't have any functional changes. Signed-off-by: Michal Hocko Reviewed-by: Tejun Heo Reviewed-by: Glauber Costa Signed-off-by: Tejun Heo --- mm/memcontrol.c | 72 +++++++++++++++++++++++++++++++++------------------------ 1 file changed, 42 insertions(+), 30 deletions(-) (limited to 'mm/memcontrol.c') diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 795e525afaba..07d92b84f448 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3739,27 +3739,21 @@ static bool mem_cgroup_force_empty_list(struct mem_cgroup *memcg, } /* - * make mem_cgroup's charge to be 0 if there is no task. + * make mem_cgroup's charge to be 0 if there is no task by moving + * all the charges and pages to the parent. * This enables deleting this mem_cgroup. + * + * Caller is responsible for holding css reference on the memcg. */ -static int mem_cgroup_force_empty(struct mem_cgroup *memcg, bool free_all) +static int mem_cgroup_reparent_charges(struct mem_cgroup *memcg) { - int ret; - int node, zid, shrink; - int nr_retries = MEM_CGROUP_RECLAIM_RETRIES; struct cgroup *cgrp = memcg->css.cgroup; + int node, zid; + int ret; - css_get(&memcg->css); - - shrink = 0; - /* should free all ? */ - if (free_all) - goto try_to_free; -move_account: do { - ret = -EBUSY; if (cgroup_task_count(cgrp) || !list_empty(&cgrp->children)) - goto out; + return -EBUSY; /* This is for making all *used* pages to be on LRU. */ lru_add_drain_all(); drain_all_stock_sync(memcg); @@ -3783,27 +3777,34 @@ move_account: cond_resched(); /* "ret" should also be checked to ensure all lists are empty. */ } while (res_counter_read_u64(&memcg->res, RES_USAGE) > 0 || ret); -out: - css_put(&memcg->css); + return ret; +} + +/* + * Reclaims as many pages from the given memcg as possible and moves + * the rest to the parent. + * + * Caller is responsible for holding css reference for memcg. + */ +static int mem_cgroup_force_empty(struct mem_cgroup *memcg) +{ + int nr_retries = MEM_CGROUP_RECLAIM_RETRIES; + struct cgroup *cgrp = memcg->css.cgroup; -try_to_free: /* returns EBUSY if there is a task or if we come here twice. */ - if (cgroup_task_count(cgrp) || !list_empty(&cgrp->children) || shrink) { - ret = -EBUSY; - goto out; - } + if (cgroup_task_count(cgrp) || !list_empty(&cgrp->children)) + return -EBUSY; + /* we call try-to-free pages for make this cgroup empty */ lru_add_drain_all(); /* try to free all pages in this cgroup */ - shrink = 1; while (nr_retries && res_counter_read_u64(&memcg->res, RES_USAGE) > 0) { int progress; - if (signal_pending(current)) { - ret = -EINTR; - goto out; - } + if (signal_pending(current)) + return -EINTR; + progress = try_to_free_mem_cgroup_pages(memcg, GFP_KERNEL, false); if (!progress) { @@ -3814,13 +3815,19 @@ try_to_free: } lru_add_drain(); - /* try move_account...there may be some *locked* pages. */ - goto move_account; + return mem_cgroup_reparent_charges(memcg); } static int mem_cgroup_force_empty_write(struct cgroup *cont, unsigned int event) { - return mem_cgroup_force_empty(mem_cgroup_from_cont(cont), true); + struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); + int ret; + + css_get(&memcg->css); + ret = mem_cgroup_force_empty(memcg); + css_put(&memcg->css); + + return ret; } @@ -5003,8 +5010,13 @@ free_out: static int mem_cgroup_pre_destroy(struct cgroup *cont) { struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); + int ret; - return mem_cgroup_force_empty(memcg, false); + css_get(&memcg->css); + ret = mem_cgroup_reparent_charges(memcg); + css_put(&memcg->css); + + return ret; } static void mem_cgroup_destroy(struct cgroup *cont) -- cgit v1.2.3-59-g8ed1b From d842301181d9a4486aa24720ed4f96018b213292 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 26 Oct 2012 13:37:29 +0200 Subject: memcg: root_cgroup cannot reach mem_cgroup_move_parent The root cgroup cannot be destroyed so we never hit it down the mem_cgroup_pre_destroy path and mem_cgroup_force_empty_write shouldn't even try to do anything if called for the root. This means that mem_cgroup_move_parent doesn't have to bother with the root cgroup and it can assume it can always move charges upwards. Signed-off-by: Michal Hocko Reviewed-by: Tejun Heo Reviewed-by: Glauber Costa Signed-off-by: Tejun Heo --- mm/memcontrol.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'mm/memcontrol.c') diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 07d92b84f448..916132a29b36 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2715,9 +2715,7 @@ static int mem_cgroup_move_parent(struct page *page, unsigned long uninitialized_var(flags); int ret; - /* Is ROOT ? */ - if (mem_cgroup_is_root(child)) - return -EINVAL; + VM_BUG_ON(mem_cgroup_is_root(child)); ret = -EBUSY; if (!get_page_unless_zero(page)) @@ -3823,6 +3821,8 @@ static int mem_cgroup_force_empty_write(struct cgroup *cont, unsigned int event) struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); int ret; + if (mem_cgroup_is_root(memcg)) + return -EINVAL; css_get(&memcg->css); ret = mem_cgroup_force_empty(memcg); css_put(&memcg->css); -- cgit v1.2.3-59-g8ed1b From 2ef37d3fe474b218e170010a59066e19427c9847 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 26 Oct 2012 13:37:30 +0200 Subject: memcg: Simplify mem_cgroup_force_empty_list error handling mem_cgroup_force_empty_list currently tries to remove all pages from the given LRU. To prevent from temoporary failures (EBUSY returned by mem_cgroup_move_parent) it uses a margin to the current LRU pages and returns the true if there are still some pages left on the list. If we consider that mem_cgroup_move_parent fails only when it is racing with somebody else removing (uncharging) the page or when the page is migrated then it is obvious that all those failures are only temporal and so we can safely retry later. Let's get rid of the safety margin and make the loop really wait for the empty LRU. The caller should still make sure that all charges have been removed from the res_counter because mem_cgroup_replace_page_cache might add a page to the LRU after the list_empty check (it doesn't touch res_counter though). This catches most of the cases except for shmem which might call mem_cgroup_replace_page_cache with a page which is not charged and on the LRU yet but this was the case also without this patch. In order to fix this we need a guarantee that try_get_mem_cgroup_from_page falls back to the current mm's cgroup so it needs css_tryget to fail. This will be fixed up in a later patch because it needs a help from cgroup core (pre_destroy has to be called after css is cleared). Although mem_cgroup_pre_destroy can still fail (if a new task or a new sub-group appears) there is no reason to retry pre_destroy callback from the cgroup core. This means that __DEPRECATED_clear_css_refs has lost its meaning and it can be removed. Changes since v2 - remove __DEPRECATED_clear_css_refs Changes since v1 - use kerndoc - be more specific about mem_cgroup_move_parent possible failures Signed-off-by: Michal Hocko Reviewed-by: Tejun Heo Reviewed-by: Glauber Costa Signed-off-by: Tejun Heo --- mm/memcontrol.c | 76 ++++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 48 insertions(+), 28 deletions(-) (limited to 'mm/memcontrol.c') diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 916132a29b36..5a1d584ffed3 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2702,10 +2702,27 @@ out: return ret; } -/* - * move charges to its parent. +/** + * mem_cgroup_move_parent - moves page to the parent group + * @page: the page to move + * @pc: page_cgroup of the page + * @child: page's cgroup + * + * move charges to its parent or the root cgroup if the group has no + * parent (aka use_hierarchy==0). + * Although this might fail (get_page_unless_zero, isolate_lru_page or + * mem_cgroup_move_account fails) the failure is always temporary and + * it signals a race with a page removal/uncharge or migration. In the + * first case the page is on the way out and it will vanish from the LRU + * on the next attempt and the call should be retried later. + * Isolation from the LRU fails only if page has been isolated from + * the LRU since we looked at it and that usually means either global + * reclaim or migration going on. The page will either get back to the + * LRU or vanish. + * Finaly mem_cgroup_move_account fails only if the page got uncharged + * (!PageCgroupUsed) or moved to a different group. The page will + * disappear in the next attempt. */ - static int mem_cgroup_move_parent(struct page *page, struct page_cgroup *pc, struct mem_cgroup *child) @@ -2732,8 +2749,10 @@ static int mem_cgroup_move_parent(struct page *page, if (!parent) parent = root_mem_cgroup; - if (nr_pages > 1) + if (nr_pages > 1) { + VM_BUG_ON(!PageTransHuge(page)); flags = compound_lock_irqsave(page); + } ret = mem_cgroup_move_account(page, nr_pages, pc, child, parent); @@ -3683,17 +3702,22 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, return nr_reclaimed; } -/* +/** + * mem_cgroup_force_empty_list - clears LRU of a group + * @memcg: group to clear + * @node: NUMA node + * @zid: zone id + * @lru: lru to to clear + * * Traverse a specified page_cgroup list and try to drop them all. This doesn't - * reclaim the pages page themselves - it just removes the page_cgroups. - * Returns true if some page_cgroups were not freed, indicating that the caller - * must retry this operation. + * reclaim the pages page themselves - pages are moved to the parent (or root) + * group. */ -static bool mem_cgroup_force_empty_list(struct mem_cgroup *memcg, +static void mem_cgroup_force_empty_list(struct mem_cgroup *memcg, int node, int zid, enum lru_list lru) { struct mem_cgroup_per_zone *mz; - unsigned long flags, loop; + unsigned long flags; struct list_head *list; struct page *busy; struct zone *zone; @@ -3702,11 +3726,8 @@ static bool mem_cgroup_force_empty_list(struct mem_cgroup *memcg, mz = mem_cgroup_zoneinfo(memcg, node, zid); list = &mz->lruvec.lists[lru]; - loop = mz->lru_size[lru]; - /* give some margin against EBUSY etc...*/ - loop += 256; busy = NULL; - while (loop--) { + do { struct page_cgroup *pc; struct page *page; @@ -3732,8 +3753,7 @@ static bool mem_cgroup_force_empty_list(struct mem_cgroup *memcg, cond_resched(); } else busy = NULL; - } - return !list_empty(list); + } while (!list_empty(list)); } /* @@ -3747,7 +3767,6 @@ static int mem_cgroup_reparent_charges(struct mem_cgroup *memcg) { struct cgroup *cgrp = memcg->css.cgroup; int node, zid; - int ret; do { if (cgroup_task_count(cgrp) || !list_empty(&cgrp->children)) @@ -3755,28 +3774,30 @@ static int mem_cgroup_reparent_charges(struct mem_cgroup *memcg) /* This is for making all *used* pages to be on LRU. */ lru_add_drain_all(); drain_all_stock_sync(memcg); - ret = 0; mem_cgroup_start_move(memcg); for_each_node_state(node, N_HIGH_MEMORY) { - for (zid = 0; !ret && zid < MAX_NR_ZONES; zid++) { + for (zid = 0; zid < MAX_NR_ZONES; zid++) { enum lru_list lru; for_each_lru(lru) { - ret = mem_cgroup_force_empty_list(memcg, + mem_cgroup_force_empty_list(memcg, node, zid, lru); - if (ret) - break; } } - if (ret) - break; } mem_cgroup_end_move(memcg); memcg_oom_recover(memcg); cond_resched(); - /* "ret" should also be checked to ensure all lists are empty. */ - } while (res_counter_read_u64(&memcg->res, RES_USAGE) > 0 || ret); - return ret; + /* + * This is a safety check because mem_cgroup_force_empty_list + * could have raced with mem_cgroup_replace_page_cache callers + * so the lru seemed empty but the page could have been added + * right after the check. RES_USAGE should be safe as we always + * charge before adding to the LRU. + */ + } while (res_counter_read_u64(&memcg->res, RES_USAGE) > 0); + + return 0; } /* @@ -5618,7 +5639,6 @@ struct cgroup_subsys mem_cgroup_subsys = { .base_cftypes = mem_cgroup_files, .early_init = 0, .use_id = 1, - .__DEPRECATED_clear_css_refs = true, }; #ifdef CONFIG_MEMCG_SWAP -- cgit v1.2.3-59-g8ed1b From e93160803ffda2e67d9ff9cacb63bb6868c8398f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 5 Nov 2012 09:16:58 -0800 Subject: cgroup: kill CSS_REMOVED CSS_REMOVED is one of the several contortions which were necessary to support css reference draining on cgroup removal. All css->refcnts which need draining should be deactivated and verified to equal zero atomically w.r.t. css_tryget(). If any one isn't zero, all refcnts needed to be re-activated and css_tryget() shouldn't fail in the process. This was achieved by letting css_tryget() busy-loop until either the refcnt is reactivated (failed removal attempt) or CSS_REMOVED is set (committing to removal). Now that css refcnt draining is no longer used, there's no need for atomic rollback mechanism. css_tryget() simply can look at the reference count and fail if it's deactivated - it's never getting re-activated. This patch removes CSS_REMOVED and updates __css_tryget() to fail if the refcnt is deactivated. As deactivation and removal are a single step now, they no longer need to be protected against css_tryget() happening from irq context. Remove local_irq_disable/enable() from cgroup_rmdir(). Note that this removes css_is_removed() whose only user is VM_BUG_ON() in memcontrol.c. We can replace it with a check on the refcnt but given that the only use case is a debug assert, I think it's better to simply unexport it. v2: Comment updated and explanation on local_irq_disable/enable() added per Michal Hocko. Signed-off-by: Tejun Heo Reviewed-by: Michal Hocko Reviewed-by: KAMEZAWA Hiroyuki Acked-by: Li Zefan Cc: Johannes Weiner Cc: Balbir Singh --- include/linux/cgroup.h | 6 ------ kernel/cgroup.c | 31 ++++++++++++------------------- mm/memcontrol.c | 7 +++---- 3 files changed, 15 insertions(+), 29 deletions(-) (limited to 'mm/memcontrol.c') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 02e09c0e98ab..a3098046250b 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -85,7 +85,6 @@ struct cgroup_subsys_state { /* bits in struct cgroup_subsys_state flags field */ enum { CSS_ROOT, /* This CSS is the root of the subsystem */ - CSS_REMOVED, /* This CSS is dead */ }; /* Caller must verify that the css is not for root cgroup */ @@ -108,11 +107,6 @@ static inline void css_get(struct cgroup_subsys_state *css) __css_get(css, 1); } -static inline bool css_is_removed(struct cgroup_subsys_state *css) -{ - return test_bit(CSS_REMOVED, &css->flags); -} - /* * Call css_tryget() to take a reference on a css if your existing * (known-valid) reference isn't already ref-counted. Returns false if diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 8c605e2bdd1a..c194f9e4fc7b 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -170,8 +170,8 @@ struct css_id { * The css to which this ID points. This pointer is set to valid value * after cgroup is populated. If cgroup is removed, this will be NULL. * This pointer is expected to be RCU-safe because destroy() - * is called after synchronize_rcu(). But for safe use, css_is_removed() - * css_tryget() should be used for avoiding race. + * is called after synchronize_rcu(). But for safe use, css_tryget() + * should be used for avoiding race. */ struct cgroup_subsys_state __rcu *css; /* @@ -4112,8 +4112,6 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry) } prepare_to_wait(&cgroup_rmdir_waitq, &wait, TASK_INTERRUPTIBLE); - local_irq_disable(); - /* block new css_tryget() by deactivating refcnt */ for_each_subsys(cgrp->root, ss) { struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; @@ -4123,21 +4121,14 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry) } /* - * Set REMOVED. All in-progress css_tryget() will be released. * Put all the base refs. Each css holds an extra reference to the * cgroup's dentry and cgroup removal proceeds regardless of css * refs. On the last put of each css, whenever that may be, the * extra dentry ref is put so that dentry destruction happens only * after all css's are released. */ - for_each_subsys(cgrp->root, ss) { - struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; - - set_bit(CSS_REMOVED, &css->flags); - css_put(css); - } - - local_irq_enable(); + for_each_subsys(cgrp->root, ss) + css_put(cgrp->subsys[ss->subsys_id]); finish_wait(&cgroup_rmdir_waitq, &wait); clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags); @@ -4861,15 +4852,17 @@ static void check_for_release(struct cgroup *cgrp) /* Caller must verify that the css is not for root cgroup */ bool __css_tryget(struct cgroup_subsys_state *css) { - do { - int v = css_refcnt(css); + while (true) { + int t, v; - if (atomic_cmpxchg(&css->refcnt, v, v + 1) == v) + v = css_refcnt(css); + t = atomic_cmpxchg(&css->refcnt, v, v + 1); + if (likely(t == v)) return true; + else if (t < 0) + return false; cpu_relax(); - } while (!test_bit(CSS_REMOVED, &css->flags)); - - return false; + } } EXPORT_SYMBOL_GPL(__css_tryget); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 5a1d584ffed3..37c356646544 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2343,7 +2343,6 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, again: if (*ptr) { /* css should be a valid one */ memcg = *ptr; - VM_BUG_ON(css_is_removed(&memcg->css)); if (mem_cgroup_is_root(memcg)) goto done; if (nr_pages == 1 && consume_stock(memcg)) @@ -2483,9 +2482,9 @@ static void __mem_cgroup_cancel_local_charge(struct mem_cgroup *memcg, /* * A helper function to get mem_cgroup from ID. must be called under - * rcu_read_lock(). The caller must check css_is_removed() or some if - * it's concern. (dropping refcnt from swap can be called against removed - * memcg.) + * rcu_read_lock(). The caller is responsible for calling css_tryget if + * the mem_cgroup is used for charging. (dropping refcnt from swap can be + * called against removed memcg.) */ static struct mem_cgroup *mem_cgroup_lookup(unsigned short id) { -- cgit v1.2.3-59-g8ed1b From b25ed609d0eecf077db607e88ea70bae83b6adb2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 5 Nov 2012 09:16:59 -0800 Subject: cgroup: remove CGRP_WAIT_ON_RMDIR, cgroup_exclude_rmdir() and cgroup_release_and_wakeup_rmdir() CGRP_WAIT_ON_RMDIR is another kludge which was added to make cgroup destruction rollback somewhat working. cgroup_rmdir() used to drain CSS references and CGRP_WAIT_ON_RMDIR and the associated waitqueue and helpers were used to allow the task performing rmdir to wait for the next relevant event. Unfortunately, the wait is visible to controllers too and the mechanism got exposed to memcg by 887032670d ("cgroup avoid permanent sleep at rmdir"). Now that the draining and retries are gone, CGRP_WAIT_ON_RMDIR is unnecessary. Remove it and all the mechanisms supporting it. Note that memcontrol.c changes are essentially revert of 887032670d ("cgroup avoid permanent sleep at rmdir"). Signed-off-by: Tejun Heo Reviewed-by: Michal Hocko Reviewed-by: KAMEZAWA Hiroyuki Acked-by: Li Zefan Cc: Balbir Singh --- include/linux/cgroup.h | 21 --------------------- kernel/cgroup.c | 51 -------------------------------------------------- mm/memcontrol.c | 24 +----------------------- 3 files changed, 1 insertion(+), 95 deletions(-) (limited to 'mm/memcontrol.c') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index a3098046250b..47868a86ba2b 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -144,10 +144,6 @@ enum { CGRP_RELEASABLE, /* Control Group requires release notifications to userspace */ CGRP_NOTIFY_ON_RELEASE, - /* - * A thread in rmdir() is wating for this cgroup. - */ - CGRP_WAIT_ON_RMDIR, /* * Clone cgroup values when creating a new child cgroup */ @@ -411,23 +407,6 @@ int cgroup_task_count(const struct cgroup *cgrp); /* Return true if cgrp is a descendant of the task's cgroup */ int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task); -/* - * When the subsys has to access css and may add permanent refcnt to css, - * it should take care of racy conditions with rmdir(). Following set of - * functions, is for stop/restart rmdir if necessary. - * Because these will call css_get/put, "css" should be alive css. - * - * cgroup_exclude_rmdir(); - * ...do some jobs which may access arbitrary empty cgroup - * cgroup_release_and_wakeup_rmdir(); - * - * When someone removes a cgroup while cgroup_exclude_rmdir() holds it, - * it sleeps and cgroup_release_and_wakeup_rmdir() will wake him up. - */ - -void cgroup_exclude_rmdir(struct cgroup_subsys_state *css); -void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css); - /* * Control Group taskset, used to pass around set of tasks to cgroup_subsys * methods. diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 66204a6f68f3..c5f6fb28dd0e 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -965,33 +965,6 @@ static void cgroup_d_remove_dir(struct dentry *dentry) remove_dir(dentry); } -/* - * A queue for waiters to do rmdir() cgroup. A tasks will sleep when - * cgroup->count == 0 && list_empty(&cgroup->children) && subsys has some - * reference to css->refcnt. In general, this refcnt is expected to goes down - * to zero, soon. - * - * CGRP_WAIT_ON_RMDIR flag is set under cgroup's inode->i_mutex; - */ -static DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq); - -static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp) -{ - if (unlikely(test_and_clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags))) - wake_up_all(&cgroup_rmdir_waitq); -} - -void cgroup_exclude_rmdir(struct cgroup_subsys_state *css) -{ - css_get(css); -} - -void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css) -{ - cgroup_wakeup_rmdir_waiter(css->cgroup); - css_put(css); -} - /* * Call with cgroup_mutex held. Drops reference counts on modules, including * any duplicate ones that parse_cgroupfs_options took. If this function @@ -1963,12 +1936,6 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) } synchronize_rcu(); - - /* - * wake up rmdir() waiter. the rmdir should fail since the cgroup - * is no longer empty. - */ - cgroup_wakeup_rmdir_waiter(cgrp); out: if (retval) { for_each_subsys(root, ss) { @@ -2138,7 +2105,6 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) * step 5: success! and cleanup */ synchronize_rcu(); - cgroup_wakeup_rmdir_waiter(cgrp); retval = 0; out_put_css_set_refs: if (retval) { @@ -4058,26 +4024,13 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry) struct cgroup_event *event, *tmp; struct cgroup_subsys *ss; - /* - * In general, subsystem has no css->refcnt after pre_destroy(). But - * in racy cases, subsystem may have to get css->refcnt after - * pre_destroy() and it makes rmdir return with -EBUSY. This sometimes - * make rmdir return -EBUSY too often. To avoid that, we use waitqueue - * for cgroup's rmdir. CGRP_WAIT_ON_RMDIR is for synchronizing rmdir - * and subsystem's reference count handling. Please see css_get/put - * and css_tryget() and cgroup_wakeup_rmdir_waiter() implementation. - */ - set_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags); - /* the vfs holds both inode->i_mutex already */ mutex_lock(&cgroup_mutex); parent = cgrp->parent; if (atomic_read(&cgrp->count) || !list_empty(&cgrp->children)) { - clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags); mutex_unlock(&cgroup_mutex); return -EBUSY; } - prepare_to_wait(&cgroup_rmdir_waitq, &wait, TASK_INTERRUPTIBLE); /* * Block new css_tryget() by deactivating refcnt and mark @cgrp @@ -4114,9 +4067,6 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry) for_each_subsys(cgrp->root, ss) css_put(cgrp->subsys[ss->subsys_id]); - finish_wait(&cgroup_rmdir_waitq, &wait); - clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags); - raw_spin_lock(&release_list_lock); if (!list_empty(&cgrp->release_list)) list_del_init(&cgrp->release_list); @@ -4864,7 +4814,6 @@ void __css_put(struct cgroup_subsys_state *css) set_bit(CGRP_RELEASABLE, &cgrp->flags); check_for_release(cgrp); } - cgroup_wakeup_rmdir_waiter(cgrp); break; case 0: schedule_work(&css->dput_work); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 37c356646544..930edfaa5187 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2681,13 +2681,6 @@ static int mem_cgroup_move_account(struct page *page, /* caller should have done css_get */ pc->mem_cgroup = to; mem_cgroup_charge_statistics(to, anon, nr_pages); - /* - * We charges against "to" which may not have any tasks. Then, "to" - * can be under rmdir(). But in current implementation, caller of - * this function is just force_empty() and move charge, so it's - * guaranteed that "to" is never removed. So, we don't check rmdir - * status here. - */ move_unlock_mem_cgroup(from, &flags); ret = 0; unlock: @@ -2893,7 +2886,6 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *memcg, return; if (!memcg) return; - cgroup_exclude_rmdir(&memcg->css); __mem_cgroup_commit_charge(memcg, page, 1, ctype, true); /* @@ -2907,12 +2899,6 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *memcg, swp_entry_t ent = {.val = page_private(page)}; mem_cgroup_uncharge_swap(ent); } - /* - * At swapin, we may charge account against cgroup which has no tasks. - * So, rmdir()->pre_destroy() can be called while we do this charge. - * In that case, we need to call pre_destroy() again. check it here. - */ - cgroup_release_and_wakeup_rmdir(&memcg->css); } void mem_cgroup_commit_charge_swapin(struct page *page, @@ -3360,8 +3346,7 @@ void mem_cgroup_end_migration(struct mem_cgroup *memcg, if (!memcg) return; - /* blocks rmdir() */ - cgroup_exclude_rmdir(&memcg->css); + if (!migration_ok) { used = oldpage; unused = newpage; @@ -3395,13 +3380,6 @@ void mem_cgroup_end_migration(struct mem_cgroup *memcg, */ if (anon) mem_cgroup_uncharge_page(used); - /* - * At migration, we may charge account against cgroup which has no - * tasks. - * So, rmdir()->pre_destroy() can be called while we do this charge. - * In that case, we need to call pre_destroy() again. check it here. - */ - cgroup_release_and_wakeup_rmdir(&memcg->css); } /* -- cgit v1.2.3-59-g8ed1b From ab5196c202c60f84c7a74975742806aad242d9e3 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 26 Oct 2012 13:37:32 +0200 Subject: memcg: make mem_cgroup_reparent_charges non failing Now that pre_destroy callbacks are called from the context where neither any task can attach the group nor any children group can be added there is no other way to fail from mem_cgroup_pre_destroy. mem_cgroup_pre_destroy doesn't have to take a reference to memcg's css because all css' are marked dead already. tj: Remove now unused local variable @cgrp from mem_cgroup_reparent_charges(). Signed-off-by: Michal Hocko Reviewed-by: Glauber Costa Acked-by: KAMEZAWA Hiroyuki Signed-off-by: Tejun Heo --- mm/memcontrol.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) (limited to 'mm/memcontrol.c') diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 930edfaa5187..6678f991c6c6 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3740,14 +3740,11 @@ static void mem_cgroup_force_empty_list(struct mem_cgroup *memcg, * * Caller is responsible for holding css reference on the memcg. */ -static int mem_cgroup_reparent_charges(struct mem_cgroup *memcg) +static void mem_cgroup_reparent_charges(struct mem_cgroup *memcg) { - struct cgroup *cgrp = memcg->css.cgroup; int node, zid; do { - if (cgroup_task_count(cgrp) || !list_empty(&cgrp->children)) - return -EBUSY; /* This is for making all *used* pages to be on LRU. */ lru_add_drain_all(); drain_all_stock_sync(memcg); @@ -3773,8 +3770,6 @@ static int mem_cgroup_reparent_charges(struct mem_cgroup *memcg) * charge before adding to the LRU. */ } while (res_counter_read_u64(&memcg->res, RES_USAGE) > 0); - - return 0; } /* @@ -3811,7 +3806,9 @@ static int mem_cgroup_force_empty(struct mem_cgroup *memcg) } lru_add_drain(); - return mem_cgroup_reparent_charges(memcg); + mem_cgroup_reparent_charges(memcg); + + return 0; } static int mem_cgroup_force_empty_write(struct cgroup *cont, unsigned int event) @@ -5008,13 +5005,9 @@ free_out: static int mem_cgroup_pre_destroy(struct cgroup *cont) { struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); - int ret; - css_get(&memcg->css); - ret = mem_cgroup_reparent_charges(memcg); - css_put(&memcg->css); - - return ret; + mem_cgroup_reparent_charges(memcg); + return 0; } static void mem_cgroup_destroy(struct cgroup *cont) -- cgit v1.2.3-59-g8ed1b From bcf6de1b9129531215d26dd9af8331e84973bc52 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 5 Nov 2012 09:16:59 -0800 Subject: cgroup: make ->pre_destroy() return void All ->pre_destory() implementations return 0 now, which is the only allowed return value. Make it return void. Signed-off-by: Tejun Heo Reviewed-by: Michal Hocko Acked-by: KAMEZAWA Hiroyuki Acked-by: Li Zefan Cc: Balbir Singh Cc: Vivek Goyal --- block/blk-cgroup.c | 3 +-- include/linux/cgroup.h | 2 +- kernel/cgroup.c | 2 +- mm/hugetlb_cgroup.c | 4 +--- mm/memcontrol.c | 3 +-- 5 files changed, 5 insertions(+), 9 deletions(-) (limited to 'mm/memcontrol.c') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index f3b44a65fc7a..a7816f3d0059 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -600,7 +600,7 @@ struct cftype blkcg_files[] = { * * This is the blkcg counterpart of ioc_release_fn(). */ -static int blkcg_pre_destroy(struct cgroup *cgroup) +static void blkcg_pre_destroy(struct cgroup *cgroup) { struct blkcg *blkcg = cgroup_to_blkcg(cgroup); @@ -622,7 +622,6 @@ static int blkcg_pre_destroy(struct cgroup *cgroup) } spin_unlock_irq(&blkcg->lock); - return 0; } static void blkcg_destroy(struct cgroup *cgroup) diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 47868a86ba2b..adb2adc8ec1a 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -436,7 +436,7 @@ int cgroup_taskset_size(struct cgroup_taskset *tset); struct cgroup_subsys { struct cgroup_subsys_state *(*create)(struct cgroup *cgrp); - int (*pre_destroy)(struct cgroup *cgrp); + void (*pre_destroy)(struct cgroup *cgrp); void (*destroy)(struct cgroup *cgrp); int (*can_attach)(struct cgroup *cgrp, struct cgroup_taskset *tset); void (*cancel_attach)(struct cgroup *cgrp, struct cgroup_taskset *tset); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index c5f6fb28dd0e..83cd7d041c62 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -4054,7 +4054,7 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry) mutex_unlock(&cgroup_mutex); for_each_subsys(cgrp->root, ss) if (ss->pre_destroy) - WARN_ON_ONCE(ss->pre_destroy(cgrp)); + ss->pre_destroy(cgrp); mutex_lock(&cgroup_mutex); /* diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c index dc595c6b1f55..0d3a1a317731 100644 --- a/mm/hugetlb_cgroup.c +++ b/mm/hugetlb_cgroup.c @@ -155,7 +155,7 @@ out: * Force the hugetlb cgroup to empty the hugetlb resources by moving them to * the parent cgroup. */ -static int hugetlb_cgroup_pre_destroy(struct cgroup *cgroup) +static void hugetlb_cgroup_pre_destroy(struct cgroup *cgroup) { struct hstate *h; struct page *page; @@ -172,8 +172,6 @@ static int hugetlb_cgroup_pre_destroy(struct cgroup *cgroup) } cond_resched(); } while (hugetlb_cgroup_have_usage(cgroup)); - - return 0; } int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages, diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 6678f991c6c6..a1811ce60e20 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5002,12 +5002,11 @@ free_out: return ERR_PTR(error); } -static int mem_cgroup_pre_destroy(struct cgroup *cont) +static void mem_cgroup_pre_destroy(struct cgroup *cont) { struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); mem_cgroup_reparent_charges(memcg); - return 0; } static void mem_cgroup_destroy(struct cgroup *cont) -- cgit v1.2.3-59-g8ed1b From 92fb97487a7e41b222c1417cabd1d1ab7cc3a48c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 19 Nov 2012 08:13:38 -0800 Subject: cgroup: rename ->create/post_create/pre_destroy/destroy() to ->css_alloc/online/offline/free() Rename cgroup_subsys css lifetime related callbacks to better describe what their roles are. Also, update documentation. Signed-off-by: Tejun Heo Acked-by: Li Zefan --- Documentation/cgroups/cgroups.txt | 49 ++++++++++++++++++++++--------------- block/blk-cgroup.c | 14 +++++------ include/linux/cgroup.h | 35 ++++++++++++++------------- kernel/cgroup.c | 51 ++++++++++++++++++++------------------- kernel/cgroup_freezer.c | 20 +++++++-------- kernel/cpuset.c | 10 ++++---- kernel/events/core.c | 8 +++--- kernel/sched/core.c | 16 ++++++------ mm/hugetlb_cgroup.c | 14 +++++------ mm/memcontrol.c | 12 ++++----- net/core/netprio_cgroup.c | 8 +++--- net/sched/cls_cgroup.c | 8 +++--- security/device_cgroup.c | 8 +++--- 13 files changed, 132 insertions(+), 121 deletions(-) (limited to 'mm/memcontrol.c') diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt index 9e04196c4d78..b06eea217403 100644 --- a/Documentation/cgroups/cgroups.txt +++ b/Documentation/cgroups/cgroups.txt @@ -553,16 +553,16 @@ call to cgroup_unload_subsys(). It should also set its_subsys.module = THIS_MODULE in its .c file. Each subsystem may export the following methods. The only mandatory -methods are create/destroy. Any others that are null are presumed to +methods are css_alloc/free. Any others that are null are presumed to be successful no-ops. -struct cgroup_subsys_state *create(struct cgroup *cgrp) +struct cgroup_subsys_state *css_alloc(struct cgroup *cgrp) (cgroup_mutex held by caller) -Called to create a subsystem state object for a cgroup. The +Called to allocate a subsystem state object for a cgroup. The subsystem should allocate its subsystem state object for the passed cgroup, returning a pointer to the new object on success or a -negative error code. On success, the subsystem pointer should point to +ERR_PTR() value. On success, the subsystem pointer should point to a structure of type cgroup_subsys_state (typically embedded in a larger subsystem-specific object), which will be initialized by the cgroup system. Note that this will be called at initialization to @@ -571,24 +571,33 @@ identified by the passed cgroup object having a NULL parent (since it's the root of the hierarchy) and may be an appropriate place for initialization code. -void destroy(struct cgroup *cgrp) +int css_online(struct cgroup *cgrp) (cgroup_mutex held by caller) -The cgroup system is about to destroy the passed cgroup; the subsystem -should do any necessary cleanup and free its subsystem state -object. By the time this method is called, the cgroup has already been -unlinked from the file system and from the child list of its parent; -cgroup->parent is still valid. (Note - can also be called for a -newly-created cgroup if an error occurs after this subsystem's -create() method has been called for the new cgroup). - -int pre_destroy(struct cgroup *cgrp); - -Called before checking the reference count on each subsystem. This may -be useful for subsystems which have some extra references even if -there are not tasks in the cgroup. If pre_destroy() returns error code, -rmdir() will fail with it. From this behavior, pre_destroy() can be -called multiple times against a cgroup. +Called after @cgrp successfully completed all allocations and made +visible to cgroup_for_each_child/descendant_*() iterators. The +subsystem may choose to fail creation by returning -errno. This +callback can be used to implement reliable state sharing and +propagation along the hierarchy. See the comment on +cgroup_for_each_descendant_pre() for details. + +void css_offline(struct cgroup *cgrp); + +This is the counterpart of css_online() and called iff css_online() +has succeeded on @cgrp. This signifies the beginning of the end of +@cgrp. @cgrp is being removed and the subsystem should start dropping +all references it's holding on @cgrp. When all references are dropped, +cgroup removal will proceed to the next step - css_free(). After this +callback, @cgrp should be considered dead to the subsystem. + +void css_free(struct cgroup *cgrp) +(cgroup_mutex held by caller) + +The cgroup system is about to free @cgrp; the subsystem should free +its subsystem state object. By the time this method is called, @cgrp +is completely unused; @cgrp->parent is still valid. (Note - can also +be called for a newly-created cgroup if an error occurs after this +subsystem's create() method has been called for the new cgroup). int can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) (cgroup_mutex held by caller) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 3dc60fc441cb..3f6d39d23bb6 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -600,7 +600,7 @@ struct cftype blkcg_files[] = { }; /** - * blkcg_pre_destroy - cgroup pre_destroy callback + * blkcg_css_offline - cgroup css_offline callback * @cgroup: cgroup of interest * * This function is called when @cgroup is about to go away and responsible @@ -610,7 +610,7 @@ struct cftype blkcg_files[] = { * * This is the blkcg counterpart of ioc_release_fn(). */ -static void blkcg_pre_destroy(struct cgroup *cgroup) +static void blkcg_css_offline(struct cgroup *cgroup) { struct blkcg *blkcg = cgroup_to_blkcg(cgroup); @@ -634,7 +634,7 @@ static void blkcg_pre_destroy(struct cgroup *cgroup) spin_unlock_irq(&blkcg->lock); } -static void blkcg_destroy(struct cgroup *cgroup) +static void blkcg_css_free(struct cgroup *cgroup) { struct blkcg *blkcg = cgroup_to_blkcg(cgroup); @@ -642,7 +642,7 @@ static void blkcg_destroy(struct cgroup *cgroup) kfree(blkcg); } -static struct cgroup_subsys_state *blkcg_create(struct cgroup *cgroup) +static struct cgroup_subsys_state *blkcg_css_alloc(struct cgroup *cgroup) { static atomic64_t id_seq = ATOMIC64_INIT(0); struct blkcg *blkcg; @@ -739,10 +739,10 @@ static int blkcg_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) struct cgroup_subsys blkio_subsys = { .name = "blkio", - .create = blkcg_create, + .css_alloc = blkcg_css_alloc, + .css_offline = blkcg_css_offline, + .css_free = blkcg_css_free, .can_attach = blkcg_can_attach, - .pre_destroy = blkcg_pre_destroy, - .destroy = blkcg_destroy, .subsys_id = blkio_subsys_id, .base_cftypes = blkcg_files, .module = THIS_MODULE, diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 03d8a92786da..7a2189ca8327 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -82,7 +82,7 @@ struct cgroup_subsys_state { /* bits in struct cgroup_subsys_state flags field */ enum { CSS_ROOT = (1 << 0), /* this CSS is the root of the subsystem */ - CSS_ONLINE = (1 << 1), /* between ->post_create() and ->pre_destroy() */ + CSS_ONLINE = (1 << 1), /* between ->css_online() and ->css_offline() */ }; /* Caller must verify that the css is not for root cgroup */ @@ -439,10 +439,11 @@ int cgroup_taskset_size(struct cgroup_taskset *tset); */ struct cgroup_subsys { - struct cgroup_subsys_state *(*create)(struct cgroup *cgrp); - int (*post_create)(struct cgroup *cgrp); - void (*pre_destroy)(struct cgroup *cgrp); - void (*destroy)(struct cgroup *cgrp); + struct cgroup_subsys_state *(*css_alloc)(struct cgroup *cgrp); + int (*css_online)(struct cgroup *cgrp); + void (*css_offline)(struct cgroup *cgrp); + void (*css_free)(struct cgroup *cgrp); + int (*can_attach)(struct cgroup *cgrp, struct cgroup_taskset *tset); void (*cancel_attach)(struct cgroup *cgrp, struct cgroup_taskset *tset); void (*attach)(struct cgroup *cgrp, struct cgroup_taskset *tset); @@ -541,13 +542,13 @@ static inline struct cgroup* task_cgroup(struct task_struct *task, * @cgroup: cgroup whose children to walk * * Walk @cgroup's children. Must be called under rcu_read_lock(). A child - * cgroup which hasn't finished ->post_create() or already has finished - * ->pre_destroy() may show up during traversal and it's each subsystem's + * cgroup which hasn't finished ->css_online() or already has finished + * ->css_offline() may show up during traversal and it's each subsystem's * responsibility to verify that each @pos is alive. * - * If a subsystem synchronizes against the parent in its ->post_create() - * and before starting iterating, a cgroup which finished ->post_create() - * is guaranteed to be visible in the future iterations. + * If a subsystem synchronizes against the parent in its ->css_online() and + * before starting iterating, a cgroup which finished ->css_online() is + * guaranteed to be visible in the future iterations. */ #define cgroup_for_each_child(pos, cgroup) \ list_for_each_entry_rcu(pos, &(cgroup)->children, sibling) @@ -561,19 +562,19 @@ struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos, * @cgroup: cgroup whose descendants to walk * * Walk @cgroup's descendants. Must be called under rcu_read_lock(). A - * descendant cgroup which hasn't finished ->post_create() or already has - * finished ->pre_destroy() may show up during traversal and it's each + * descendant cgroup which hasn't finished ->css_online() or already has + * finished ->css_offline() may show up during traversal and it's each * subsystem's responsibility to verify that each @pos is alive. * - * If a subsystem synchronizes against the parent in its ->post_create() - * and before starting iterating, and synchronizes against @pos on each - * iteration, any descendant cgroup which finished ->post_create() is + * If a subsystem synchronizes against the parent in its ->css_online() and + * before starting iterating, and synchronizes against @pos on each + * iteration, any descendant cgroup which finished ->css_offline() is * guaranteed to be visible in the future iterations. * * In other words, the following guarantees that a descendant can't escape * state updates of its ancestors. * - * my_post_create(@cgrp) + * my_online(@cgrp) * { * Lock @cgrp->parent and @cgrp; * Inherit state from @cgrp->parent; @@ -606,7 +607,7 @@ struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos, * iteration should lock and unlock both @pos->parent and @pos. * * Alternatively, a subsystem may choose to use a single global lock to - * synchronize ->post_create() and ->pre_destroy() against tree-walking + * synchronize ->css_online() and ->css_offline() against tree-walking * operations. */ #define cgroup_for_each_descendant_pre(pos, cgroup) \ diff --git a/kernel/cgroup.c b/kernel/cgroup.c index c389f4258681..d35463bab487 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -876,7 +876,7 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode) * Release the subsystem state objects. */ for_each_subsys(cgrp->root, ss) - ss->destroy(cgrp); + ss->css_free(cgrp); cgrp->root->number_of_cgroups--; mutex_unlock(&cgroup_mutex); @@ -4048,8 +4048,8 @@ static int online_css(struct cgroup_subsys *ss, struct cgroup *cgrp) lockdep_assert_held(&cgroup_mutex); - if (ss->post_create) - ret = ss->post_create(cgrp); + if (ss->css_online) + ret = ss->css_online(cgrp); if (!ret) cgrp->subsys[ss->subsys_id]->flags |= CSS_ONLINE; return ret; @@ -4067,14 +4067,14 @@ static void offline_css(struct cgroup_subsys *ss, struct cgroup *cgrp) return; /* - * pre_destroy() should be called with cgroup_mutex unlocked. See + * css_offline() should be called with cgroup_mutex unlocked. See * 3fa59dfbc3 ("cgroup: fix potential deadlock in pre_destroy") for * details. This temporary unlocking should go away once * cgroup_mutex is unexported from controllers. */ - if (ss->pre_destroy) { + if (ss->css_offline) { mutex_unlock(&cgroup_mutex); - ss->pre_destroy(cgrp); + ss->css_offline(cgrp); mutex_lock(&cgroup_mutex); } @@ -4136,7 +4136,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, for_each_subsys(root, ss) { struct cgroup_subsys_state *css; - css = ss->create(cgrp); + css = ss->css_alloc(cgrp); if (IS_ERR(css)) { err = PTR_ERR(css); goto err_free_all; @@ -4147,7 +4147,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, if (err) goto err_free_all; } - /* At error, ->destroy() callback has to free assigned ID. */ + /* At error, ->css_free() callback has to free assigned ID. */ if (clone_children(parent) && ss->post_clone) ss->post_clone(cgrp); @@ -4201,7 +4201,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, err_free_all: for_each_subsys(root, ss) { if (cgrp->subsys[ss->subsys_id]) - ss->destroy(cgrp); + ss->css_free(cgrp); } mutex_unlock(&cgroup_mutex); /* Release the reference count that we took on the superblock */ @@ -4381,7 +4381,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss) /* Create the top cgroup state for this subsystem */ list_add(&ss->sibling, &rootnode.subsys_list); ss->root = &rootnode; - css = ss->create(dummytop); + css = ss->css_alloc(dummytop); /* We don't handle early failures gracefully */ BUG_ON(IS_ERR(css)); init_cgroup_css(css, ss, dummytop); @@ -4425,7 +4425,7 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss) /* check name and function validity */ if (ss->name == NULL || strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN || - ss->create == NULL || ss->destroy == NULL) + ss->css_alloc == NULL || ss->css_free == NULL) return -EINVAL; /* @@ -4454,10 +4454,11 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss) subsys[ss->subsys_id] = ss; /* - * no ss->create seems to need anything important in the ss struct, so - * this can happen first (i.e. before the rootnode attachment). + * no ss->css_alloc seems to need anything important in the ss + * struct, so this can happen first (i.e. before the rootnode + * attachment). */ - css = ss->create(dummytop); + css = ss->css_alloc(dummytop); if (IS_ERR(css)) { /* failure case - need to deassign the subsys[] slot. */ subsys[ss->subsys_id] = NULL; @@ -4577,12 +4578,12 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss) write_unlock(&css_set_lock); /* - * remove subsystem's css from the dummytop and free it - need to free - * before marking as null because ss->destroy needs the cgrp->subsys - * pointer to find their state. note that this also takes care of - * freeing the css_id. + * remove subsystem's css from the dummytop and free it - need to + * free before marking as null because ss->css_free needs the + * cgrp->subsys pointer to find their state. note that this also + * takes care of freeing the css_id. */ - ss->destroy(dummytop); + ss->css_free(dummytop); dummytop->subsys[ss->subsys_id] = NULL; mutex_unlock(&cgroup_mutex); @@ -4626,8 +4627,8 @@ int __init cgroup_init_early(void) BUG_ON(!ss->name); BUG_ON(strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN); - BUG_ON(!ss->create); - BUG_ON(!ss->destroy); + BUG_ON(!ss->css_alloc); + BUG_ON(!ss->css_free); if (ss->subsys_id != i) { printk(KERN_ERR "cgroup: Subsys %s id == %d\n", ss->name, ss->subsys_id); @@ -5439,7 +5440,7 @@ struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id) } #ifdef CONFIG_CGROUP_DEBUG -static struct cgroup_subsys_state *debug_create(struct cgroup *cont) +static struct cgroup_subsys_state *debug_css_alloc(struct cgroup *cont) { struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL); @@ -5449,7 +5450,7 @@ static struct cgroup_subsys_state *debug_create(struct cgroup *cont) return css; } -static void debug_destroy(struct cgroup *cont) +static void debug_css_free(struct cgroup *cont) { kfree(cont->subsys[debug_subsys_id]); } @@ -5578,8 +5579,8 @@ static struct cftype debug_files[] = { struct cgroup_subsys debug_subsys = { .name = "debug", - .create = debug_create, - .destroy = debug_destroy, + .css_alloc = debug_css_alloc, + .css_free = debug_css_free, .subsys_id = debug_subsys_id, .base_cftypes = debug_files, }; diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index ee8bb671688c..75dda1ea5026 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -92,7 +92,7 @@ static const char *freezer_state_strs(unsigned int state) struct cgroup_subsys freezer_subsys; -static struct cgroup_subsys_state *freezer_create(struct cgroup *cgroup) +static struct cgroup_subsys_state *freezer_css_alloc(struct cgroup *cgroup) { struct freezer *freezer; @@ -105,14 +105,14 @@ static struct cgroup_subsys_state *freezer_create(struct cgroup *cgroup) } /** - * freezer_post_create - commit creation of a freezer cgroup + * freezer_css_online - commit creation of a freezer cgroup * @cgroup: cgroup being created * * We're committing to creation of @cgroup. Mark it online and inherit * parent's freezing state while holding both parent's and our * freezer->lock. */ -static int freezer_post_create(struct cgroup *cgroup) +static int freezer_css_online(struct cgroup *cgroup) { struct freezer *freezer = cgroup_freezer(cgroup); struct freezer *parent = parent_freezer(freezer); @@ -141,13 +141,13 @@ static int freezer_post_create(struct cgroup *cgroup) } /** - * freezer_pre_destroy - initiate destruction of @cgroup + * freezer_css_offline - initiate destruction of @cgroup * @cgroup: cgroup being destroyed * * @cgroup is going away. Mark it dead and decrement system_freezing_count * if it was holding one. */ -static void freezer_pre_destroy(struct cgroup *cgroup) +static void freezer_css_offline(struct cgroup *cgroup) { struct freezer *freezer = cgroup_freezer(cgroup); @@ -161,7 +161,7 @@ static void freezer_pre_destroy(struct cgroup *cgroup) spin_unlock_irq(&freezer->lock); } -static void freezer_destroy(struct cgroup *cgroup) +static void freezer_css_free(struct cgroup *cgroup) { kfree(cgroup_freezer(cgroup)); } @@ -477,10 +477,10 @@ static struct cftype files[] = { struct cgroup_subsys freezer_subsys = { .name = "freezer", - .create = freezer_create, - .post_create = freezer_post_create, - .pre_destroy = freezer_pre_destroy, - .destroy = freezer_destroy, + .css_alloc = freezer_css_alloc, + .css_online = freezer_css_online, + .css_offline = freezer_css_offline, + .css_free = freezer_css_free, .subsys_id = freezer_subsys_id, .attach = freezer_attach, .fork = freezer_fork, diff --git a/kernel/cpuset.c b/kernel/cpuset.c index f33c7153b6d7..06931337c4e5 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1821,11 +1821,11 @@ static void cpuset_post_clone(struct cgroup *cgroup) } /* - * cpuset_create - create a cpuset + * cpuset_css_alloc - allocate a cpuset css * cont: control group that the new cpuset will be part of */ -static struct cgroup_subsys_state *cpuset_create(struct cgroup *cont) +static struct cgroup_subsys_state *cpuset_css_alloc(struct cgroup *cont) { struct cpuset *cs; struct cpuset *parent; @@ -1864,7 +1864,7 @@ static struct cgroup_subsys_state *cpuset_create(struct cgroup *cont) * will call async_rebuild_sched_domains(). */ -static void cpuset_destroy(struct cgroup *cont) +static void cpuset_css_free(struct cgroup *cont) { struct cpuset *cs = cgroup_cs(cont); @@ -1878,8 +1878,8 @@ static void cpuset_destroy(struct cgroup *cont) struct cgroup_subsys cpuset_subsys = { .name = "cpuset", - .create = cpuset_create, - .destroy = cpuset_destroy, + .css_alloc = cpuset_css_alloc, + .css_free = cpuset_css_free, .can_attach = cpuset_can_attach, .attach = cpuset_attach, .post_clone = cpuset_post_clone, diff --git a/kernel/events/core.c b/kernel/events/core.c index dbccf83c134d..f9ff5493171d 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7434,7 +7434,7 @@ unlock: device_initcall(perf_event_sysfs_init); #ifdef CONFIG_CGROUP_PERF -static struct cgroup_subsys_state *perf_cgroup_create(struct cgroup *cont) +static struct cgroup_subsys_state *perf_cgroup_css_alloc(struct cgroup *cont) { struct perf_cgroup *jc; @@ -7451,7 +7451,7 @@ static struct cgroup_subsys_state *perf_cgroup_create(struct cgroup *cont) return &jc->css; } -static void perf_cgroup_destroy(struct cgroup *cont) +static void perf_cgroup_css_free(struct cgroup *cont) { struct perf_cgroup *jc; jc = container_of(cgroup_subsys_state(cont, perf_subsys_id), @@ -7492,8 +7492,8 @@ static void perf_cgroup_exit(struct cgroup *cgrp, struct cgroup *old_cgrp, struct cgroup_subsys perf_subsys = { .name = "perf_event", .subsys_id = perf_subsys_id, - .create = perf_cgroup_create, - .destroy = perf_cgroup_destroy, + .css_alloc = perf_cgroup_css_alloc, + .css_free = perf_cgroup_css_free, .exit = perf_cgroup_exit, .attach = perf_cgroup_attach, diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 2d8927fda712..6f20c8fb2326 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7468,7 +7468,7 @@ static inline struct task_group *cgroup_tg(struct cgroup *cgrp) struct task_group, css); } -static struct cgroup_subsys_state *cpu_cgroup_create(struct cgroup *cgrp) +static struct cgroup_subsys_state *cpu_cgroup_css_alloc(struct cgroup *cgrp) { struct task_group *tg, *parent; @@ -7485,7 +7485,7 @@ static struct cgroup_subsys_state *cpu_cgroup_create(struct cgroup *cgrp) return &tg->css; } -static void cpu_cgroup_destroy(struct cgroup *cgrp) +static void cpu_cgroup_css_free(struct cgroup *cgrp) { struct task_group *tg = cgroup_tg(cgrp); @@ -7845,8 +7845,8 @@ static struct cftype cpu_files[] = { struct cgroup_subsys cpu_cgroup_subsys = { .name = "cpu", - .create = cpu_cgroup_create, - .destroy = cpu_cgroup_destroy, + .css_alloc = cpu_cgroup_css_alloc, + .css_free = cpu_cgroup_css_free, .can_attach = cpu_cgroup_can_attach, .attach = cpu_cgroup_attach, .exit = cpu_cgroup_exit, @@ -7869,7 +7869,7 @@ struct cgroup_subsys cpu_cgroup_subsys = { struct cpuacct root_cpuacct; /* create a new cpu accounting group */ -static struct cgroup_subsys_state *cpuacct_create(struct cgroup *cgrp) +static struct cgroup_subsys_state *cpuacct_css_alloc(struct cgroup *cgrp) { struct cpuacct *ca; @@ -7899,7 +7899,7 @@ out: } /* destroy an existing cpu accounting group */ -static void cpuacct_destroy(struct cgroup *cgrp) +static void cpuacct_css_free(struct cgroup *cgrp) { struct cpuacct *ca = cgroup_ca(cgrp); @@ -8070,8 +8070,8 @@ void cpuacct_charge(struct task_struct *tsk, u64 cputime) struct cgroup_subsys cpuacct_subsys = { .name = "cpuacct", - .create = cpuacct_create, - .destroy = cpuacct_destroy, + .css_alloc = cpuacct_css_alloc, + .css_free = cpuacct_css_free, .subsys_id = cpuacct_subsys_id, .base_cftypes = files, }; diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c index 0d3a1a317731..b5bde7a5c017 100644 --- a/mm/hugetlb_cgroup.c +++ b/mm/hugetlb_cgroup.c @@ -77,7 +77,7 @@ static inline bool hugetlb_cgroup_have_usage(struct cgroup *cg) return false; } -static struct cgroup_subsys_state *hugetlb_cgroup_create(struct cgroup *cgroup) +static struct cgroup_subsys_state *hugetlb_cgroup_css_alloc(struct cgroup *cgroup) { int idx; struct cgroup *parent_cgroup; @@ -101,7 +101,7 @@ static struct cgroup_subsys_state *hugetlb_cgroup_create(struct cgroup *cgroup) return &h_cgroup->css; } -static void hugetlb_cgroup_destroy(struct cgroup *cgroup) +static void hugetlb_cgroup_css_free(struct cgroup *cgroup) { struct hugetlb_cgroup *h_cgroup; @@ -155,7 +155,7 @@ out: * Force the hugetlb cgroup to empty the hugetlb resources by moving them to * the parent cgroup. */ -static void hugetlb_cgroup_pre_destroy(struct cgroup *cgroup) +static void hugetlb_cgroup_css_offline(struct cgroup *cgroup) { struct hstate *h; struct page *page; @@ -404,8 +404,8 @@ void hugetlb_cgroup_migrate(struct page *oldhpage, struct page *newhpage) struct cgroup_subsys hugetlb_subsys = { .name = "hugetlb", - .create = hugetlb_cgroup_create, - .pre_destroy = hugetlb_cgroup_pre_destroy, - .destroy = hugetlb_cgroup_destroy, - .subsys_id = hugetlb_subsys_id, + .css_alloc = hugetlb_cgroup_css_alloc, + .css_offline = hugetlb_cgroup_css_offline, + .css_free = hugetlb_cgroup_css_free, + .subsys_id = hugetlb_subsys_id, }; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 08adaaae6fcc..8b0b2b028e23 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4922,7 +4922,7 @@ err_cleanup: } static struct cgroup_subsys_state * __ref -mem_cgroup_create(struct cgroup *cont) +mem_cgroup_css_alloc(struct cgroup *cont) { struct mem_cgroup *memcg, *parent; long error = -ENOMEM; @@ -5003,14 +5003,14 @@ free_out: return ERR_PTR(error); } -static void mem_cgroup_pre_destroy(struct cgroup *cont) +static void mem_cgroup_css_offline(struct cgroup *cont) { struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); mem_cgroup_reparent_charges(memcg); } -static void mem_cgroup_destroy(struct cgroup *cont) +static void mem_cgroup_css_free(struct cgroup *cont) { struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); @@ -5600,9 +5600,9 @@ static void mem_cgroup_move_task(struct cgroup *cont, struct cgroup_subsys mem_cgroup_subsys = { .name = "memory", .subsys_id = mem_cgroup_subsys_id, - .create = mem_cgroup_create, - .pre_destroy = mem_cgroup_pre_destroy, - .destroy = mem_cgroup_destroy, + .css_alloc = mem_cgroup_css_alloc, + .css_offline = mem_cgroup_css_offline, + .css_free = mem_cgroup_css_free, .can_attach = mem_cgroup_can_attach, .cancel_attach = mem_cgroup_cancel_attach, .attach = mem_cgroup_move_task, diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 79285a36035f..f0b6b0d572c1 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -108,7 +108,7 @@ static int write_update_netdev_table(struct net_device *dev) return ret; } -static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp) +static struct cgroup_subsys_state *cgrp_css_alloc(struct cgroup *cgrp) { struct cgroup_netprio_state *cs; int ret = -EINVAL; @@ -132,7 +132,7 @@ out: return ERR_PTR(ret); } -static void cgrp_destroy(struct cgroup *cgrp) +static void cgrp_css_free(struct cgroup *cgrp) { struct cgroup_netprio_state *cs; struct net_device *dev; @@ -276,8 +276,8 @@ static struct cftype ss_files[] = { struct cgroup_subsys net_prio_subsys = { .name = "net_prio", - .create = cgrp_create, - .destroy = cgrp_destroy, + .css_alloc = cgrp_css_alloc, + .css_free = cgrp_css_free, .attach = net_prio_attach, .subsys_id = net_prio_subsys_id, .base_cftypes = ss_files, diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 2ecde225ae60..8cdc18e075fb 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -34,7 +34,7 @@ static inline struct cgroup_cls_state *task_cls_state(struct task_struct *p) struct cgroup_cls_state, css); } -static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp) +static struct cgroup_subsys_state *cgrp_css_alloc(struct cgroup *cgrp) { struct cgroup_cls_state *cs; @@ -48,7 +48,7 @@ static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp) return &cs->css; } -static void cgrp_destroy(struct cgroup *cgrp) +static void cgrp_css_free(struct cgroup *cgrp) { kfree(cgrp_cls_state(cgrp)); } @@ -75,8 +75,8 @@ static struct cftype ss_files[] = { struct cgroup_subsys net_cls_subsys = { .name = "net_cls", - .create = cgrp_create, - .destroy = cgrp_destroy, + .css_alloc = cgrp_css_alloc, + .css_free = cgrp_css_free, .subsys_id = net_cls_subsys_id, .base_cftypes = ss_files, .module = THIS_MODULE, diff --git a/security/device_cgroup.c b/security/device_cgroup.c index 78a16f5b7275..19ecc8de9e6b 100644 --- a/security/device_cgroup.c +++ b/security/device_cgroup.c @@ -180,7 +180,7 @@ static void dev_exception_clean(struct dev_cgroup *dev_cgroup) /* * called from kernel/cgroup.c with cgroup_lock() held. */ -static struct cgroup_subsys_state *devcgroup_create(struct cgroup *cgroup) +static struct cgroup_subsys_state *devcgroup_css_alloc(struct cgroup *cgroup) { struct dev_cgroup *dev_cgroup, *parent_dev_cgroup; struct cgroup *parent_cgroup; @@ -210,7 +210,7 @@ static struct cgroup_subsys_state *devcgroup_create(struct cgroup *cgroup) return &dev_cgroup->css; } -static void devcgroup_destroy(struct cgroup *cgroup) +static void devcgroup_css_free(struct cgroup *cgroup) { struct dev_cgroup *dev_cgroup; @@ -564,8 +564,8 @@ static struct cftype dev_cgroup_files[] = { struct cgroup_subsys devices_subsys = { .name = "devices", .can_attach = devcgroup_can_attach, - .create = devcgroup_create, - .destroy = devcgroup_destroy, + .css_alloc = devcgroup_css_alloc, + .css_free = devcgroup_css_free, .subsys_id = devices_subsys_id, .base_cftypes = dev_cgroup_files, -- cgit v1.2.3-59-g8ed1b