diff options
Diffstat (limited to 'include/linux')
-rw-r--r-- | include/linux/backing-dev.h | 5 | ||||
-rw-r--r-- | include/linux/cgroup-defs.h | 60 | ||||
-rw-r--r-- | include/linux/cgroup.h | 114 | ||||
-rw-r--r-- | include/linux/hugetlb_cgroup.h | 4 | ||||
-rw-r--r-- | include/linux/init_task.h | 8 | ||||
-rw-r--r-- | include/linux/jump_label.h | 18 | ||||
-rw-r--r-- | include/linux/memcontrol.h | 8 | ||||
-rw-r--r-- | include/linux/sched.h | 12 |
8 files changed, 125 insertions, 104 deletions
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index d5eb4ad1c534..08d9a8eac42c 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -13,7 +13,6 @@ #include <linux/sched.h> #include <linux/blkdev.h> #include <linux/writeback.h> -#include <linux/memcontrol.h> #include <linux/blk-cgroup.h> #include <linux/backing-dev-defs.h> #include <linux/slab.h> @@ -263,8 +262,8 @@ static inline bool inode_cgwb_enabled(struct inode *inode) { struct backing_dev_info *bdi = inode_to_bdi(inode); - return cgroup_on_dfl(mem_cgroup_root_css->cgroup) && - cgroup_on_dfl(blkcg_root_css->cgroup) && + return cgroup_subsys_on_dfl(memory_cgrp_subsys) && + cgroup_subsys_on_dfl(io_cgrp_subsys) && bdi_cap_account_dirty(bdi) && (bdi->capabilities & BDI_CAP_CGROUP_WRITEBACK) && (inode->i_sb->s_iflags & SB_I_CGROUPWB); diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 8492721b39be..df589a097539 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -76,6 +76,7 @@ enum { CFTYPE_ONLY_ON_ROOT = (1 << 0), /* only create on root cgrp */ CFTYPE_NOT_ON_ROOT = (1 << 1), /* don't create on root cgrp */ CFTYPE_NO_PREFIX = (1 << 3), /* (DON'T USE FOR NEW FILES) no subsys prefix */ + CFTYPE_WORLD_WRITABLE = (1 << 4), /* (DON'T USE FOR NEW FILES) S_IWUGO */ /* internal flags, do not use outside cgroup core proper */ __CFTYPE_ONLY_ON_DFL = (1 << 16), /* only on default hierarchy */ @@ -83,6 +84,17 @@ enum { }; /* + * cgroup_file is the handle for a file instance created in a cgroup which + * is used, for example, to generate file changed notifications. This can + * be obtained by setting cftype->file_offset. + */ +struct cgroup_file { + /* do not access any fields from outside cgroup core */ + struct list_head node; /* anchored at css->files */ + struct kernfs_node *kn; +}; + +/* * Per-subsystem/per-cgroup state maintained by the system. This is the * fundamental structural building block that controllers deal with. * @@ -122,6 +134,9 @@ struct cgroup_subsys_state { */ u64 serial_nr; + /* all cgroup_files associated with this css */ + struct list_head files; + /* percpu_ref killing and RCU release */ struct rcu_head rcu_head; struct work_struct destroy_work; @@ -225,8 +240,8 @@ struct cgroup { int populated_cnt; struct kernfs_node *kn; /* cgroup kernfs entry */ - struct kernfs_node *procs_kn; /* kn for "cgroup.procs" */ - struct kernfs_node *populated_kn; /* kn for "cgroup.subtree_populated" */ + struct cgroup_file procs_file; /* handle for "cgroup.procs" */ + struct cgroup_file events_file; /* handle for "cgroup.events" */ /* * The bitmask of subsystems enabled on the child cgroups. @@ -324,11 +339,6 @@ struct cftype { */ char name[MAX_CFTYPE_NAME]; unsigned long private; - /* - * If not 0, file mode is set to this value, otherwise it will - * be figured out automatically - */ - umode_t mode; /* * The maximum length of string, excluding trailing nul, that can @@ -340,6 +350,14 @@ struct cftype { unsigned int flags; /* + * If non-zero, should contain the offset from the start of css to + * a struct cgroup_file field. cgroup will record the handle of + * the created file into it. The recorded handle can be used as + * long as the containing css remains accessible. + */ + unsigned int file_offset; + + /* * Fields used for internal bookkeeping. Initialized automatically * during registration. */ @@ -419,7 +437,6 @@ struct cgroup_subsys { struct task_struct *task); void (*bind)(struct cgroup_subsys_state *root_css); - int disabled; int early_init; /* @@ -473,8 +490,31 @@ struct cgroup_subsys { unsigned int depends_on; }; -void cgroup_threadgroup_change_begin(struct task_struct *tsk); -void cgroup_threadgroup_change_end(struct task_struct *tsk); +extern struct percpu_rw_semaphore cgroup_threadgroup_rwsem; + +/** + * cgroup_threadgroup_change_begin - threadgroup exclusion for cgroups + * @tsk: target task + * + * Called from threadgroup_change_begin() and allows cgroup operations to + * synchronize against threadgroup changes using a percpu_rw_semaphore. + */ +static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk) +{ + percpu_down_read(&cgroup_threadgroup_rwsem); +} + +/** + * cgroup_threadgroup_change_end - threadgroup exclusion for cgroups + * @tsk: target task + * + * Called from threadgroup_change_end(). Counterpart of + * cgroup_threadcgroup_change_begin(). + */ +static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) +{ + percpu_up_read(&cgroup_threadgroup_rwsem); +} #else /* CONFIG_CGROUPS */ diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index eb7ca55f72ef..e9c3eac074e2 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -17,6 +17,7 @@ #include <linux/fs.h> #include <linux/seq_file.h> #include <linux/kernfs.h> +#include <linux/jump_label.h> #include <linux/cgroup-defs.h> @@ -50,6 +51,26 @@ extern struct css_set init_css_set; #include <linux/cgroup_subsys.h> #undef SUBSYS +#define SUBSYS(_x) \ + extern struct static_key_true _x ## _cgrp_subsys_enabled_key; \ + extern struct static_key_true _x ## _cgrp_subsys_on_dfl_key; +#include <linux/cgroup_subsys.h> +#undef SUBSYS + +/** + * cgroup_subsys_enabled - fast test on whether a subsys is enabled + * @ss: subsystem in question + */ +#define cgroup_subsys_enabled(ss) \ + static_branch_likely(&ss ## _enabled_key) + +/** + * cgroup_subsys_on_dfl - fast test on whether a subsys is on default hierarchy + * @ss: subsystem in question + */ +#define cgroup_subsys_on_dfl(ss) \ + static_branch_likely(&ss ## _on_dfl_key) + bool css_has_online_children(struct cgroup_subsys_state *css); struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss); struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgroup, @@ -211,11 +232,33 @@ void css_task_iter_end(struct css_task_iter *it); * cgroup_taskset_for_each - iterate cgroup_taskset * @task: the loop cursor * @tset: taskset to iterate + * + * @tset may contain multiple tasks and they may belong to multiple + * processes. When there are multiple tasks in @tset, if a task of a + * process is in @tset, all tasks of the process are in @tset. Also, all + * are guaranteed to share the same source and destination csses. + * + * Iteration is not in any specific order. */ #define cgroup_taskset_for_each(task, tset) \ for ((task) = cgroup_taskset_first((tset)); (task); \ (task) = cgroup_taskset_next((tset))) +/** + * cgroup_taskset_for_each_leader - iterate group leaders in a cgroup_taskset + * @leader: the loop cursor + * @tset: takset to iterate + * + * Iterate threadgroup leaders of @tset. For single-task migrations, @tset + * may not contain any. + */ +#define cgroup_taskset_for_each_leader(leader, tset) \ + for ((leader) = cgroup_taskset_first((tset)); (leader); \ + (leader) = cgroup_taskset_next((tset))) \ + if ((leader) != (leader)->group_leader) \ + ; \ + else + /* * Inline functions. */ @@ -412,64 +455,6 @@ static inline struct cgroup *task_cgroup(struct task_struct *task, return task_css(task, subsys_id)->cgroup; } -/** - * cgroup_on_dfl - test whether a cgroup is on the default hierarchy - * @cgrp: the cgroup of interest - * - * The default hierarchy is the v2 interface of cgroup and this function - * can be used to test whether a cgroup is on the default hierarchy for - * cases where a subsystem should behave differnetly depending on the - * interface version. - * - * The set of behaviors which change on the default hierarchy are still - * being determined and the mount option is prefixed with __DEVEL__. - * - * List of changed behaviors: - * - * - Mount options "noprefix", "xattr", "clone_children", "release_agent" - * and "name" are disallowed. - * - * - When mounting an existing superblock, mount options should match. - * - * - Remount is disallowed. - * - * - rename(2) is disallowed. - * - * - "tasks" is removed. Everything should be at process granularity. Use - * "cgroup.procs" instead. - * - * - "cgroup.procs" is not sorted. pids will be unique unless they got - * recycled inbetween reads. - * - * - "release_agent" and "notify_on_release" are removed. Replacement - * notification mechanism will be implemented. - * - * - "cgroup.clone_children" is removed. - * - * - "cgroup.subtree_populated" is available. Its value is 0 if the cgroup - * and its descendants contain no task; otherwise, 1. The file also - * generates kernfs notification which can be monitored through poll and - * [di]notify when the value of the file changes. - * - * - cpuset: tasks will be kept in empty cpusets when hotplug happens and - * take masks of ancestors with non-empty cpus/mems, instead of being - * moved to an ancestor. - * - * - cpuset: a task can be moved into an empty cpuset, and again it takes - * masks of ancestors. - * - * - memcg: use_hierarchy is on by default and the cgroup file for the flag - * is not created. - * - * - blkcg: blk-throttle becomes properly hierarchical. - * - * - debug: disallowed on the default hierarchy. - */ -static inline bool cgroup_on_dfl(const struct cgroup *cgrp) -{ - return cgrp->root == &cgrp_dfl_root; -} - /* no synchronization, the result can only be used as a hint */ static inline bool cgroup_has_tasks(struct cgroup *cgrp) { @@ -527,6 +512,19 @@ static inline void pr_cont_cgroup_path(struct cgroup *cgrp) pr_cont_kernfs_path(cgrp->kn); } +/** + * cgroup_file_notify - generate a file modified event for a cgroup_file + * @cfile: target cgroup_file + * + * @cfile must have been obtained by setting cftype->file_offset. + */ +static inline void cgroup_file_notify(struct cgroup_file *cfile) +{ + /* might not have been created due to one of the CFTYPE selector flags */ + if (cfile->kn) + kernfs_notify(cfile->kn); +} + #else /* !CONFIG_CGROUPS */ struct cgroup_subsys_state; diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h index bcc853eccc85..7edd30515298 100644 --- a/include/linux/hugetlb_cgroup.h +++ b/include/linux/hugetlb_cgroup.h @@ -48,9 +48,7 @@ int set_hugetlb_cgroup(struct page *page, struct hugetlb_cgroup *h_cg) static inline bool hugetlb_cgroup_disabled(void) { - if (hugetlb_cgrp_subsys.disabled) - return true; - return false; + return !cgroup_subsys_enabled(hugetlb_cgrp_subsys); } extern int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages, diff --git a/include/linux/init_task.h b/include/linux/init_task.h index e38681f4912d..d0b380ee7d67 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -25,13 +25,6 @@ extern struct files_struct init_files; extern struct fs_struct init_fs; -#ifdef CONFIG_CGROUPS -#define INIT_GROUP_RWSEM(sig) \ - .group_rwsem = __RWSEM_INITIALIZER(sig.group_rwsem), -#else -#define INIT_GROUP_RWSEM(sig) -#endif - #ifdef CONFIG_CPUSETS #define INIT_CPUSET_SEQ(tsk) \ .mems_allowed_seq = SEQCNT_ZERO(tsk.mems_allowed_seq), @@ -64,7 +57,6 @@ extern struct fs_struct init_fs; INIT_PREV_CPUTIME(sig) \ .cred_guard_mutex = \ __MUTEX_INITIALIZER(sig.cred_guard_mutex), \ - INIT_GROUP_RWSEM(sig) \ } extern struct nsproxy init_nsproxy; diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 7f653e8f6690..c9ca050de846 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -216,11 +216,6 @@ static inline int jump_label_apply_nops(struct module *mod) #define STATIC_KEY_INIT STATIC_KEY_INIT_FALSE #define jump_label_enabled static_key_enabled -static inline bool static_key_enabled(struct static_key *key) -{ - return static_key_count(key) > 0; -} - static inline void static_key_enable(struct static_key *key) { int count = static_key_count(key); @@ -267,6 +262,17 @@ struct static_key_false { #define DEFINE_STATIC_KEY_FALSE(name) \ struct static_key_false name = STATIC_KEY_FALSE_INIT +extern bool ____wrong_branch_error(void); + +#define static_key_enabled(x) \ +({ \ + if (!__builtin_types_compatible_p(typeof(*x), struct static_key) && \ + !__builtin_types_compatible_p(typeof(*x), struct static_key_true) &&\ + !__builtin_types_compatible_p(typeof(*x), struct static_key_false)) \ + ____wrong_branch_error(); \ + static_key_count((struct static_key *)x) > 0; \ +}) + #ifdef HAVE_JUMP_LABEL /* @@ -325,8 +331,6 @@ struct static_key_false { * See jump_label_type() / jump_label_init_type(). */ -extern bool ____wrong_branch_error(void); - #define static_branch_likely(x) \ ({ \ bool branch; \ diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index ad800e62cb7a..c83c699a6605 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -213,6 +213,9 @@ struct mem_cgroup { /* OOM-Killer disable */ int oom_kill_disable; + /* handle for "memory.events" */ + struct cgroup_file events_file; + /* protect arrays of thresholds */ struct mutex thresholds_lock; @@ -286,6 +289,7 @@ static inline void mem_cgroup_events(struct mem_cgroup *memcg, unsigned int nr) { this_cpu_add(memcg->stat->events[idx], nr); + cgroup_file_notify(&memcg->events_file); } bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg); @@ -347,9 +351,7 @@ ino_t page_cgroup_ino(struct page *page); static inline bool mem_cgroup_disabled(void) { - if (memory_cgrp_subsys.disabled) - return true; - return false; + return !cgroup_subsys_enabled(memory_cgrp_subsys); } /* diff --git a/include/linux/sched.h b/include/linux/sched.h index b7b9501b41af..a4ab9daa387c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -762,18 +762,6 @@ struct signal_struct { unsigned audit_tty_log_passwd; struct tty_audit_buf *tty_audit_buf; #endif -#ifdef CONFIG_CGROUPS - /* - * group_rwsem prevents new tasks from entering the threadgroup and - * member tasks from exiting,a more specifically, setting of - * PF_EXITING. fork and exit paths are protected with this rwsem - * using threadgroup_change_begin/end(). Users which require - * threadgroup to remain stable should use threadgroup_[un]lock() - * which also takes care of exec path. Currently, cgroup is the - * only user. - */ - struct rw_semaphore group_rwsem; -#endif oom_flags_t oom_flags; short oom_score_adj; /* OOM kill score adjustment */ |