diff options
Diffstat (limited to 'include/linux')
-rw-r--r-- | include/linux/cgroup.h | 170 | ||||
-rw-r--r-- | include/linux/cpuset.h | 1 | ||||
-rw-r--r-- | include/linux/res_counter.h | 2 |
3 files changed, 153 insertions, 20 deletions
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 470073bf93d0..d86e215ca2b8 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -19,6 +19,7 @@ #include <linux/idr.h> #include <linux/workqueue.h> #include <linux/xattr.h> +#include <linux/fs.h> #ifdef CONFIG_CGROUPS @@ -30,10 +31,6 @@ struct css_id; extern int cgroup_init_early(void); extern int cgroup_init(void); -extern void cgroup_lock(void); -extern int cgroup_lock_is_held(void); -extern bool cgroup_lock_live_group(struct cgroup *cgrp); -extern void cgroup_unlock(void); extern void cgroup_fork(struct task_struct *p); extern void cgroup_post_fork(struct task_struct *p); extern void cgroup_exit(struct task_struct *p, int run_callbacks); @@ -44,14 +41,25 @@ extern void cgroup_unload_subsys(struct cgroup_subsys *ss); extern const struct file_operations proc_cgroup_operations; -/* Define the enumeration of all builtin cgroup subsystems */ +/* + * Define the enumeration of all cgroup subsystems. + * + * We define ids for builtin subsystems and then modular ones. + */ #define SUBSYS(_x) _x ## _subsys_id, -#define IS_SUBSYS_ENABLED(option) IS_ENABLED(option) enum cgroup_subsys_id { +#define IS_SUBSYS_ENABLED(option) IS_BUILTIN(option) +#include <linux/cgroup_subsys.h> +#undef IS_SUBSYS_ENABLED + CGROUP_BUILTIN_SUBSYS_COUNT, + + __CGROUP_SUBSYS_TEMP_PLACEHOLDER = CGROUP_BUILTIN_SUBSYS_COUNT - 1, + +#define IS_SUBSYS_ENABLED(option) IS_MODULE(option) #include <linux/cgroup_subsys.h> +#undef IS_SUBSYS_ENABLED CGROUP_SUBSYS_COUNT, }; -#undef IS_SUBSYS_ENABLED #undef SUBSYS /* Per-subsystem/per-cgroup state maintained by the system. */ @@ -148,6 +156,13 @@ enum { * specified at mount time and thus is implemented here. */ CGRP_CPUSET_CLONE_CHILDREN, + /* see the comment above CGRP_ROOT_SANE_BEHAVIOR for details */ + CGRP_SANE_BEHAVIOR, +}; + +struct cgroup_name { + struct rcu_head rcu_head; + char name[]; }; struct cgroup { @@ -172,11 +187,23 @@ struct cgroup { struct cgroup *parent; /* my parent */ struct dentry *dentry; /* cgroup fs entry, RCU protected */ + /* + * This is a copy of dentry->d_name, and it's needed because + * we can't use dentry->d_name in cgroup_path(). + * + * You must acquire rcu_read_lock() to access cgrp->name, and + * the only place that can change it is rename(), which is + * protected by parent dir's i_mutex. + * + * Normally you should use cgroup_name() wrapper rather than + * access it directly. + */ + struct cgroup_name __rcu *name; + /* Private pointers for each registered subsystem */ struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; struct cgroupfs_root *root; - struct cgroup *top_cgroup; /* * List of cg_cgroup_links pointing at css_sets with @@ -213,6 +240,96 @@ struct cgroup { struct simple_xattrs xattrs; }; +#define MAX_CGROUP_ROOT_NAMELEN 64 + +/* cgroupfs_root->flags */ +enum { + /* + * Unfortunately, cgroup core and various controllers are riddled + * with idiosyncrasies and pointless options. The following flag, + * when set, will force sane behavior - some options are forced on, + * others are disallowed, and some controllers will change their + * hierarchical or other behaviors. + * + * The set of behaviors affected by this flag are still being + * determined and developed and the mount option for this flag is + * prefixed with __DEVEL__. The prefix will be dropped once we + * reach the point where all behaviors are compatible with the + * planned unified hierarchy, which will automatically turn on this + * flag. + * + * The followings are the behaviors currently affected this flag. + * + * - Mount options "noprefix" and "clone_children" are disallowed. + * Also, cgroupfs file cgroup.clone_children is not created. + * + * - When mounting an existing superblock, mount options should + * match. + * + * - Remount is disallowed. + * + * - memcg: use_hierarchy is on by default and the cgroup file for + * the flag is not created. + * + * The followings are planned changes. + * + * - release_agent will be disallowed once replacement notification + * mechanism is implemented. + */ + CGRP_ROOT_SANE_BEHAVIOR = (1 << 0), + + CGRP_ROOT_NOPREFIX = (1 << 1), /* mounted subsystems have no named prefix */ + CGRP_ROOT_XATTR = (1 << 2), /* supports extended attributes */ +}; + +/* + * A cgroupfs_root represents the root of a cgroup hierarchy, and may be + * associated with a superblock to form an active hierarchy. This is + * internal to cgroup core. Don't access directly from controllers. + */ +struct cgroupfs_root { + struct super_block *sb; + + /* + * The bitmask of subsystems intended to be attached to this + * hierarchy + */ + unsigned long subsys_mask; + + /* Unique id for this hierarchy. */ + int hierarchy_id; + + /* The bitmask of subsystems currently attached to this hierarchy */ + unsigned long actual_subsys_mask; + + /* A list running through the attached subsystems */ + struct list_head subsys_list; + + /* The root cgroup for this hierarchy */ + struct cgroup top_cgroup; + + /* Tracks how many cgroups are currently defined in hierarchy.*/ + int number_of_cgroups; + + /* A list running through the active hierarchies */ + struct list_head root_list; + + /* All cgroups on this root, cgroup_mutex protected */ + struct list_head allcg_list; + + /* Hierarchy-specific flags */ + unsigned long flags; + + /* IDs for cgroups in this hierarchy */ + struct ida cgroup_ida; + + /* The path to use for release notifications. */ + char release_agent_path[PATH_MAX]; + + /* The name for this hierarchy - may be empty */ + char name[MAX_CGROUP_ROOT_NAMELEN]; +}; + /* * A css_set is a structure holding pointers to a set of * cgroup_subsys_state objects. This saves space in the task struct @@ -278,6 +395,7 @@ struct cgroup_map_cb { /* cftype->flags */ #define CFTYPE_ONLY_ON_ROOT (1U << 0) /* only create on root cg */ #define CFTYPE_NOT_ON_ROOT (1U << 1) /* don't create on root cg */ +#define CFTYPE_INSANE (1U << 2) /* don't create if sane_behavior */ #define MAX_CFTYPE_NAME 64 @@ -304,9 +422,6 @@ struct cftype { /* CFTYPE_* flags */ unsigned int flags; - /* file xattrs */ - struct simple_xattrs xattrs; - int (*open)(struct inode *inode, struct file *file); ssize_t (*read)(struct cgroup *cgrp, struct cftype *cft, struct file *file, @@ -404,18 +519,31 @@ struct cgroup_scanner { void *data; }; +/* + * See the comment above CGRP_ROOT_SANE_BEHAVIOR for details. This + * function can be called as long as @cgrp is accessible. + */ +static inline bool cgroup_sane_behavior(const struct cgroup *cgrp) +{ + return cgrp->root->flags & CGRP_ROOT_SANE_BEHAVIOR; +} + +/* Caller should hold rcu_read_lock() */ +static inline const char *cgroup_name(const struct cgroup *cgrp) +{ + return rcu_dereference(cgrp->name)->name; +} + int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); int cgroup_rm_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); int cgroup_is_removed(const struct cgroup *cgrp); +bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor); int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen); int cgroup_task_count(const struct cgroup *cgrp); -/* Return true if cgrp is a descendant of the task's cgroup */ -int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task); - /* * Control Group taskset, used to pass around set of tasks to cgroup_subsys * methods. @@ -523,10 +651,16 @@ static inline struct cgroup_subsys_state *cgroup_subsys_state( * rcu_dereference_check() conditions, such as locks used during the * cgroup_subsys::attach() methods. */ +#ifdef CONFIG_PROVE_RCU +extern struct mutex cgroup_mutex; +#define task_subsys_state_check(task, subsys_id, __c) \ + rcu_dereference_check((task)->cgroups->subsys[(subsys_id)], \ + lockdep_is_held(&(task)->alloc_lock) || \ + lockdep_is_held(&cgroup_mutex) || (__c)) +#else #define task_subsys_state_check(task, subsys_id, __c) \ - rcu_dereference_check(task->cgroups->subsys[subsys_id], \ - lockdep_is_held(&task->alloc_lock) || \ - cgroup_lock_is_held() || (__c)) + rcu_dereference((task)->cgroups->subsys[(subsys_id)]) +#endif static inline struct cgroup_subsys_state * task_subsys_state(struct task_struct *task, int subsys_id) @@ -661,8 +795,8 @@ struct task_struct *cgroup_iter_next(struct cgroup *cgrp, struct cgroup_iter *it); void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it); int cgroup_scan_tasks(struct cgroup_scanner *scan); -int cgroup_attach_task(struct cgroup *, struct task_struct *); int cgroup_attach_task_all(struct task_struct *from, struct task_struct *); +int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from); /* * CSS ID is ID for cgroup_subsys_state structs under subsys. This only works diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 8c8a60d29407..ccd1de8ad822 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -11,7 +11,6 @@ #include <linux/sched.h> #include <linux/cpumask.h> #include <linux/nodemask.h> -#include <linux/cgroup.h> #include <linux/mm.h> #ifdef CONFIG_CPUSETS diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h index c23099413ad6..96a509b6be04 100644 --- a/include/linux/res_counter.h +++ b/include/linux/res_counter.h @@ -13,7 +13,7 @@ * info about what this counter is. */ -#include <linux/cgroup.h> +#include <linux/spinlock.h> #include <linux/errno.h> /* |