From 3ff195b011d7decf501a4d55aeed312731094796 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 30 Mar 2010 11:31:26 -0700 Subject: sysfs: Implement sysfs tagged directory support. The problem. When implementing a network namespace I need to be able to have multiple network devices with the same name. Currently this is a problem for /sys/class/net/*, /sys/devices/virtual/net/*, and potentially a few other directories of the form /sys/ ... /net/*. What this patch does is to add an additional tag field to the sysfs dirent structure. For directories that should show different contents depending on the context such as /sys/class/net/, and /sys/devices/virtual/net/ this tag field is used to specify the context in which those directories should be visible. Effectively this is the same as creating multiple distinct directories with the same name but internally to sysfs the result is nicer. I am calling the concept of a single directory that looks like multiple directories all at the same path in the filesystem tagged directories. For the networking namespace the set of directories whose contents I need to filter with tags can depend on the presence or absence of hotplug hardware or which modules are currently loaded. Which means I need a simple race free way to setup those directories as tagged. To achieve a reace free design all tagged directories are created and managed by sysfs itself. Users of this interface: - define a type in the sysfs_tag_type enumeration. - call sysfs_register_ns_types with the type and it's operations - sysfs_exit_ns when an individual tag is no longer valid - Implement mount_ns() which returns the ns of the calling process so we can attach it to a sysfs superblock. - Implement ktype.namespace() which returns the ns of a syfs kobject. Everything else is left up to sysfs and the driver layer. For the network namespace mount_ns and namespace() are essentially one line functions, and look to remain that. Tags are currently represented a const void * pointers as that is both generic, prevides enough information for equality comparisons, and is trivial to create for current users, as it is just the existing namespace pointer. The work needed in sysfs is more extensive. At each directory or symlink creating I need to check if the directory it is being created in is a tagged directory and if so generate the appropriate tag to place on the sysfs_dirent. Likewise at each symlink or directory removal I need to check if the sysfs directory it is being removed from is a tagged directory and if so figure out which tag goes along with the name I am deleting. Currently only directories which hold kobjects, and symlinks are supported. There is not enough information in the current file attribute interfaces to give us anything to discriminate on which makes it useless, and there are no potential users which makes it an uninteresting problem to solve. Signed-off-by: Eric W. Biederman Signed-off-by: Benjamin Thery Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/dir.c | 112 ++++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 88 insertions(+), 24 deletions(-) (limited to 'fs/sysfs/dir.c') diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 590717861c7..b2b83067ccc 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -380,9 +380,15 @@ int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) { struct sysfs_inode_attrs *ps_iattr; - if (sysfs_find_dirent(acxt->parent_sd, sd->s_name)) + if (sysfs_find_dirent(acxt->parent_sd, sd->s_ns, sd->s_name)) return -EEXIST; + if (sysfs_ns_type(acxt->parent_sd) && !sd->s_ns) { + WARN(1, KERN_WARNING "sysfs: ns required in '%s' for '%s'\n", + acxt->parent_sd->s_name, sd->s_name); + return -EINVAL; + } + sd->s_parent = sysfs_get(acxt->parent_sd); sysfs_link_sibling(sd); @@ -533,13 +539,17 @@ void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt) * Pointer to sysfs_dirent if found, NULL if not. */ struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd, + const void *ns, const unsigned char *name) { struct sysfs_dirent *sd; - for (sd = parent_sd->s_dir.children; sd; sd = sd->s_sibling) + for (sd = parent_sd->s_dir.children; sd; sd = sd->s_sibling) { + if (sd->s_ns != ns) + continue; if (!strcmp(sd->s_name, name)) return sd; + } return NULL; } @@ -558,12 +568,13 @@ struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd, * Pointer to sysfs_dirent if found, NULL if not. */ struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd, + const void *ns, const unsigned char *name) { struct sysfs_dirent *sd; mutex_lock(&sysfs_mutex); - sd = sysfs_find_dirent(parent_sd, name); + sd = sysfs_find_dirent(parent_sd, ns, name); sysfs_get(sd); mutex_unlock(&sysfs_mutex); @@ -572,7 +583,8 @@ struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd, EXPORT_SYMBOL_GPL(sysfs_get_dirent); static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd, - const char *name, struct sysfs_dirent **p_sd) + enum kobj_ns_type type, const void *ns, const char *name, + struct sysfs_dirent **p_sd) { umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO; struct sysfs_addrm_cxt acxt; @@ -583,6 +595,9 @@ static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd, sd = sysfs_new_dirent(name, mode, SYSFS_DIR); if (!sd) return -ENOMEM; + + sd->s_flags |= (type << SYSFS_NS_TYPE_SHIFT); + sd->s_ns = ns; sd->s_dir.kobj = kobj; /* link in */ @@ -601,7 +616,25 @@ static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd, int sysfs_create_subdir(struct kobject *kobj, const char *name, struct sysfs_dirent **p_sd) { - return create_dir(kobj, kobj->sd, name, p_sd); + return create_dir(kobj, kobj->sd, + KOBJ_NS_TYPE_NONE, NULL, name, p_sd); +} + +static enum kobj_ns_type sysfs_read_ns_type(struct kobject *kobj) +{ + const struct kobj_ns_type_operations *ops; + enum kobj_ns_type type; + + ops = kobj_child_ns_ops(kobj); + if (!ops) + return KOBJ_NS_TYPE_NONE; + + type = ops->type; + BUG_ON(type <= KOBJ_NS_TYPE_NONE); + BUG_ON(type >= KOBJ_NS_TYPES); + BUG_ON(!kobj_ns_type_registered(type)); + + return type; } /** @@ -610,7 +643,9 @@ int sysfs_create_subdir(struct kobject *kobj, const char *name, */ int sysfs_create_dir(struct kobject * kobj) { + enum kobj_ns_type type; struct sysfs_dirent *parent_sd, *sd; + const void *ns = NULL; int error = 0; BUG_ON(!kobj); @@ -620,7 +655,11 @@ int sysfs_create_dir(struct kobject * kobj) else parent_sd = &sysfs_root; - error = create_dir(kobj, parent_sd, kobject_name(kobj), &sd); + if (sysfs_ns_type(parent_sd)) + ns = kobj->ktype->namespace(kobj); + type = sysfs_read_ns_type(kobj); + + error = create_dir(kobj, parent_sd, type, ns, kobject_name(kobj), &sd); if (!error) kobj->sd = sd; return error; @@ -630,13 +669,19 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { struct dentry *ret = NULL; - struct sysfs_dirent *parent_sd = dentry->d_parent->d_fsdata; + struct dentry *parent = dentry->d_parent; + struct sysfs_dirent *parent_sd = parent->d_fsdata; struct sysfs_dirent *sd; struct inode *inode; + enum kobj_ns_type type; + const void *ns; mutex_lock(&sysfs_mutex); - sd = sysfs_find_dirent(parent_sd, dentry->d_name.name); + type = sysfs_ns_type(parent_sd); + ns = sysfs_info(dir->i_sb)->ns[type]; + + sd = sysfs_find_dirent(parent_sd, ns, dentry->d_name.name); /* no such entry */ if (!sd) { @@ -735,7 +780,8 @@ void sysfs_remove_dir(struct kobject * kobj) } int sysfs_rename(struct sysfs_dirent *sd, - struct sysfs_dirent *new_parent_sd, const char *new_name) + struct sysfs_dirent *new_parent_sd, const void *new_ns, + const char *new_name) { const char *dup_name = NULL; int error; @@ -743,12 +789,12 @@ int sysfs_rename(struct sysfs_dirent *sd, mutex_lock(&sysfs_mutex); error = 0; - if ((sd->s_parent == new_parent_sd) && + if ((sd->s_parent == new_parent_sd) && (sd->s_ns == new_ns) && (strcmp(sd->s_name, new_name) == 0)) goto out; /* nothing to rename */ error = -EEXIST; - if (sysfs_find_dirent(new_parent_sd, new_name)) + if (sysfs_find_dirent(new_parent_sd, new_ns, new_name)) goto out; /* rename sysfs_dirent */ @@ -770,6 +816,7 @@ int sysfs_rename(struct sysfs_dirent *sd, sd->s_parent = new_parent_sd; sysfs_link_sibling(sd); } + sd->s_ns = new_ns; error = 0; out: @@ -780,19 +827,28 @@ int sysfs_rename(struct sysfs_dirent *sd, int sysfs_rename_dir(struct kobject *kobj, const char *new_name) { - return sysfs_rename(kobj->sd, kobj->sd->s_parent, new_name); + struct sysfs_dirent *parent_sd = kobj->sd->s_parent; + const void *new_ns = NULL; + + if (sysfs_ns_type(parent_sd)) + new_ns = kobj->ktype->namespace(kobj); + + return sysfs_rename(kobj->sd, parent_sd, new_ns, new_name); } int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj) { struct sysfs_dirent *sd = kobj->sd; struct sysfs_dirent *new_parent_sd; + const void *new_ns = NULL; BUG_ON(!sd->s_parent); + if (sysfs_ns_type(sd->s_parent)) + new_ns = kobj->ktype->namespace(kobj); new_parent_sd = new_parent_kobj && new_parent_kobj->sd ? new_parent_kobj->sd : &sysfs_root; - return sysfs_rename(sd, new_parent_sd, sd->s_name); + return sysfs_rename(sd, new_parent_sd, new_ns, sd->s_name); } /* Relationship between s_mode and the DT_xxx types */ @@ -807,32 +863,35 @@ static int sysfs_dir_release(struct inode *inode, struct file *filp) return 0; } -static struct sysfs_dirent *sysfs_dir_pos(struct sysfs_dirent *parent_sd, - ino_t ino, struct sysfs_dirent *pos) +static struct sysfs_dirent *sysfs_dir_pos(const void *ns, + struct sysfs_dirent *parent_sd, ino_t ino, struct sysfs_dirent *pos) { if (pos) { int valid = !(pos->s_flags & SYSFS_FLAG_REMOVED) && pos->s_parent == parent_sd && ino == pos->s_ino; sysfs_put(pos); - if (valid) - return pos; + if (!valid) + pos = NULL; } - pos = NULL; - if ((ino > 1) && (ino < INT_MAX)) { + if (!pos && (ino > 1) && (ino < INT_MAX)) { pos = parent_sd->s_dir.children; while (pos && (ino > pos->s_ino)) pos = pos->s_sibling; } + while (pos && pos->s_ns != ns) + pos = pos->s_sibling; return pos; } -static struct sysfs_dirent *sysfs_dir_next_pos(struct sysfs_dirent *parent_sd, - ino_t ino, struct sysfs_dirent *pos) +static struct sysfs_dirent *sysfs_dir_next_pos(const void *ns, + struct sysfs_dirent *parent_sd, ino_t ino, struct sysfs_dirent *pos) { - pos = sysfs_dir_pos(parent_sd, ino, pos); + pos = sysfs_dir_pos(ns, parent_sd, ino, pos); if (pos) pos = pos->s_sibling; + while (pos && pos->s_ns != ns) + pos = pos->s_sibling; return pos; } @@ -841,8 +900,13 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir) struct dentry *dentry = filp->f_path.dentry; struct sysfs_dirent * parent_sd = dentry->d_fsdata; struct sysfs_dirent *pos = filp->private_data; + enum kobj_ns_type type; + const void *ns; ino_t ino; + type = sysfs_ns_type(parent_sd); + ns = sysfs_info(dentry->d_sb)->ns[type]; + if (filp->f_pos == 0) { ino = parent_sd->s_ino; if (filldir(dirent, ".", 1, filp->f_pos, ino, DT_DIR) == 0) @@ -857,9 +921,9 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir) filp->f_pos++; } mutex_lock(&sysfs_mutex); - for (pos = sysfs_dir_pos(parent_sd, filp->f_pos, pos); + for (pos = sysfs_dir_pos(ns, parent_sd, filp->f_pos, pos); pos; - pos = sysfs_dir_next_pos(parent_sd, filp->f_pos, pos)) { + pos = sysfs_dir_next_pos(ns, parent_sd, filp->f_pos, pos)) { const char * name; unsigned int type; int len, ret; -- cgit v1.2.3 From af10ec77b43335ab4e473e4087d85979caf02d65 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 30 Mar 2010 11:31:27 -0700 Subject: sysfs: Add support for tagged directories with untagged members. I had hopped to avoid this but the bonding driver adds a file to /sys/class/net/ and the easiest way to handle that file is to make it untagged and to register it only once. So relax the rules on tagged directories, and make bonding work. Signed-off-by: Eric W. Biederman Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/dir.c | 12 +++--------- fs/sysfs/inode.c | 2 ++ 2 files changed, 5 insertions(+), 9 deletions(-) (limited to 'fs/sysfs/dir.c') diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index b2b83067ccc..a63eb4ba786 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -383,12 +383,6 @@ int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) if (sysfs_find_dirent(acxt->parent_sd, sd->s_ns, sd->s_name)) return -EEXIST; - if (sysfs_ns_type(acxt->parent_sd) && !sd->s_ns) { - WARN(1, KERN_WARNING "sysfs: ns required in '%s' for '%s'\n", - acxt->parent_sd->s_name, sd->s_name); - return -EINVAL; - } - sd->s_parent = sysfs_get(acxt->parent_sd); sysfs_link_sibling(sd); @@ -545,7 +539,7 @@ struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd, struct sysfs_dirent *sd; for (sd = parent_sd->s_dir.children; sd; sd = sd->s_sibling) { - if (sd->s_ns != ns) + if (ns && sd->s_ns && (sd->s_ns != ns)) continue; if (!strcmp(sd->s_name, name)) return sd; @@ -879,7 +873,7 @@ static struct sysfs_dirent *sysfs_dir_pos(const void *ns, while (pos && (ino > pos->s_ino)) pos = pos->s_sibling; } - while (pos && pos->s_ns != ns) + while (pos && pos->s_ns && pos->s_ns != ns) pos = pos->s_sibling; return pos; } @@ -890,7 +884,7 @@ static struct sysfs_dirent *sysfs_dir_next_pos(const void *ns, pos = sysfs_dir_pos(ns, parent_sd, ino, pos); if (pos) pos = pos->s_sibling; - while (pos && pos->s_ns != ns) + while (pos && pos->s_ns && pos->s_ns != ns) pos = pos->s_sibling; return pos; } diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index cf2bad1462e..bbd77e95cf7 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c @@ -335,6 +335,8 @@ int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const void *ns, const cha sysfs_addrm_start(&acxt, dir_sd); sd = sysfs_find_dirent(dir_sd, ns, name); + if (sd && (sd->s_ns != ns)) + sd = NULL; if (sd) sysfs_remove_one(&acxt, sd); -- cgit v1.2.3 From be867b194a3ae3c680c29521287ae49b4d44d420 Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Mon, 3 May 2010 16:23:15 -0500 Subject: sysfs: Comment sysfs directory tagging logic Add some in-line comments to explain the new infrastructure, which was introduced to support sysfs directory tagging with namespaces. I think an overall description someplace might be good too, but it didn't really seem to fit into Documentation/filesystems/sysfs.txt, which appears more geared toward users, rather than maintainers, of sysfs. (Tejun, please let me know if I can make anything clearer or failed altogether to comment something that should be commented.) Signed-off-by: Serge E. Hallyn Cc: Eric W. Biederman Cc: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/dir.c | 8 ++++++++ fs/sysfs/sysfs.h | 13 ++++++++++++- include/linux/kobject.h | 11 +++++++++++ include/linux/sysfs.h | 1 + lib/kobject.c | 11 +++++++++++ 5 files changed, 43 insertions(+), 1 deletion(-) (limited to 'fs/sysfs/dir.c') diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index a63eb4ba786..7e54bac8c4b 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -614,6 +614,14 @@ int sysfs_create_subdir(struct kobject *kobj, const char *name, KOBJ_NS_TYPE_NONE, NULL, name, p_sd); } +/** + * sysfs_read_ns_type: return associated ns_type + * @kobj: the kobject being queried + * + * Each kobject can be tagged with exactly one namespace type + * (i.e. network or user). Return the ns_type associated with + * this object if any + */ static enum kobj_ns_type sysfs_read_ns_type(struct kobject *kobj) { const struct kobj_ns_type_operations *ops; diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h index 93847d54c2e..6a13105b559 100644 --- a/fs/sysfs/sysfs.h +++ b/fs/sysfs/sysfs.h @@ -58,7 +58,7 @@ struct sysfs_dirent { struct sysfs_dirent *s_sibling; const char *s_name; - const void *s_ns; + const void *s_ns; /* namespace tag */ union { struct sysfs_elem_dir s_dir; struct sysfs_elem_symlink s_symlink; @@ -82,6 +82,7 @@ struct sysfs_dirent { #define SYSFS_COPY_NAME (SYSFS_DIR | SYSFS_KOBJ_LINK) #define SYSFS_ACTIVE_REF (SYSFS_KOBJ_ATTR | SYSFS_KOBJ_BIN_ATTR) +/* identify any namespace tag on sysfs_dirents */ #define SYSFS_NS_TYPE_MASK 0xff00 #define SYSFS_NS_TYPE_SHIFT 8 @@ -93,6 +94,10 @@ static inline unsigned int sysfs_type(struct sysfs_dirent *sd) return sd->s_flags & SYSFS_TYPE_MASK; } +/* + * Return any namespace tags on this dirent. + * enum kobj_ns_type is defined in linux/kobject.h + */ static inline enum kobj_ns_type sysfs_ns_type(struct sysfs_dirent *sd) { return (sd->s_flags & SYSFS_NS_TYPE_MASK) >> SYSFS_NS_TYPE_SHIFT; @@ -123,6 +128,12 @@ struct sysfs_addrm_cxt { /* * mount.c */ + +/* + * Each sb is associated with a set of namespace tags (i.e. + * the network namespace of the task which mounted this sysfs + * instance). + */ struct sysfs_super_info { const void *ns[KOBJ_NS_TYPES]; }; diff --git a/include/linux/kobject.h b/include/linux/kobject.h index d9456f69904..b60d2dfe4e6 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -136,12 +136,23 @@ struct kobj_attribute { extern const struct sysfs_ops kobj_sysfs_ops; +/* + * Namespace types which are used to tag kobjects and sysfs entries. + * Network namespace will likely be the first. + */ enum kobj_ns_type { KOBJ_NS_TYPE_NONE = 0, KOBJ_NS_TYPES }; struct sock; + +/* + * Callbacks so sysfs can determine namespaces + * @current_ns: return calling task's namespace + * @netlink_ns: return namespace to which a sock belongs (right?) + * @initial_ns: return the initial namespace (i.e. init_net_ns) + */ struct kobj_ns_type_operations { enum kobj_ns_type type; const void *(*current_ns)(void); diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 976c4664b21..47f1c95b529 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -178,6 +178,7 @@ struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd); void sysfs_put(struct sysfs_dirent *sd); void sysfs_printk_last_file(void); +/* Called to clear a ns tag when it is no longer valid */ void sysfs_exit_ns(enum kobj_ns_type type, const void *tag); int __must_check sysfs_init(void); diff --git a/lib/kobject.c b/lib/kobject.c index b2c6d1f56e6..f07c57252e8 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -948,6 +948,17 @@ const void *kobj_ns_initial(enum kobj_ns_type type) return ns; } +/* + * kobj_ns_exit - invalidate a namespace tag + * + * @type: the namespace type (i.e. KOBJ_NS_TYPE_NET) + * @ns: the actual namespace being invalidated + * + * This is called when a tag is no longer valid. For instance, + * when a network namespace exits, it uses this helper to + * make sure no sb's sysfs_info points to the now-invalidated + * netns. + */ void kobj_ns_exit(enum kobj_ns_type type, const void *ns) { sysfs_exit_ns(type, ns); -- cgit v1.2.3