From 8231f2f99a5e5fc45a25e8de09fd1ab9711babf1 Mon Sep 17 00:00:00 2001 From: Qinghuang Feng Date: Wed, 14 Jan 2009 15:45:13 +0800 Subject: SYSFS: use standard magic.h for sysfs SYSFS_MAGIC has been added into magic.h, so only use that definition in magic.h to avoid potential consistency problems. Signed-off-by: Qinghuang Feng Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/mount.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index ab343e371d64..8133ca36ee0e 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -17,11 +17,10 @@ #include #include #include +#include #include "sysfs.h" -/* Random magic number */ -#define SYSFS_MAGIC 0x62656572 static struct vfsmount *sysfs_mount; struct super_block * sysfs_sb = NULL; -- cgit v1.2.3 From 4a67a1bc0b3a0db017b560cee27370d141c58e25 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 21 Jan 2009 11:55:11 -0800 Subject: sysfs: Take sysfs_mutex when fetching the root inode. sysfs_get_inode ultimately calls sysfs_count_nlink when a directory inode is fetched. sysfs_count_nlink needs to be called under the sysfs_mutex to guard against the unlikely but possible scenario that the root directory is changing as we are counting the number of entries in it, and just in general to be consistent. Signed-off-by: Eric W. 
Biederman Acked-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/mount.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 8133ca36ee0e..84ef378673a8 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -52,7 +52,9 @@ static int sysfs_fill_super(struct super_block *sb, void *data, int silent) sysfs_sb = sb; /* get root inode, initialize and unlock it */ + mutex_lock(&sysfs_mutex); inode = sysfs_get_inode(&sysfs_root); + mutex_unlock(&sysfs_mutex); if (!inode) { pr_debug("sysfs: could not get root inode\n"); return -ENOMEM; -- cgit v1.2.3 From 425cb02912d1095febfeaf8d379af7b2ac9e4a89 Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Thu, 12 Feb 2009 10:56:59 -0700 Subject: sysfs: sysfs_add_one WARNs with full path to duplicate filename sysfs: sysfs_add_one WARNs with full path to duplicate filename As a debugging aid, it can be useful to know the full path to a duplicate file being created in sysfs. We now will display warnings such as: sysfs: cannot create duplicate filename '/foo' when attempting to create multiple files named 'foo' in the sysfs root, or: sysfs: cannot create duplicate filename '/bus/pci/slots/5/foo' when attempting to create multiple files named 'foo' under a given directory in sysfs. The path displayed is always a relative path to sysfs_root. The leading '/' in the path name refers to the sysfs_root mount point, and should not be confused with the "real" '/'. Thanks to Alex Williamson for essentially writing sysfs_pathname. 
Cc: Alex Williamson Signed-off-by: Alex Chiang Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/dir.c | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 82d3b79d0e08..f13d852ab3c1 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -433,6 +433,26 @@ int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) return 0; } +/** + * sysfs_pathname - return full path to sysfs dirent + * @sd: sysfs_dirent whose path we want + * @path: caller allocated buffer + * + * Gives the name "/" to the sysfs_root entry; any path returned + * is relative to wherever sysfs is mounted. + * + * XXX: does no error checking on @path size + */ +static char *sysfs_pathname(struct sysfs_dirent *sd, char *path) +{ + if (sd->s_parent) { + sysfs_pathname(sd->s_parent, path); + strcat(path, "/"); + } + strcat(path, sd->s_name); + return path; +} + /** * sysfs_add_one - add sysfs_dirent to parent * @acxt: addrm context to use @@ -458,8 +478,16 @@ int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) int ret; ret = __sysfs_add_one(acxt, sd); - WARN(ret == -EEXIST, KERN_WARNING "sysfs: duplicate filename '%s' " - "can not be created\n", sd->s_name); + if (ret == -EEXIST) { + char *path = kzalloc(PATH_MAX, GFP_KERNEL); + WARN(1, KERN_WARNING + "sysfs: cannot create duplicate filename '%s'\n", + (path == NULL) ? sd->s_name : + strcat(strcat(sysfs_pathname(acxt->parent_sd, path), "/"), + sd->s_name)); + kfree(path); + } + return ret; } -- cgit v1.2.3 From 04256b4a8fc73f54cd14f20867882c299728a446 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 11 Feb 2009 13:20:23 -0800 Subject: sysfs: reference sysfs_dirent from sysfs inodes The sysfs_dirent serves as both an inode and a directory entry for sysfs. To prevent the sysfs inode numbers from being freed prematurely hold a reference to sysfs_dirent from the sysfs inode. 
[akpm@linux-foundation.org: add comment] Signed-off-by: Eric W. Biederman Cc: Tejun Heo Cc: Al Viro Cc: Cornelia Huck Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/inode.c | 17 +++++++++++++++++ fs/sysfs/mount.c | 1 + fs/sysfs/sysfs.h | 1 + 3 files changed, 19 insertions(+) (limited to 'fs') diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index dfa3d94cfc74..555f0ff988df 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c @@ -147,6 +147,7 @@ static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode) { struct bin_attribute *bin_attr; + inode->i_private = sysfs_get(sd); inode->i_mapping->a_ops = &sysfs_aops; inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info; inode->i_op = &sysfs_inode_operations; @@ -214,6 +215,22 @@ struct inode * sysfs_get_inode(struct sysfs_dirent *sd) return inode; } +/* + * The sysfs_dirent serves as both an inode and a directory entry for sysfs. + * To prevent the sysfs inode numbers from being freed prematurely we take a + * reference to sysfs_dirent from the sysfs inode. A + * super_operations.delete_inode() implementation is needed to drop that + * reference upon inode destruction. 
+ */ +void sysfs_delete_inode(struct inode *inode) +{ + struct sysfs_dirent *sd = inode->i_private; + + truncate_inode_pages(&inode->i_data, 0); + clear_inode(inode); + sysfs_put(sd); +} + int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name) { struct sysfs_addrm_cxt acxt; diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 84ef378673a8..49749955ccaf 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -29,6 +29,7 @@ struct kmem_cache *sysfs_dir_cachep; static const struct super_operations sysfs_ops = { .statfs = simple_statfs, .drop_inode = generic_delete_inode, + .delete_inode = sysfs_delete_inode, }; struct sysfs_dirent sysfs_root = { diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h index 93c6d6b27c4d..9055d04e4ab0 100644 --- a/fs/sysfs/sysfs.h +++ b/fs/sysfs/sysfs.h @@ -145,6 +145,7 @@ static inline void __sysfs_put(struct sysfs_dirent *sd) * inode.c */ struct inode *sysfs_get_inode(struct sysfs_dirent *sd); +void sysfs_delete_inode(struct inode *inode); int sysfs_setattr(struct dentry *dentry, struct iattr *iattr); int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name); int sysfs_inode_init(void); -- cgit v1.2.3 From e0edd3c65aa5b53e20280565a7ce11675eb7ed6b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 4 Mar 2009 11:57:20 -0800 Subject: sysfs: don't block indefinitely for unmapped files. Modify sysfs bin files so that we can remove a bin file while it is still mapped. When the kobject is removed we unmap the bin file and arrange for future accesses to the mapping to receive SIGBUS. Implementing this prevents a nasty DoS when pci devices are hot plugged and unplugged: if any of their resources were mmapped, the kernel could not free up their pci resources or release their pci data structures. [akpm@linux-foundation.org: remove unused var] Signed-off-by: Eric W. 
Biederman Cc: Jesse Barnes Acked-by: Tejun Heo Cc: Kay Sievers Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/bin.c | 184 +++++++++++++++++++++++++++++++++++++++++++++++++++---- fs/sysfs/dir.c | 1 + fs/sysfs/sysfs.h | 2 + 3 files changed, 174 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c index f2c478c3424e..96cc2bf6a84e 100644 --- a/fs/sysfs/bin.c +++ b/fs/sysfs/bin.c @@ -21,15 +21,28 @@ #include #include #include +#include #include #include "sysfs.h" +/* + * There's one bin_buffer for each open file. + * + * filp->private_data points to bin_buffer and + * sysfs_dirent->s_bin_attr.buffers points to a the bin_buffer s + * sysfs_dirent->s_bin_attr.buffers is protected by sysfs_bin_lock + */ +static DEFINE_MUTEX(sysfs_bin_lock); + struct bin_buffer { - struct mutex mutex; - void *buffer; - int mmapped; + struct mutex mutex; + void *buffer; + int mmapped; + struct vm_operations_struct *vm_ops; + struct file *file; + struct hlist_node list; }; static int @@ -168,29 +181,148 @@ out_free: return count; } +static void bin_vma_open(struct vm_area_struct *vma) +{ + struct file *file = vma->vm_file; + struct bin_buffer *bb = file->private_data; + struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; + + if (!bb->vm_ops || !bb->vm_ops->open) + return; + + if (!sysfs_get_active_two(attr_sd)) + return; + + bb->vm_ops->open(vma); + + sysfs_put_active_two(attr_sd); +} + +static void bin_vma_close(struct vm_area_struct *vma) +{ + struct file *file = vma->vm_file; + struct bin_buffer *bb = file->private_data; + struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; + + if (!bb->vm_ops || !bb->vm_ops->close) + return; + + if (!sysfs_get_active_two(attr_sd)) + return; + + bb->vm_ops->close(vma); + + sysfs_put_active_two(attr_sd); +} + +static int bin_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct file *file = vma->vm_file; + struct bin_buffer *bb = file->private_data; + 
struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; + int ret; + + if (!bb->vm_ops || !bb->vm_ops->fault) + return VM_FAULT_SIGBUS; + + if (!sysfs_get_active_two(attr_sd)) + return VM_FAULT_SIGBUS; + + ret = bb->vm_ops->fault(vma, vmf); + + sysfs_put_active_two(attr_sd); + return ret; +} + +static int bin_page_mkwrite(struct vm_area_struct *vma, struct page *page) +{ + struct file *file = vma->vm_file; + struct bin_buffer *bb = file->private_data; + struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; + int ret; + + if (!bb->vm_ops || !bb->vm_ops->page_mkwrite) + return -EINVAL; + + if (!sysfs_get_active_two(attr_sd)) + return -EINVAL; + + ret = bb->vm_ops->page_mkwrite(vma, page); + + sysfs_put_active_two(attr_sd); + return ret; +} + +static int bin_access(struct vm_area_struct *vma, unsigned long addr, + void *buf, int len, int write) +{ + struct file *file = vma->vm_file; + struct bin_buffer *bb = file->private_data; + struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; + int ret; + + if (!bb->vm_ops || !bb->vm_ops->access) + return -EINVAL; + + if (!sysfs_get_active_two(attr_sd)) + return -EINVAL; + + ret = bb->vm_ops->access(vma, addr, buf, len, write); + + sysfs_put_active_two(attr_sd); + return ret; +} + +static struct vm_operations_struct bin_vm_ops = { + .open = bin_vma_open, + .close = bin_vma_close, + .fault = bin_fault, + .page_mkwrite = bin_page_mkwrite, + .access = bin_access, +}; + static int mmap(struct file *file, struct vm_area_struct *vma) { struct bin_buffer *bb = file->private_data; struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr; struct kobject *kobj = attr_sd->s_parent->s_dir.kobj; + struct vm_operations_struct *vm_ops; int rc; mutex_lock(&bb->mutex); /* need attr_sd for attr, its parent for kobj */ + rc = -ENODEV; if (!sysfs_get_active_two(attr_sd)) - return -ENODEV; + goto out_unlock; rc = -EINVAL; - if (attr->mmap) - rc = attr->mmap(kobj, 
attr, vma); + if (!attr->mmap) + goto out_put; - if (rc == 0 && !bb->mmapped) - bb->mmapped = 1; - else - sysfs_put_active_two(attr_sd); + rc = attr->mmap(kobj, attr, vma); + vm_ops = vma->vm_ops; + vma->vm_ops = &bin_vm_ops; + if (rc) + goto out_put; + rc = -EINVAL; + if (bb->mmapped && bb->vm_ops != vma->vm_ops) + goto out_put; + +#ifdef CONFIG_NUMA + rc = -EINVAL; + if (vm_ops && ((vm_ops->set_policy || vm_ops->get_policy || vm_ops->migrate))) + goto out_put; +#endif + + rc = 0; + bb->mmapped = 1; + bb->vm_ops = vm_ops; +out_put: + sysfs_put_active_two(attr_sd); +out_unlock: mutex_unlock(&bb->mutex); return rc; @@ -223,8 +355,13 @@ static int open(struct inode * inode, struct file * file) goto err_out; mutex_init(&bb->mutex); + bb->file = file; file->private_data = bb; + mutex_lock(&sysfs_bin_lock); + hlist_add_head(&bb->list, &attr_sd->s_bin_attr.buffers); + mutex_unlock(&sysfs_bin_lock); + /* open succeeded, put active references */ sysfs_put_active_two(attr_sd); return 0; @@ -237,11 +374,12 @@ static int open(struct inode * inode, struct file * file) static int release(struct inode * inode, struct file * file) { - struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; struct bin_buffer *bb = file->private_data; - if (bb->mmapped) - sysfs_put_active_two(attr_sd); + mutex_lock(&sysfs_bin_lock); + hlist_del(&bb->list); + mutex_unlock(&sysfs_bin_lock); + kfree(bb->buffer); kfree(bb); return 0; @@ -256,6 +394,26 @@ const struct file_operations bin_fops = { .release = release, }; + +void unmap_bin_file(struct sysfs_dirent *attr_sd) +{ + struct bin_buffer *bb; + struct hlist_node *tmp; + + if (sysfs_type(attr_sd) != SYSFS_KOBJ_BIN_ATTR) + return; + + mutex_lock(&sysfs_bin_lock); + + hlist_for_each_entry(bb, tmp, &attr_sd->s_bin_attr.buffers, list) { + struct inode *inode = bb->file->f_path.dentry->d_inode; + + unmap_mapping_range(inode->i_mapping, 0, 0, 1); + } + + mutex_unlock(&sysfs_bin_lock); +} + /** * sysfs_create_bin_file - create binary file for 
object. * @kobj: object. diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index f13d852ab3c1..66aeb4fff0c3 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -609,6 +609,7 @@ void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt) sysfs_drop_dentry(sd); sysfs_deactivate(sd); + unmap_bin_file(sd); sysfs_put(sd); } } diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h index 9055d04e4ab0..3fa0d98481e2 100644 --- a/fs/sysfs/sysfs.h +++ b/fs/sysfs/sysfs.h @@ -28,6 +28,7 @@ struct sysfs_elem_attr { struct sysfs_elem_bin_attr { struct bin_attribute *bin_attr; + struct hlist_head buffers; }; /* @@ -164,6 +165,7 @@ int sysfs_add_file_mode(struct sysfs_dirent *dir_sd, * bin.c */ extern const struct file_operations bin_fops; +void unmap_bin_file(struct sysfs_dirent *attr_sd); /* * symlink.c -- cgit v1.2.3 From f67f129e519fa87f8ebd236b6336fe43f31ee141 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sun, 1 Mar 2009 21:10:49 +0800 Subject: Driver core: implement uevent suppress in kobject This patch implements uevent suppress in kobject and removes it from struct device, based on the following ideas: 1. Uevent sending should be an attribute of the kobject, so suppressing it in the kobject layer is more natural than in the device layer. This way, we can do it for other objects that embed a kobject. 2. It may save several bytes for each instance of struct device. (On my omap3 (32-bit ARM) based box, it saves 8 bytes per device object.) This patch also introduces dev_set|get_uevent_suppress() helpers to set and query the uevent_suppress attribute, which will help if the kobject becomes a private part of struct device in the future. [This version is against the latest driver-core patch set of Greg, please ignore the last version.] 
Signed-off-by: Ming Lei Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/dock.c | 2 +- drivers/base/core.c | 2 -- drivers/base/firmware_class.c | 4 ++-- drivers/i2c/i2c-core.c | 2 +- drivers/s390/cio/chsc_sch.c | 4 ++-- drivers/s390/cio/css.c | 4 ++-- drivers/s390/cio/device.c | 4 ++-- fs/partitions/check.c | 10 +++++----- include/linux/device.h | 11 ++++++++++- include/linux/kobject.h | 1 + lib/kobject_uevent.c | 7 +++++++ 11 files changed, 33 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/drivers/acpi/dock.c b/drivers/acpi/dock.c index 35094f230b1e..7af7db1ba8c4 100644 --- a/drivers/acpi/dock.c +++ b/drivers/acpi/dock.c @@ -977,7 +977,7 @@ static int dock_add(acpi_handle handle) sizeof(struct dock_station *)); /* we want the dock device to send uevents */ - dock_device->dev.uevent_suppress = 0; + dev_set_uevent_suppress(&dock_device->dev, 0); if (is_dock(handle)) dock_station->flags |= DOCK_IS_DOCK; diff --git a/drivers/base/core.c b/drivers/base/core.c index a90f56f64d6f..95c67ffd71da 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -136,8 +136,6 @@ static int dev_uevent_filter(struct kset *kset, struct kobject *kobj) if (ktype == &device_ktype) { struct device *dev = to_dev(kobj); - if (dev->uevent_suppress) - return 0; if (dev->bus) return 1; if (dev->class) diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c index 44699d9dd85c..d3a59c688fe4 100644 --- a/drivers/base/firmware_class.c +++ b/drivers/base/firmware_class.c @@ -319,7 +319,7 @@ static int fw_register_device(struct device **dev_p, const char *fw_name, f_dev->parent = device; f_dev->class = &firmware_class; dev_set_drvdata(f_dev, fw_priv); - f_dev->uevent_suppress = 1; + dev_set_uevent_suppress(f_dev, 1); retval = device_register(f_dev); if (retval) { dev_err(device, "%s: device_register failed\n", __func__); @@ -366,7 +366,7 @@ static int fw_setup_device(struct firmware *fw, struct device **dev_p, } if (uevent) - f_dev->uevent_suppress = 0; + 
dev_set_uevent_suppress(f_dev, 0); *dev_p = f_dev; goto out; diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index e7d984866de0..fbb9030b68a5 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -841,7 +841,7 @@ int i2c_attach_client(struct i2c_client *client) if (client->driver && !is_newstyle_driver(client->driver)) { client->dev.release = i2c_client_release; - client->dev.uevent_suppress = 1; + dev_set_uevent_suppress(&client->dev, 1); } else client->dev.release = i2c_client_dev_release; diff --git a/drivers/s390/cio/chsc_sch.c b/drivers/s390/cio/chsc_sch.c index 0a2f2edafc03..93eca1731b81 100644 --- a/drivers/s390/cio/chsc_sch.c +++ b/drivers/s390/cio/chsc_sch.c @@ -84,8 +84,8 @@ static int chsc_subchannel_probe(struct subchannel *sch) kfree(private); } else { sch->private = private; - if (sch->dev.uevent_suppress) { - sch->dev.uevent_suppress = 0; + if (dev_get_uevent_suppress(&sch->dev)) { + dev_set_uevent_suppress(&sch->dev, 0); kobject_uevent(&sch->dev.kobj, KOBJ_ADD); } } diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c index 8019288bc6de..427d11d88069 100644 --- a/drivers/s390/cio/css.c +++ b/drivers/s390/cio/css.c @@ -272,7 +272,7 @@ static int css_register_subchannel(struct subchannel *sch) * the subchannel driver can decide itself when it wants to inform * userspace of its existence. */ - sch->dev.uevent_suppress = 1; + dev_set_uevent_suppress(&sch->dev, 1); css_update_ssd_info(sch); /* make it known to the system */ ret = css_sch_device_register(sch); @@ -287,7 +287,7 @@ static int css_register_subchannel(struct subchannel *sch) * a fitting driver module may be loaded based on the * modalias. 
*/ - sch->dev.uevent_suppress = 0; + dev_set_uevent_suppress(&sch->dev, 0); kobject_uevent(&sch->dev.kobj, KOBJ_ADD); } return ret; diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index 23d5752349b5..611d2e001dd5 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -981,7 +981,7 @@ io_subchannel_register(struct work_struct *work) * Now we know this subchannel will stay, we can throw * our delayed uevent. */ - sch->dev.uevent_suppress = 0; + dev_set_uevent_suppress(&sch->dev, 0); kobject_uevent(&sch->dev.kobj, KOBJ_ADD); /* make it known to the system */ ret = ccw_device_register(cdev); @@ -1243,7 +1243,7 @@ static int io_subchannel_probe(struct subchannel *sch) * the ccw_device and exit. This happens for all early * devices, e.g. the console. */ - sch->dev.uevent_suppress = 0; + dev_set_uevent_suppress(&sch->dev, 0); kobject_uevent(&sch->dev.kobj, KOBJ_ADD); cdev->dev.groups = ccwdev_attr_groups; device_initialize(&cdev->dev); diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 6d720243f5f4..38e337d51ced 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -400,7 +400,7 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno, pdev->devt = devt; /* delay uevent until 'holders' subdir is created */ - pdev->uevent_suppress = 1; + dev_set_uevent_suppress(pdev, 1); err = device_add(pdev); if (err) goto out_put; @@ -410,7 +410,7 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno, if (!p->holder_dir) goto out_del; - pdev->uevent_suppress = 0; + dev_set_uevent_suppress(pdev, 0); if (flags & ADDPART_FLAG_WHOLEDISK) { err = device_create_file(pdev, &dev_attr_whole_disk); if (err) @@ -422,7 +422,7 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno, rcu_assign_pointer(ptbl->part[partno], p); /* suppress uevent if the disk supresses it */ - if (!ddev->uevent_suppress) + if (!dev_get_uevent_suppress(pdev)) kobject_uevent(&pdev->kobj, KOBJ_ADD); return p; @@ -455,7 
+455,7 @@ void register_disk(struct gendisk *disk) dev_set_name(ddev, disk->disk_name); /* delay uevents, until we scanned partition table */ - ddev->uevent_suppress = 1; + dev_set_uevent_suppress(ddev, 1); if (device_add(ddev)) return; @@ -490,7 +490,7 @@ void register_disk(struct gendisk *disk) exit: /* announce disk after possible partitions are created */ - ddev->uevent_suppress = 0; + dev_set_uevent_suppress(ddev, 0); kobject_uevent(&ddev->kobj, KOBJ_ADD); /* announce possible partitions */ diff --git a/include/linux/device.h b/include/linux/device.h index 4bea53fe8f4c..914c1016dd8f 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -373,7 +373,6 @@ struct device { struct device_private *p; struct kobject kobj; - unsigned uevent_suppress:1; const char *init_name; /* initial name of the device */ struct device_type *type; @@ -465,6 +464,16 @@ static inline void dev_set_drvdata(struct device *dev, void *data) dev->driver_data = data; } +static inline unsigned int dev_get_uevent_suppress(const struct device *dev) +{ + return dev->kobj.uevent_suppress; +} + +static inline void dev_set_uevent_suppress(struct device *dev, int val) +{ + dev->kobj.uevent_suppress = val; +} + static inline int device_is_registered(struct device *dev) { return dev->kobj.state_in_sysfs; diff --git a/include/linux/kobject.h b/include/linux/kobject.h index c9c214d7bba2..58ae8e00fcdd 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -68,6 +68,7 @@ struct kobject { unsigned int state_in_sysfs:1; unsigned int state_add_uevent_sent:1; unsigned int state_remove_uevent_sent:1; + unsigned int uevent_suppress:1; }; extern int kobject_set_name(struct kobject *kobj, const char *name, ...) 
diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 318328ddbd1c..b2181cc8e4d8 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -118,6 +118,13 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, kset = top_kobj->kset; uevent_ops = kset->uevent_ops; + /* skip the event, if uevent_suppress is set*/ + if (kobj->uevent_suppress) { + pr_debug("kobject: '%s' (%p): %s: uevent_suppress " + "caused the event to drop!\n", + kobject_name(kobj), kobj, __func__); + return 0; + } /* skip the event, if the filter returns zero. */ if (uevent_ops && uevent_ops->filter) if (!uevent_ops->filter(kset, kobj)) { -- cgit v1.2.3 From 669420644c79c207f83fdf9105ae782867e2991f Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Fri, 13 Mar 2009 12:07:36 -0600 Subject: sysfs: only allow one scheduled removal callback per kobj The only way for a sysfs attribute to remove itself (without deadlock) is to use the sysfs_schedule_callback() interface. Vegard Nossum discovered that a poorly written sysfs ->store callback can repeatedly schedule remove callbacks on the same device over and over, e.g. $ while true ; do echo 1 > /sys/devices/.../remove ; done If the 'remove' attribute uses the sysfs_schedule_callback API and also does not protect itself from concurrent accesses, its callback handler will be called multiple times, and will eventually attempt to perform operations on a freed kobject, leading to many problems. Instead of requiring all callers of sysfs_schedule_callback to implement their own synchronization, provide the protection in the infrastructure. Now, sysfs_schedule_callback will only allow one scheduled callback per kobject. On subsequent calls with the same kobject, return -EAGAIN. This is a short term fix. The long term fix is to allow sysfs attributes to remove themselves directly, without any of this callback hokey pokey. 
[cornelia.huck@de.ibm.com: s390 ccwgroup bits] Reported-by: vegard.nossum@gmail.com Signed-off-by: Alex Chiang Acked-by: Cornelia Huck Signed-off-by: Greg Kroah-Hartman --- drivers/s390/cio/ccwgroup.c | 5 +++-- fs/sysfs/file.c | 26 +++++++++++++++++++++++--- 2 files changed, 26 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/drivers/s390/cio/ccwgroup.c b/drivers/s390/cio/ccwgroup.c index 918e6fce2573..b91c1719b075 100644 --- a/drivers/s390/cio/ccwgroup.c +++ b/drivers/s390/cio/ccwgroup.c @@ -104,8 +104,9 @@ ccwgroup_ungroup_store(struct device *dev, struct device_attribute *attr, const rc = device_schedule_callback(dev, ccwgroup_ungroup_callback); out: if (rc) { - /* Release onoff "lock" when ungrouping failed. */ - atomic_set(&gdev->onoff, 0); + if (rc != -EAGAIN) + /* Release onoff "lock" when ungrouping failed. */ + atomic_set(&gdev->onoff, 0); return rc; } return count; diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 1f4a3f877262..289c43a47263 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -659,13 +659,16 @@ void sysfs_remove_file_from_group(struct kobject *kobj, EXPORT_SYMBOL_GPL(sysfs_remove_file_from_group); struct sysfs_schedule_callback_struct { - struct kobject *kobj; + struct list_head workq_list; + struct kobject *kobj; void (*func)(void *); void *data; struct module *owner; struct work_struct work; }; +static DEFINE_MUTEX(sysfs_workq_mutex); +static LIST_HEAD(sysfs_workq); static void sysfs_schedule_callback_work(struct work_struct *work) { struct sysfs_schedule_callback_struct *ss = container_of(work, @@ -674,6 +677,9 @@ static void sysfs_schedule_callback_work(struct work_struct *work) (ss->func)(ss->data); kobject_put(ss->kobj); module_put(ss->owner); + mutex_lock(&sysfs_workq_mutex); + list_del(&ss->workq_list); + mutex_unlock(&sysfs_workq_mutex); kfree(ss); } @@ -695,15 +701,25 @@ static void sysfs_schedule_callback_work(struct work_struct *work) * until @func returns. 
* * Returns 0 if the request was submitted, -ENOMEM if storage could not - * be allocated, -ENODEV if a reference to @owner isn't available. + * be allocated, -ENODEV if a reference to @owner isn't available, + * -EAGAIN if a callback has already been scheduled for @kobj. */ int sysfs_schedule_callback(struct kobject *kobj, void (*func)(void *), void *data, struct module *owner) { - struct sysfs_schedule_callback_struct *ss; + struct sysfs_schedule_callback_struct *ss, *tmp; if (!try_module_get(owner)) return -ENODEV; + + mutex_lock(&sysfs_workq_mutex); + list_for_each_entry_safe(ss, tmp, &sysfs_workq, workq_list) + if (ss->kobj == kobj) { + mutex_unlock(&sysfs_workq_mutex); + return -EAGAIN; + } + mutex_unlock(&sysfs_workq_mutex); + ss = kmalloc(sizeof(*ss), GFP_KERNEL); if (!ss) { module_put(owner); @@ -715,6 +731,10 @@ int sysfs_schedule_callback(struct kobject *kobj, void (*func)(void *), ss->data = data; ss->owner = owner; INIT_WORK(&ss->work, sysfs_schedule_callback_work); + INIT_LIST_HEAD(&ss->workq_list); + mutex_lock(&sysfs_workq_mutex); + list_add_tail(&ss->workq_list, &sysfs_workq); + mutex_unlock(&sysfs_workq_mutex); schedule_work(&ss->work); return 0; } -- cgit v1.2.3 From 095160aee954688a9bad225952c4bee546541e19 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 23 Mar 2009 01:41:27 +0000 Subject: sysfs: fix some bin_vm_ops errors Commit 86c9508eb1c0ce5aa07b5cf1d36b60c54efc3d7a "sysfs: don't block indefinitely for unmapped files" in linux-next crashes the PowerMac G5 when X starts up. It's caught out by the way powerpc's pci_mmap of legacy_mem uses shmem_zero_setup(), substituting a new vma->vm_file whose private_data no longer points to the bin_buffer (substitution done because some versions of X crash if that mmap fails). The fix to this is straightforward: the original vm_file is fput() in that case, so this mmap won't block sysfs at all, so just don't switch over to bin_vm_ops if vm_file has changed. 
But more fixes made before realizing that was the problem:- It should not be an error if bin_page_mkwrite() finds no underlying page_mkwrite(). Check that a file already mmap'ed has the same underlying vm_ops _before_ pointing vma->vm_ops at bin_vm_ops. If the file being mmap'ed is a shmem/tmpfs file, don't fail the mmap on CONFIG_NUMA=y, just because that has a set_policy and get_policy: provide bin_set_policy, bin_get_policy and bin_migrate. Signed-off-by: Hugh Dickins Acked-by: Eric Biederman Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/bin.c | 89 +++++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 79 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c index 96cc2bf6a84e..07703d3ff4a1 100644 --- a/fs/sysfs/bin.c +++ b/fs/sysfs/bin.c @@ -241,9 +241,12 @@ static int bin_page_mkwrite(struct vm_area_struct *vma, struct page *page) struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; int ret; - if (!bb->vm_ops || !bb->vm_ops->page_mkwrite) + if (!bb->vm_ops) return -EINVAL; + if (!bb->vm_ops->page_mkwrite) + return 0; + if (!sysfs_get_active_two(attr_sd)) return -EINVAL; @@ -273,12 +276,78 @@ static int bin_access(struct vm_area_struct *vma, unsigned long addr, return ret; } +#ifdef CONFIG_NUMA +static int bin_set_policy(struct vm_area_struct *vma, struct mempolicy *new) +{ + struct file *file = vma->vm_file; + struct bin_buffer *bb = file->private_data; + struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; + int ret; + + if (!bb->vm_ops || !bb->vm_ops->set_policy) + return 0; + + if (!sysfs_get_active_two(attr_sd)) + return -EINVAL; + + ret = bb->vm_ops->set_policy(vma, new); + + sysfs_put_active_two(attr_sd); + return ret; +} + +static struct mempolicy *bin_get_policy(struct vm_area_struct *vma, + unsigned long addr) +{ + struct file *file = vma->vm_file; + struct bin_buffer *bb = file->private_data; + struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; + struct 
mempolicy *pol; + + if (!bb->vm_ops || !bb->vm_ops->get_policy) + return vma->vm_policy; + + if (!sysfs_get_active_two(attr_sd)) + return vma->vm_policy; + + pol = bb->vm_ops->get_policy(vma, addr); + + sysfs_put_active_two(attr_sd); + return pol; +} + +static int bin_migrate(struct vm_area_struct *vma, const nodemask_t *from, + const nodemask_t *to, unsigned long flags) +{ + struct file *file = vma->vm_file; + struct bin_buffer *bb = file->private_data; + struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; + int ret; + + if (!bb->vm_ops || !bb->vm_ops->migrate) + return 0; + + if (!sysfs_get_active_two(attr_sd)) + return 0; + + ret = bb->vm_ops->migrate(vma, from, to, flags); + + sysfs_put_active_two(attr_sd); + return ret; +} +#endif + static struct vm_operations_struct bin_vm_ops = { .open = bin_vma_open, .close = bin_vma_close, .fault = bin_fault, .page_mkwrite = bin_page_mkwrite, .access = bin_access, +#ifdef CONFIG_NUMA + .set_policy = bin_set_policy, + .get_policy = bin_get_policy, + .migrate = bin_migrate, +#endif }; static int mmap(struct file *file, struct vm_area_struct *vma) @@ -287,7 +356,6 @@ static int mmap(struct file *file, struct vm_area_struct *vma) struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr; struct kobject *kobj = attr_sd->s_parent->s_dir.kobj; - struct vm_operations_struct *vm_ops; int rc; mutex_lock(&bb->mutex); @@ -302,24 +370,25 @@ static int mmap(struct file *file, struct vm_area_struct *vma) goto out_put; rc = attr->mmap(kobj, attr, vma); - vm_ops = vma->vm_ops; - vma->vm_ops = &bin_vm_ops; if (rc) goto out_put; - rc = -EINVAL; - if (bb->mmapped && bb->vm_ops != vma->vm_ops) + /* + * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup() + * to satisfy versions of X which crash if the mmap fails: that + * substitutes a new vm_file, and we don't then want bin_vm_ops. 
+ */ + if (vma->vm_file != file) goto out_put; -#ifdef CONFIG_NUMA rc = -EINVAL; - if (vm_ops && ((vm_ops->set_policy || vm_ops->get_policy || vm_ops->migrate))) + if (bb->mmapped && bb->vm_ops != vma->vm_ops) goto out_put; -#endif rc = 0; bb->mmapped = 1; - bb->vm_ops = vm_ops; + bb->vm_ops = vma->vm_ops; + vma->vm_ops = &bin_vm_ops; out_put: sysfs_put_active_two(attr_sd); out_unlock: -- cgit v1.2.3