From fe15ce446beb3a33583af81ffe6c9d01a75314ed Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:23 +1100 Subject: fs: change d_delete semantics Change d_delete from a dentry deletion notification to a dentry caching advise, more like ->drop_inode. Require it to be constant and idempotent, and not take d_lock. This is how all existing filesystems use the callback anyway. This makes fine grained dentry locking of dput and dentry lru scanning much simpler. Signed-off-by: Nick Piggin --- drivers/staging/smbfs/dir.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/staging/smbfs/dir.c b/drivers/staging/smbfs/dir.c index f088ea2f6ac9..2270d4822c2f 100644 --- a/drivers/staging/smbfs/dir.c +++ b/drivers/staging/smbfs/dir.c @@ -276,7 +276,7 @@ smb_dir_open(struct inode *dir, struct file *file) static int smb_lookup_validate(struct dentry *, struct nameidata *); static int smb_hash_dentry(struct dentry *, struct qstr *); static int smb_compare_dentry(struct dentry *, struct qstr *, struct qstr *); -static int smb_delete_dentry(struct dentry *); +static int smb_delete_dentry(const struct dentry *); static const struct dentry_operations smbfs_dentry_operations = { @@ -367,7 +367,7 @@ out: * We use this to unhash dentries with bad inodes. */ static int -smb_delete_dentry(struct dentry * dentry) +smb_delete_dentry(const struct dentry *dentry) { if (dentry->d_inode) { if (is_bad_inode(dentry->d_inode)) { -- cgit v1.2.3 From fb2d5b86aff355a27ebfc132d3c99f4a940cc3fe Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:26 +1100 Subject: fs: name case update method smpfs and ncpfs want to update a live dentry name in-place. Rather than have them open code the locking, provide a documented dcache API. Signed-off-by: Nick Piggin --- drivers/staging/smbfs/cache.c | 4 ++-- fs/dcache.c | 27 +++++++++++++++++++++++++++ fs/ncpfs/dir.c | 35 ++++++----------------------------- include/linux/dcache.h | 2 ++ 4 files changed, 37 insertions(+), 31 deletions(-) (limited to 'drivers') diff --git a/drivers/staging/smbfs/cache.c b/drivers/staging/smbfs/cache.c index dbb98658148b..dbd2e1df3ba9 100644 --- a/drivers/staging/smbfs/cache.c +++ b/drivers/staging/smbfs/cache.c @@ -145,8 +145,8 @@ smb_fill_cache(struct file *filp, void *dirent, filldir_t filldir, goto end_advance; } else { hashed = 1; - memcpy((char *) newdent->d_name.name, qname->name, - newdent->d_name.len); + /* dir i_mutex is locked because we're in readdir */ + dentry_update_name_case(newdent, qname); } if (!newdent->d_inode) { diff --git a/fs/dcache.c b/fs/dcache.c index 6ee6bc40cb63..814e5f491e9c 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1589,6 +1589,33 @@ void d_rehash(struct dentry * entry) } EXPORT_SYMBOL(d_rehash); +/** + * dentry_update_name_case - update case insensitive dentry with a new name + * @dentry: dentry to be updated + * @name: new name + * + * Update a case insensitive dentry with new case of name. + * + * dentry must have been returned by d_lookup with name @name. Old and new + * name lengths must match (ie. no d_compare which allows mismatched name + * lengths). + * + * Parent inode i_mutex must be held over d_lookup and into this call (to + * keep renames and concurrent inserts, and readdir(2) away). + */ +void dentry_update_name_case(struct dentry *dentry, struct qstr *name) +{ + BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex)); + BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */ + + spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); + memcpy((unsigned char *)dentry->d_name.name, name->name, name->len); + spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_lock); +} +EXPORT_SYMBOL(dentry_update_name_case); + /* * When switching names, the actual string doesn't strictly have to * be preserved in the target - because we're dropping the target diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index d6e6453881ce..e80ea4e37c48 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -611,35 +611,12 @@ ncp_fill_cache(struct file *filp, void *dirent, filldir_t filldir, shrink_dcache_parent(newdent); /* - * It is not as dangerous as it looks. NetWare's OS2 namespace is - * case preserving yet case insensitive. So we update dentry's name - * as received from server. We found dentry via d_lookup with our - * hash, so we know that hash does not change, and so replacing name - * should be reasonably safe. + * NetWare's OS2 namespace is case preserving yet case + * insensitive. So we update dentry's name as received from + * server. Parent dir's i_mutex is locked because we're in + * readdir. */ - if (qname.len == newdent->d_name.len && - memcmp(newdent->d_name.name, qname.name, newdent->d_name.len)) { - struct inode *inode = newdent->d_inode; - - /* - * Inside ncpfs all uses of d_name are either for debugging, - * or on functions which acquire inode mutex (mknod, creat, - * lookup). So grab i_mutex here, to be sure. d_path - * uses dcache_lock when generating path, so we should too. - * And finally d_compare is protected by dentry's d_lock, so - * here we go. - */ - if (inode) - mutex_lock(&inode->i_mutex); - spin_lock(&dcache_lock); - spin_lock(&newdent->d_lock); - memcpy((char *) newdent->d_name.name, qname.name, - newdent->d_name.len); - spin_unlock(&newdent->d_lock); - spin_unlock(&dcache_lock); - if (inode) - mutex_unlock(&inode->i_mutex); - } + dentry_update_name_case(newdent, &qname); } if (!newdent->d_inode) { @@ -657,7 +634,7 @@ ncp_fill_cache(struct file *filp, void *dirent, filldir_t filldir, } else { struct inode *inode = newdent->d_inode; - mutex_lock(&inode->i_mutex); + mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); ncp_update_inode2(inode, entry); mutex_unlock(&inode->i_mutex); } diff --git a/include/linux/dcache.h b/include/linux/dcache.h index cbfc9567e4e9..6cdf4995c90a 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -290,6 +290,8 @@ static inline struct dentry *d_add_unique(struct dentry *entry, struct inode *in return res; } +extern void dentry_update_name_case(struct dentry *, struct qstr *); + /* used for rename() and baskets */ extern void d_move(struct dentry *, struct dentry *); extern struct dentry *d_ancestor(struct dentry *, struct dentry *); -- cgit v1.2.3 From 621e155a3591962420eacdd39f6f0aa29ceb221e Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:27 +1100 Subject: fs: change d_compare for rcu-walk Change d_compare so it may be called from lock-free RCU lookups. This does put significant restrictions on what may be done from the callback, however there don't seem to have been any problems with in-tree fses. If some strange use case pops up that _really_ cannot cope with the rcu-walk rules, we can just add new rcu-unaware callbacks, which would cause name lookup to drop out of rcu-walk mode. For in-tree filesystems, this is just a mechanical change. Signed-off-by: Nick Piggin --- Documentation/filesystems/Locking | 4 +- Documentation/filesystems/porting | 7 +++ Documentation/filesystems/vfs.txt | 26 +++++++++-- drivers/staging/smbfs/dir.c | 16 ++++--- fs/adfs/dir.c | 8 ++-- fs/affs/namei.c | 46 ++++++++++++-------- fs/cifs/dir.c | 12 ++--- fs/dcache.c | 4 +- fs/fat/namei_msdos.c | 14 +++--- fs/fat/namei_vfat.c | 33 ++++++++------ fs/hfs/hfs_fs.h | 5 ++- fs/hfs/string.c | 14 +++--- fs/hfsplus/hfsplus_fs.h | 5 ++- fs/hfsplus/unicode.c | 15 ++++--- fs/hpfs/dentry.c | 22 ++++++---- fs/isofs/inode.c | 92 ++++++++++++++++++++------------------- fs/isofs/namei.c | 3 +- fs/jfs/namei.c | 11 +++-- fs/ncpfs/dir.c | 25 ++++++----- fs/ncpfs/ncplib_kernel.h | 8 ++-- fs/proc/proc_sysctl.c | 13 +++--- include/linux/dcache.h | 12 +++-- include/linux/ncp_fs.h | 4 +- 23 files changed, 242 insertions(+), 157 deletions(-) (limited to 'drivers') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 33fa3e5d38fd..9a76f8d8bf95 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -11,7 +11,9 @@ be able to use diff(1). prototypes: int (*d_revalidate)(struct dentry *, int); int (*d_hash) (struct dentry *, struct qstr *); - int (*d_compare) (struct dentry *, struct qstr *, struct qstr *); + int (*d_compare)(const struct dentry *, const struct inode *, + const struct dentry *, const struct inode *, + unsigned int, const char *, const struct qstr *); int (*d_delete)(struct dentry *); void (*d_release)(struct dentry *); void (*d_iput)(struct dentry *, struct inode *); diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index 9e71c9ad3108..d44511e20828 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -326,3 +326,10 @@ to it. unreferenced dentries, and is now only called when the dentry refcount goes to 0. Even on 0 refcount transition, it must be able to tolerate being called 0, 1, or more times (eg. constant, idempotent). + +--- +[mandatory] + + .d_compare() calling convention and locking rules are significantly +changed. Read updated documentation in Documentation/filesystems/vfs.txt (and +look at examples of other filesystems) for guidance. diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 95c0a93f056c..250681b8c7cc 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -848,7 +848,9 @@ defined: struct dentry_operations { int (*d_revalidate)(struct dentry *, struct nameidata *); int (*d_hash)(struct dentry *, struct qstr *); - int (*d_compare)(struct dentry *, struct qstr *, struct qstr *); + int (*d_compare)(const struct dentry *, const struct inode *, + const struct dentry *, const struct inode *, + unsigned int, const char *, const struct qstr *); int (*d_delete)(const struct dentry *); void (*d_release)(struct dentry *); void (*d_iput)(struct dentry *, struct inode *); @@ -860,9 +862,27 @@ struct dentry_operations { dcache. Most filesystems leave this as NULL, because all their dentries in the dcache are valid - d_hash: called when the VFS adds a dentry to the hash table + d_hash: called when the VFS adds a dentry to the hash table. The first + dentry passed to d_hash is the parent directory that the name is + to be hashed into. - d_compare: called when a dentry should be compared with another + d_compare: called to compare a dentry name with a given name. The first + dentry is the parent of the dentry to be compared, the second is + the parent's inode, then the dentry and inode (may be NULL) of the + child dentry. len and name string are properties of the dentry to be + compared. qstr is the name to compare it with. + + Must be constant and idempotent, and should not take locks if + possible, and should not or store into the dentry or inodes. + Should not dereference pointers outside the dentry or inodes without + lots of care (eg. d_parent, d_inode, d_name should not be used). + + However, our vfsmount is pinned, and RCU held, so the dentries and + inodes won't disappear, neither will our sb or filesystem module. + ->i_sb and ->d_sb may be used. + + It is a tricky calling convention because it needs to be called under + "rcu-walk", ie. without any locks or references on things. d_delete: called when the last reference to a dentry is dropped and the dcache is deciding whether or not to cache it. Return 1 to delete diff --git a/drivers/staging/smbfs/dir.c b/drivers/staging/smbfs/dir.c index 2270d4822c2f..bd63229f43f2 100644 --- a/drivers/staging/smbfs/dir.c +++ b/drivers/staging/smbfs/dir.c @@ -275,7 +275,10 @@ smb_dir_open(struct inode *dir, struct file *file) */ static int smb_lookup_validate(struct dentry *, struct nameidata *); static int smb_hash_dentry(struct dentry *, struct qstr *); -static int smb_compare_dentry(struct dentry *, struct qstr *, struct qstr *); +static int smb_compare_dentry(const struct dentry *, + const struct inode *, + const struct dentry *, const struct inode *, + unsigned int, const char *, const struct qstr *); static int smb_delete_dentry(const struct dentry *); static const struct dentry_operations smbfs_dentry_operations = @@ -347,14 +350,17 @@ smb_hash_dentry(struct dentry *dir, struct qstr *this) } static int -smb_compare_dentry(struct dentry *dir, struct qstr *a, struct qstr *b) +smb_compare_dentry(const struct dentry *parent, + const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) { int i, result = 1; - if (a->len != b->len) + if (len != name->len) goto out; - for (i=0; i < a->len; i++) { - if (tolower(a->name[i]) != tolower(b->name[i])) + for (i=0; i < len; i++) { + if (tolower(str[i]) != tolower(name->name[i])) goto out; } result = 0; diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c index f4287e4de744..c8ed66162bd4 100644 --- a/fs/adfs/dir.c +++ b/fs/adfs/dir.c @@ -237,17 +237,19 @@ adfs_hash(struct dentry *parent, struct qstr *qstr) * requirements of the underlying filesystem. */ static int -adfs_compare(struct dentry *parent, struct qstr *entry, struct qstr *name) +adfs_compare(const struct dentry *parent, const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) { int i; - if (entry->len != name->len) + if (len != name->len) return 1; for (i = 0; i < name->len; i++) { char a, b; - a = entry->name[i]; + a = str[i]; b = name->name[i]; if (a >= 'A' && a <= 'Z') diff --git a/fs/affs/namei.c b/fs/affs/namei.c index 914d1c0bc07a..547d5deb0d42 100644 --- a/fs/affs/namei.c +++ b/fs/affs/namei.c @@ -14,10 +14,16 @@ typedef int (*toupper_t)(int); static int affs_toupper(int ch); static int affs_hash_dentry(struct dentry *, struct qstr *); -static int affs_compare_dentry(struct dentry *, struct qstr *, struct qstr *); +static int affs_compare_dentry(const struct dentry *parent, + const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name); static int affs_intl_toupper(int ch); static int affs_intl_hash_dentry(struct dentry *, struct qstr *); -static int affs_intl_compare_dentry(struct dentry *, struct qstr *, struct qstr *); +static int affs_intl_compare_dentry(const struct dentry *parent, + const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name); const struct dentry_operations affs_dentry_operations = { .d_hash = affs_hash_dentry, @@ -88,29 +94,29 @@ affs_intl_hash_dentry(struct dentry *dentry, struct qstr *qstr) return __affs_hash_dentry(dentry, qstr, affs_intl_toupper); } -static inline int -__affs_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b, toupper_t toupper) +static inline int __affs_compare_dentry(unsigned int len, + const char *str, const struct qstr *name, toupper_t toupper) { - const u8 *aname = a->name; - const u8 *bname = b->name; - int len; + const u8 *aname = str; + const u8 *bname = name->name; - /* 'a' is the qstr of an already existing dentry, so the name - * must be valid. 'b' must be validated first. + /* + * 'str' is the name of an already existing dentry, so the name + * must be valid. 'name' must be validated first. */ - if (affs_check_name(b->name,b->len)) + if (affs_check_name(name->name, name->len)) return 1; - /* If the names are longer than the allowed 30 chars, + /* + * If the names are longer than the allowed 30 chars, * the excess is ignored, so their length may differ. */ - len = a->len; if (len >= 30) { - if (b->len < 30) + if (name->len < 30) return 1; len = 30; - } else if (len != b->len) + } else if (len != name->len) return 1; for (; len > 0; len--) @@ -121,14 +127,18 @@ __affs_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b, tou } static int -affs_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b) +affs_compare_dentry(const struct dentry *parent, const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) { - return __affs_compare_dentry(dentry, a, b, affs_toupper); + return __affs_compare_dentry(len, str, name, affs_toupper); } static int -affs_intl_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b) +affs_intl_compare_dentry(const struct dentry *parent,const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) { - return __affs_compare_dentry(dentry, a, b, affs_intl_toupper); + return __affs_compare_dentry(len, str, name, affs_intl_toupper); } /* diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 521d841b1fd1..c60133f0d8e4 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -715,13 +715,15 @@ static int cifs_ci_hash(struct dentry *dentry, struct qstr *q) return 0; } -static int cifs_ci_compare(struct dentry *dentry, struct qstr *a, - struct qstr *b) +static int cifs_ci_compare(const struct dentry *parent, + const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) { - struct nls_table *codepage = CIFS_SB(dentry->d_inode->i_sb)->local_nls; + struct nls_table *codepage = CIFS_SB(pinode->i_sb)->local_nls; - if ((a->len == b->len) && - (nls_strnicmp(codepage, a->name, b->name, a->len) == 0)) + if ((name->len == len) && + (nls_strnicmp(codepage, name->name, str, len) == 0)) return 0; return 1; } diff --git a/fs/dcache.c b/fs/dcache.c index 814e5f491e9c..7075555fbb04 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1437,7 +1437,9 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) */ qstr = &dentry->d_name; if (parent->d_op && parent->d_op->d_compare) { - if (parent->d_op->d_compare(parent, qstr, name)) + if (parent->d_op->d_compare(parent, parent->d_inode, + dentry, dentry->d_inode, + qstr->len, qstr->name, name)) goto next; } else { if (qstr->len != len) diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index 3345aabd1dd7..99d3c7ac973c 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -164,16 +164,18 @@ static int msdos_hash(struct dentry *dentry, struct qstr *qstr) * Compare two msdos names. If either of the names are invalid, * we fall back to doing the standard name comparison. */ -static int msdos_cmp(struct dentry *dentry, struct qstr *a, struct qstr *b) +static int msdos_cmp(const struct dentry *parent, const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) { - struct fat_mount_options *options = &MSDOS_SB(dentry->d_sb)->options; + struct fat_mount_options *options = &MSDOS_SB(parent->d_sb)->options; unsigned char a_msdos_name[MSDOS_NAME], b_msdos_name[MSDOS_NAME]; int error; - error = msdos_format_name(a->name, a->len, a_msdos_name, options); + error = msdos_format_name(name->name, name->len, a_msdos_name, options); if (error) goto old_compare; - error = msdos_format_name(b->name, b->len, b_msdos_name, options); + error = msdos_format_name(str, len, b_msdos_name, options); if (error) goto old_compare; error = memcmp(a_msdos_name, b_msdos_name, MSDOS_NAME); @@ -182,8 +184,8 @@ out: old_compare: error = 1; - if (a->len == b->len) - error = memcmp(a->name, b->name, a->len); + if (name->len == len) + error = memcmp(name->name, str, len); goto out; } diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index b936703b8924..95e00ab84c3f 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -85,15 +85,18 @@ static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd) } /* returns the length of a struct qstr, ignoring trailing dots */ -static unsigned int vfat_striptail_len(struct qstr *qstr) +static unsigned int __vfat_striptail_len(unsigned int len, const char *name) { - unsigned int len = qstr->len; - - while (len && qstr->name[len - 1] == '.') + while (len && name[len - 1] == '.') len--; return len; } +static unsigned int vfat_striptail_len(const struct qstr *qstr) +{ + return __vfat_striptail_len(qstr->len, qstr->name); +} + /* * Compute the hash for the vfat name corresponding to the dentry. * Note: if the name is invalid, we leave the hash code unchanged so @@ -133,16 +136,18 @@ static int vfat_hashi(struct dentry *dentry, struct qstr *qstr) /* * Case insensitive compare of two vfat names. */ -static int vfat_cmpi(struct dentry *dentry, struct qstr *a, struct qstr *b) +static int vfat_cmpi(const struct dentry *parent, const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) { - struct nls_table *t = MSDOS_SB(dentry->d_inode->i_sb)->nls_io; + struct nls_table *t = MSDOS_SB(parent->d_sb)->nls_io; unsigned int alen, blen; /* A filename cannot end in '.' or we treat it like it has none */ - alen = vfat_striptail_len(a); - blen = vfat_striptail_len(b); + alen = vfat_striptail_len(name); + blen = __vfat_striptail_len(len, str); if (alen == blen) { - if (nls_strnicmp(t, a->name, b->name, alen) == 0) + if (nls_strnicmp(t, name->name, str, alen) == 0) return 0; } return 1; @@ -151,15 +156,17 @@ static int vfat_cmpi(struct dentry *dentry, struct qstr *a, struct qstr *b) /* * Case sensitive compare of two vfat names. */ -static int vfat_cmp(struct dentry *dentry, struct qstr *a, struct qstr *b) +static int vfat_cmp(const struct dentry *parent, const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) { unsigned int alen, blen; /* A filename cannot end in '.' or we treat it like it has none */ - alen = vfat_striptail_len(a); - blen = vfat_striptail_len(b); + alen = vfat_striptail_len(name); + blen = __vfat_striptail_len(len, str); if (alen == blen) { - if (strncmp(a->name, b->name, alen) == 0) + if (strncmp(name->name, str, alen) == 0) return 0; } return 1; diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h index c8cffb81e849..8cd876f0e961 100644 --- a/fs/hfs/hfs_fs.h +++ b/fs/hfs/hfs_fs.h @@ -216,7 +216,10 @@ extern const struct dentry_operations hfs_dentry_operations; extern int hfs_hash_dentry(struct dentry *, struct qstr *); extern int hfs_strcmp(const unsigned char *, unsigned int, const unsigned char *, unsigned int); -extern int hfs_compare_dentry(struct dentry *, struct qstr *, struct qstr *); +extern int hfs_compare_dentry(const struct dentry *parent, + const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name); /* trans.c */ extern void hfs_asc2mac(struct super_block *, struct hfs_name *, struct qstr *); diff --git a/fs/hfs/string.c b/fs/hfs/string.c index 927a5af79428..aaf90d0d6940 100644 --- a/fs/hfs/string.c +++ b/fs/hfs/string.c @@ -92,21 +92,21 @@ int hfs_strcmp(const unsigned char *s1, unsigned int len1, * Test for equality of two strings in the HFS filename character ordering. * return 1 on failure and 0 on success */ -int hfs_compare_dentry(struct dentry *dentry, struct qstr *s1, struct qstr *s2) +int hfs_compare_dentry(const struct dentry *parent, const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) { const unsigned char *n1, *n2; - int len; - len = s1->len; if (len >= HFS_NAMELEN) { - if (s2->len < HFS_NAMELEN) + if (name->len < HFS_NAMELEN) return 1; len = HFS_NAMELEN; - } else if (len != s2->len) + } else if (len != name->len) return 1; - n1 = s1->name; - n2 = s2->name; + n1 = str; + n2 = name->name; while (len--) { if (caseorder[*n1++] != caseorder[*n2++]) return 1; diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index cb3653efb57a..7aa96eefe483 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -380,7 +380,10 @@ int hfsplus_strcmp(const struct hfsplus_unistr *, const struct hfsplus_unistr *) int hfsplus_uni2asc(struct super_block *, const struct hfsplus_unistr *, char *, int *); int hfsplus_asc2uni(struct super_block *, struct hfsplus_unistr *, const char *, int); int hfsplus_hash_dentry(struct dentry *dentry, struct qstr *str); -int hfsplus_compare_dentry(struct dentry *dentry, struct qstr *s1, struct qstr *s2); +int hfsplus_compare_dentry(const struct dentry *parent, + const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name); /* wrapper.c */ int hfsplus_read_wrapper(struct super_block *); diff --git a/fs/hfsplus/unicode.c b/fs/hfsplus/unicode.c index b66d67de882c..b178c997efa8 100644 --- a/fs/hfsplus/unicode.c +++ b/fs/hfsplus/unicode.c @@ -363,9 +363,12 @@ int hfsplus_hash_dentry(struct dentry *dentry, struct qstr *str) * Composed unicode characters are decomposed and case-folding is performed * if the appropriate bits are (un)set on the superblock. */ -int hfsplus_compare_dentry(struct dentry *dentry, struct qstr *s1, struct qstr *s2) +int hfsplus_compare_dentry(const struct dentry *parent, + const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) { - struct super_block *sb = dentry->d_sb; + struct super_block *sb = parent->d_sb; int casefold, decompose, size; int dsize1, dsize2, len1, len2; const u16 *dstr1, *dstr2; @@ -375,10 +378,10 @@ int hfsplus_compare_dentry(struct dentry *dentry, struct qstr *s1, struct qstr * casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags); decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); - astr1 = s1->name; - len1 = s1->len; - astr2 = s2->name; - len2 = s2->len; + astr1 = str; + len1 = len; + astr2 = name->name; + len2 = name->len; dsize1 = dsize2 = 0; dstr1 = dstr2 = NULL; diff --git a/fs/hpfs/dentry.c b/fs/hpfs/dentry.c index 67d9d36b3d5f..dd9b1e74a734 100644 --- a/fs/hpfs/dentry.c +++ b/fs/hpfs/dentry.c @@ -34,19 +34,25 @@ static int hpfs_hash_dentry(struct dentry *dentry, struct qstr *qstr) return 0; } -static int hpfs_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b) +static int hpfs_compare_dentry(const struct dentry *parent, + const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) { - unsigned al=a->len; - unsigned bl=b->len; - hpfs_adjust_length(a->name, &al); + unsigned al = len; + unsigned bl = name->len; + + hpfs_adjust_length(str, &al); /*hpfs_adjust_length(b->name, &bl);*/ - /* 'a' is the qstr of an already existing dentry, so the name - * must be valid. 'b' must be validated first. + + /* + * 'str' is the nane of an already existing dentry, so the name + * must be valid. 'name' must be validated first. */ - if (hpfs_chk_name(b->name, &bl)) + if (hpfs_chk_name(name->name, &bl)) return 1; - if (hpfs_compare_names(dentry->d_sb, a->name, al, b->name, bl, 0)) + if (hpfs_compare_names(parent->d_sb, str, al, name->name, bl, 0)) return 1; return 0; } diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index bfdeb82a53be..7b0fbc61af81 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -28,14 +28,26 @@ static int isofs_hashi(struct dentry *parent, struct qstr *qstr); static int isofs_hash(struct dentry *parent, struct qstr *qstr); -static int isofs_dentry_cmpi(struct dentry *dentry, struct qstr *a, struct qstr *b); -static int isofs_dentry_cmp(struct dentry *dentry, struct qstr *a, struct qstr *b); +static int isofs_dentry_cmpi(const struct dentry *parent, + const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name); +static int isofs_dentry_cmp(const struct dentry *parent, + const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name); #ifdef CONFIG_JOLIET static int isofs_hashi_ms(struct dentry *parent, struct qstr *qstr); static int isofs_hash_ms(struct dentry *parent, struct qstr *qstr); -static int isofs_dentry_cmpi_ms(struct dentry *dentry, struct qstr *a, struct qstr *b); -static int isofs_dentry_cmp_ms(struct dentry *dentry, struct qstr *a, struct qstr *b); +static int isofs_dentry_cmpi_ms(const struct dentry *parent, + const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name); +static int isofs_dentry_cmp_ms(const struct dentry *parent, + const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name); #endif static void isofs_put_super(struct super_block *sb) @@ -206,49 +218,31 @@ isofs_hashi_common(struct dentry *dentry, struct qstr *qstr, int ms) } /* - * Case insensitive compare of two isofs names. + * Compare of two isofs names. */ -static int isofs_dentry_cmpi_common(struct dentry *dentry, struct qstr *a, - struct qstr *b, int ms) +static int isofs_dentry_cmp_common( + unsigned int len, const char *str, + const struct qstr *name, int ms, int ci) { int alen, blen; /* A filename cannot end in '.' or we treat it like it has none */ - alen = a->len; - blen = b->len; + alen = name->len; + blen = len; if (ms) { - while (alen && a->name[alen-1] == '.') + while (alen && name->name[alen-1] == '.') alen--; - while (blen && b->name[blen-1] == '.') + while (blen && str[blen-1] == '.') blen--; } if (alen == blen) { - if (strnicmp(a->name, b->name, alen) == 0) - return 0; - } - return 1; -} - -/* - * Case sensitive compare of two isofs names. - */ -static int isofs_dentry_cmp_common(struct dentry *dentry, struct qstr *a, - struct qstr *b, int ms) -{ - int alen, blen; - - /* A filename cannot end in '.' or we treat it like it has none */ - alen = a->len; - blen = b->len; - if (ms) { - while (alen && a->name[alen-1] == '.') - alen--; - while (blen && b->name[blen-1] == '.') - blen--; - } - if (alen == blen) { - if (strncmp(a->name, b->name, alen) == 0) - return 0; + if (ci) { + if (strnicmp(name->name, str, alen) == 0) + return 0; + } else { + if (strncmp(name->name, str, alen) == 0) + return 0; + } } return 1; } @@ -266,15 +260,19 @@ isofs_hashi(struct dentry *dentry, struct qstr *qstr) } static int -isofs_dentry_cmp(struct dentry *dentry,struct qstr *a,struct qstr *b) +isofs_dentry_cmp(const struct dentry *parent, const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) { - return isofs_dentry_cmp_common(dentry, a, b, 0); + return isofs_dentry_cmp_common(len, str, name, 0, 0); } static int -isofs_dentry_cmpi(struct dentry *dentry,struct qstr *a,struct qstr *b) +isofs_dentry_cmpi(const struct dentry *parent, const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) { - return isofs_dentry_cmpi_common(dentry, a, b, 0); + return isofs_dentry_cmp_common(len, str, name, 0, 1); } #ifdef CONFIG_JOLIET @@ -291,15 +289,19 @@ isofs_hashi_ms(struct dentry *dentry, struct qstr *qstr) } static int -isofs_dentry_cmp_ms(struct dentry *dentry,struct qstr *a,struct qstr *b) +isofs_dentry_cmp_ms(const struct dentry *parent, const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) { - return isofs_dentry_cmp_common(dentry, a, b, 1); + return isofs_dentry_cmp_common(len, str, name, 1, 0); } static int -isofs_dentry_cmpi_ms(struct dentry *dentry,struct qstr *a,struct qstr *b) +isofs_dentry_cmpi_ms(const struct dentry *parent, const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) { - return isofs_dentry_cmpi_common(dentry, a, b, 1); + return isofs_dentry_cmp_common(len, str, name, 1, 1); } #endif diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c index 0d23abfd4280..715f7d318046 100644 --- a/fs/isofs/namei.c +++ b/fs/isofs/namei.c @@ -37,7 +37,8 @@ isofs_cmp(struct dentry *dentry, const char *compare, int dlen) qstr.name = compare; qstr.len = dlen; - return dentry->d_op->d_compare(dentry, &dentry->d_name, &qstr); + return dentry->d_op->d_compare(NULL, NULL, NULL, NULL, + dentry->d_name.len, dentry->d_name.name, &qstr); } /* diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 2da1546161fa..92129016cd79 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -1587,14 +1587,17 @@ static int jfs_ci_hash(struct dentry *dir, struct qstr *this) return 0; } -static int jfs_ci_compare(struct dentry *dir, struct qstr *a, struct qstr *b) +static int jfs_ci_compare(const struct dentry *parent, + const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) { int i, result = 1; - if (a->len != b->len) + if (len != name->len) goto out; - for (i=0; i < a->len; i++) { - if (tolower(a->name[i]) != tolower(b->name[i])) + for (i=0; i < len; i++) { + if (tolower(str[i]) != tolower(name->name[i])) goto out; } result = 0; diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index e80ea4e37c48..3bcc68aed416 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -75,7 +75,9 @@ const struct inode_operations ncp_dir_inode_operations = */ static int ncp_lookup_validate(struct dentry *, struct nameidata *); static int ncp_hash_dentry(struct dentry *, struct qstr *); -static int ncp_compare_dentry (struct dentry *, struct qstr *, struct qstr *); +static int ncp_compare_dentry(const struct dentry *, const struct inode *, + const struct dentry *, const struct inode *, + unsigned int, const char *, const struct qstr *); static int ncp_delete_dentry(const struct dentry *); static const struct dentry_operations ncp_dentry_operations = @@ -113,10 +115,10 @@ static inline int ncp_preserve_entry_case(struct inode *i, __u32 nscreator) #define ncp_preserve_case(i) (ncp_namespace(i) != NW_NS_DOS) -static inline int ncp_case_sensitive(struct dentry *dentry) +static inline int ncp_case_sensitive(const struct inode *i) { #ifdef CONFIG_NCPFS_NFS_NS - return ncp_namespace(dentry->d_inode) == NW_NS_NFS; + return ncp_namespace(i) == NW_NS_NFS; #else return 0; #endif /* CONFIG_NCPFS_NFS_NS */ @@ -129,12 +131,13 @@ static inline int ncp_case_sensitive(struct dentry *dentry) static int ncp_hash_dentry(struct dentry *dentry, struct qstr *this) { - if (!ncp_case_sensitive(dentry)) { + if (!ncp_case_sensitive(dentry->d_inode)) { + struct super_block *sb = dentry->d_sb; struct nls_table *t; unsigned long hash; int i; - t = NCP_IO_TABLE(dentry); + t = NCP_IO_TABLE(sb); hash = init_name_hash(); for (i=0; ilen ; i++) hash = partial_name_hash(ncp_tolower(t, this->name[i]), @@ -145,15 +148,17 @@ ncp_hash_dentry(struct dentry *dentry, struct qstr *this) } static int -ncp_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b) +ncp_compare_dentry(const struct dentry *parent, const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) { - if (a->len != b->len) + if (len != name->len) return 1; - if (ncp_case_sensitive(dentry)) - return strncmp(a->name, b->name, a->len); + if (ncp_case_sensitive(pinode)) + return strncmp(str, name->name, len); - return ncp_strnicmp(NCP_IO_TABLE(dentry), a->name, b->name, a->len); + return ncp_strnicmp(NCP_IO_TABLE(pinode->i_sb), str, name->name, len); } /* diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h index 3c57eca634ce..244d1b73fda7 100644 --- a/fs/ncpfs/ncplib_kernel.h +++ b/fs/ncpfs/ncplib_kernel.h @@ -135,7 +135,7 @@ int ncp__vol2io(struct ncp_server *, unsigned char *, unsigned int *, const unsigned char *, unsigned int, int); #define NCP_ESC ':' -#define NCP_IO_TABLE(dentry) (NCP_SERVER((dentry)->d_inode)->nls_io) +#define NCP_IO_TABLE(sb) (NCP_SBP(sb)->nls_io) #define ncp_tolower(t, c) nls_tolower(t, c) #define ncp_toupper(t, c) nls_toupper(t, c) #define ncp_strnicmp(t, s1, s2, len) \ @@ -150,15 +150,15 @@ int ncp__io2vol(unsigned char *, unsigned int *, int ncp__vol2io(unsigned char *, unsigned int *, const unsigned char *, unsigned int, int); -#define NCP_IO_TABLE(dentry) NULL +#define NCP_IO_TABLE(sb) NULL #define ncp_tolower(t, c) tolower(c) #define ncp_toupper(t, c) toupper(c) #define ncp_io2vol(S,m,i,n,k,U) ncp__io2vol(m,i,n,k,U) #define ncp_vol2io(S,m,i,n,k,U) ncp__vol2io(m,i,n,k,U) -static inline int ncp_strnicmp(struct nls_table *t, const unsigned char *s1, - const unsigned char *s2, int len) +static inline int ncp_strnicmp(const struct nls_table *t, + const unsigned char *s1, const unsigned char *s2, int len) { while (len--) { if (tolower(*s1++) != tolower(*s2++)) diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index a256d770ea18..ae4b0fd9033f 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -397,15 +397,16 @@ static int proc_sys_delete(const struct dentry *dentry) return !!PROC_I(dentry->d_inode)->sysctl->unregistering; } -static int proc_sys_compare(struct dentry *dir, struct qstr *qstr, - struct qstr *name) +static int proc_sys_compare(const struct dentry *parent, + const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) { - struct dentry *dentry = container_of(qstr, struct dentry, d_name); - if (qstr->len != name->len) + if (name->len != len) return 1; - if (memcmp(qstr->name, name->name, name->len)) + if (memcmp(name->name, str, len)) return 1; - return !sysctl_is_seen(PROC_I(dentry->d_inode)->sysctl); + return !sysctl_is_seen(PROC_I(inode)->sysctl); } static const struct dentry_operations proc_sys_dentry_operations = { diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 6cdf4995c90a..75a072bf2a34 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -134,7 +134,9 @@ enum dentry_d_lock_class struct dentry_operations { int (*d_revalidate)(struct dentry *, struct nameidata *); int (*d_hash)(struct dentry *, struct qstr *); - int (*d_compare)(struct dentry *, struct qstr *, struct qstr *); + int (*d_compare)(const struct dentry *, const struct inode *, + const struct dentry *, const struct inode *, + unsigned int, const char *, const struct qstr *); int (*d_delete)(const struct dentry *); void (*d_release)(struct dentry *); void (*d_iput)(struct dentry *, struct inode *); @@ -145,12 +147,8 @@ struct dentry_operations { * Locking rules for dentry_operations callbacks are to be found in * Documentation/filesystems/Locking. Keep it updated! * - * the dentry parameter passed to d_hash and d_compare is the parent - * directory of the entries to be compared. It is used in case these - * functions need any directory specific information for determining - * equivalency classes. Using the dentry itself might not work, as it - * might be a negative dentry which has no information associated with - * it. + * FUrther descriptions are found in Documentation/filesystems/vfs.txt. + * Keep it updated too! */ /* d_flags entries */ diff --git a/include/linux/ncp_fs.h b/include/linux/ncp_fs.h index ef663061d5ac..1c27f201c856 100644 --- a/include/linux/ncp_fs.h +++ b/include/linux/ncp_fs.h @@ -184,13 +184,13 @@ struct ncp_entry_info { __u8 file_handle[6]; }; -static inline struct ncp_server *NCP_SBP(struct super_block *sb) +static inline struct ncp_server *NCP_SBP(const struct super_block *sb) { return sb->s_fs_info; } #define NCP_SERVER(inode) NCP_SBP((inode)->i_sb) -static inline struct ncp_inode_info *NCP_FINFO(struct inode *inode) +static inline struct ncp_inode_info *NCP_FINFO(const struct inode *inode) { return container_of(inode, struct ncp_inode_info, vfs_inode); } -- cgit v1.2.3 From b1e6a015a580ad145689ad1d6b4aa0e03e6c868b Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:28 +1100 Subject: fs: change d_hash for rcu-walk Change d_hash so it may be called from lock-free RCU lookups. See similar patch for d_compare for details. For in-tree filesystems, this is just a mechanical change. Signed-off-by: Nick Piggin --- Documentation/filesystems/Locking | 5 +++-- Documentation/filesystems/porting | 7 +++++++ Documentation/filesystems/vfs.txt | 8 ++++++-- drivers/staging/smbfs/cache.c | 2 +- drivers/staging/smbfs/dir.c | 6 ++++-- fs/adfs/dir.c | 3 ++- fs/affs/namei.c | 20 ++++++++++++-------- fs/cifs/dir.c | 5 +++-- fs/cifs/readdir.c | 2 +- fs/dcache.c | 2 +- fs/ecryptfs/inode.c | 4 ++-- fs/fat/namei_msdos.c | 3 ++- fs/fat/namei_vfat.c | 8 +++++--- fs/gfs2/dentry.c | 3 ++- fs/hfs/hfs_fs.h | 3 ++- fs/hfs/string.c | 3 ++- fs/hfsplus/hfsplus_fs.h | 3 ++- fs/hfsplus/unicode.c | 3 ++- fs/hpfs/dentry.c | 3 ++- fs/isofs/inode.c | 28 ++++++++++++++++++---------- fs/jfs/namei.c | 3 ++- fs/namei.c | 5 +++-- fs/ncpfs/dir.c | 10 ++++++---- fs/sysv/namei.c | 3 ++- include/linux/dcache.h | 3 ++- 25 files changed, 94 insertions(+), 51 deletions(-) (limited to 'drivers') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 9a76f8d8bf95..a15ee207b449 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -10,7 +10,8 @@ be able to use diff(1). --------------------------- dentry_operations -------------------------- prototypes: int (*d_revalidate)(struct dentry *, int); - int (*d_hash) (struct dentry *, struct qstr *); + int (*d_hash)(const struct dentry *, const struct inode *, + struct qstr *); int (*d_compare)(const struct dentry *, const struct inode *, const struct dentry *, const struct inode *, unsigned int, const char *, const struct qstr *); @@ -22,7 +23,7 @@ prototypes: locking rules: dcache_lock rename_lock ->d_lock may block d_revalidate: no no no yes -d_hash no no no yes +d_hash no no no no d_compare: no yes no no d_delete: yes no yes no d_release: no no no yes diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index d44511e20828..9fd31940a8ef 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -333,3 +333,10 @@ unreferenced dentries, and is now only called when the dentry refcount goes to .d_compare() calling convention and locking rules are significantly changed. Read updated documentation in Documentation/filesystems/vfs.txt (and look at examples of other filesystems) for guidance. + +--- +[mandatory] + + .d_hash() calling convention and locking rules are significantly +changed. Read updated documentation in Documentation/filesystems/vfs.txt (and +look at examples of other filesystems) for guidance. diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 250681b8c7cc..69b10ff5ec81 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -847,7 +847,8 @@ defined: struct dentry_operations { int (*d_revalidate)(struct dentry *, struct nameidata *); - int (*d_hash)(struct dentry *, struct qstr *); + int (*d_hash)(const struct dentry *, const struct inode *, + struct qstr *); int (*d_compare)(const struct dentry *, const struct inode *, const struct dentry *, const struct inode *, unsigned int, const char *, const struct qstr *); @@ -864,7 +865,10 @@ struct dentry_operations { d_hash: called when the VFS adds a dentry to the hash table. The first dentry passed to d_hash is the parent directory that the name is - to be hashed into. + to be hashed into. The inode is the dentry's inode. + + Same locking and synchronisation rules as d_compare regarding + what is safe to dereference etc. d_compare: called to compare a dentry name with a given name. The first dentry is the parent of the dentry to be compared, the second is diff --git a/drivers/staging/smbfs/cache.c b/drivers/staging/smbfs/cache.c index dbd2e1df3ba9..0beded260b00 100644 --- a/drivers/staging/smbfs/cache.c +++ b/drivers/staging/smbfs/cache.c @@ -134,7 +134,7 @@ smb_fill_cache(struct file *filp, void *dirent, filldir_t filldir, qname->hash = full_name_hash(qname->name, qname->len); if (dentry->d_op && dentry->d_op->d_hash) - if (dentry->d_op->d_hash(dentry, qname) != 0) + if (dentry->d_op->d_hash(dentry, inode, qname) != 0) goto end_advance; newdent = d_lookup(dentry, qname); diff --git a/drivers/staging/smbfs/dir.c b/drivers/staging/smbfs/dir.c index bd63229f43f2..5f79799d5d4a 100644 --- a/drivers/staging/smbfs/dir.c +++ b/drivers/staging/smbfs/dir.c @@ -274,7 +274,8 @@ smb_dir_open(struct inode *dir, struct file *file) * Dentry operations routines */ static int smb_lookup_validate(struct dentry *, struct nameidata *); -static int smb_hash_dentry(struct dentry *, struct qstr *); +static int smb_hash_dentry(const struct dentry *, const struct inode *, + struct qstr *); static int smb_compare_dentry(const struct dentry *, const struct inode *, const struct dentry *, const struct inode *, @@ -336,7 +337,8 @@ smb_lookup_validate(struct dentry * dentry, struct nameidata *nd) } static int -smb_hash_dentry(struct dentry *dir, struct qstr *this) +smb_hash_dentry(const struct dentry *dir, const struct inode *inode, + struct qstr *this) { unsigned long hash; int i; diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c index c8ed66162bd4..a11e5e102716 100644 --- a/fs/adfs/dir.c +++ b/fs/adfs/dir.c @@ -201,7 +201,8 @@ const struct file_operations adfs_dir_operations = { }; static int -adfs_hash(struct dentry *parent, struct qstr *qstr) +adfs_hash(const struct dentry *parent, const struct inode *inode, + struct qstr *qstr) { const unsigned int name_len = ADFS_SB(parent->d_sb)->s_namelen; const unsigned char *name; diff --git a/fs/affs/namei.c b/fs/affs/namei.c index 547d5deb0d42..5aca08c21100 100644 --- a/fs/affs/namei.c +++ b/fs/affs/namei.c @@ -13,13 +13,15 @@ typedef int (*toupper_t)(int); static int affs_toupper(int ch); -static int affs_hash_dentry(struct dentry *, struct qstr *); +static int affs_hash_dentry(const struct dentry *, + const struct inode *, struct qstr *); static int affs_compare_dentry(const struct dentry *parent, const struct inode *pinode, const struct dentry *dentry, const struct inode *inode, unsigned int len, const char *str, const struct qstr *name); static int affs_intl_toupper(int ch); -static int affs_intl_hash_dentry(struct dentry *, struct qstr *); +static int affs_intl_hash_dentry(const struct dentry *, + const struct inode *, struct qstr *); static int affs_intl_compare_dentry(const struct dentry *parent, const struct inode *pinode, const struct dentry *dentry, const struct inode *inode, @@ -64,13 +66,13 @@ affs_get_toupper(struct super_block *sb) * Note: the dentry argument is the parent dentry. */ static inline int -__affs_hash_dentry(struct dentry *dentry, struct qstr *qstr, toupper_t toupper) +__affs_hash_dentry(struct qstr *qstr, toupper_t toupper) { const u8 *name = qstr->name; unsigned long hash; int i; - i = affs_check_name(qstr->name,qstr->len); + i = affs_check_name(qstr->name, qstr->len); if (i) return i; @@ -84,14 +86,16 @@ __affs_hash_dentry(struct dentry *dentry, struct qstr *qstr, toupper_t toupper) } static int -affs_hash_dentry(struct dentry *dentry, struct qstr *qstr) +affs_hash_dentry(const struct dentry *dentry, const struct inode *inode, + struct qstr *qstr) { - return __affs_hash_dentry(dentry, qstr, affs_toupper); + return __affs_hash_dentry(qstr, affs_toupper); } static int -affs_intl_hash_dentry(struct dentry *dentry, struct qstr *qstr) +affs_intl_hash_dentry(const struct dentry *dentry, const struct inode *inode, + struct qstr *qstr) { - return __affs_hash_dentry(dentry, qstr, affs_intl_toupper); + return __affs_hash_dentry(qstr, affs_intl_toupper); } static inline int __affs_compare_dentry(unsigned int len, diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index c60133f0d8e4..88bfe686ac00 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -700,9 +700,10 @@ const struct dentry_operations cifs_dentry_ops = { /* d_delete: cifs_d_delete, */ /* not needed except for debugging */ }; -static int cifs_ci_hash(struct dentry *dentry, struct qstr *q) +static int cifs_ci_hash(const struct dentry *dentry, const struct inode *inode, + struct qstr *q) { - struct nls_table *codepage = CIFS_SB(dentry->d_inode->i_sb)->local_nls; + struct nls_table *codepage = CIFS_SB(dentry->d_sb)->local_nls; unsigned long hash; int i; diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index a73eb9f4bdaf..ee463aeca0b0 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -79,7 +79,7 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name, cFYI(1, "For %s", name->name); if (parent->d_op && parent->d_op->d_hash) - parent->d_op->d_hash(parent, name); + parent->d_op->d_hash(parent, parent->d_inode, name); else name->hash = full_name_hash(name->name, name->len); diff --git a/fs/dcache.c b/fs/dcache.c index 7075555fbb04..6f59f375ed9d 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1478,7 +1478,7 @@ struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name) */ name->hash = full_name_hash(name->name, name->len); if (dir->d_op && dir->d_op->d_hash) { - if (dir->d_op->d_hash(dir, name) < 0) + if (dir->d_op->d_hash(dir, dir->d_inode, name) < 0) goto out; } dentry = d_lookup(dir, name); diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 9d1a22d62765..a1ed7a7cb173 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -454,7 +454,7 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, lower_name.hash = ecryptfs_dentry->d_name.hash; if (lower_dir_dentry->d_op && lower_dir_dentry->d_op->d_hash) { rc = lower_dir_dentry->d_op->d_hash(lower_dir_dentry, - &lower_name); + lower_dir_dentry->d_inode, &lower_name); if (rc < 0) goto out_d_drop; } @@ -489,7 +489,7 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, lower_name.hash = full_name_hash(lower_name.name, lower_name.len); if (lower_dir_dentry->d_op && lower_dir_dentry->d_op->d_hash) { rc = lower_dir_dentry->d_op->d_hash(lower_dir_dentry, - &lower_name); + lower_dir_dentry->d_inode, &lower_name); if (rc < 0) goto out_d_drop; } diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index 99d3c7ac973c..3b3e072d8982 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -148,7 +148,8 @@ static int msdos_find(struct inode *dir, const unsigned char *name, int len, * that the existing dentry can be used. The msdos fs routines will * return ENOENT or EINVAL as appropriate. */ -static int msdos_hash(struct dentry *dentry, struct qstr *qstr) +static int msdos_hash(const struct dentry *dentry, const struct inode *inode, + struct qstr *qstr) { struct fat_mount_options *options = &MSDOS_SB(dentry->d_sb)->options; unsigned char msdos_name[MSDOS_NAME]; diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 95e00ab84c3f..4fc06278db48 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -103,7 +103,8 @@ static unsigned int vfat_striptail_len(const struct qstr *qstr) * that the existing dentry can be used. The vfat fs routines will * return ENOENT or EINVAL as appropriate. */ -static int vfat_hash(struct dentry *dentry, struct qstr *qstr) +static int vfat_hash(const struct dentry *dentry, const struct inode *inode, + struct qstr *qstr) { qstr->hash = full_name_hash(qstr->name, vfat_striptail_len(qstr)); return 0; @@ -115,9 +116,10 @@ static int vfat_hash(struct dentry *dentry, struct qstr *qstr) * that the existing dentry can be used. The vfat fs routines will * return ENOENT or EINVAL as appropriate. */ -static int vfat_hashi(struct dentry *dentry, struct qstr *qstr) +static int vfat_hashi(const struct dentry *dentry, const struct inode *inode, + struct qstr *qstr) { - struct nls_table *t = MSDOS_SB(dentry->d_inode->i_sb)->nls_io; + struct nls_table *t = MSDOS_SB(dentry->d_sb)->nls_io; const unsigned char *name; unsigned int len; unsigned long hash; diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c index e80fea2f65ff..50497f65763b 100644 --- a/fs/gfs2/dentry.c +++ b/fs/gfs2/dentry.c @@ -100,7 +100,8 @@ fail: return 0; } -static int gfs2_dhash(struct dentry *dentry, struct qstr *str) +static int gfs2_dhash(const struct dentry *dentry, const struct inode *inode, + struct qstr *str) { str->hash = gfs2_disk_hash(str->name, str->len); return 0; diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h index 8cd876f0e961..ad97c2d58287 100644 --- a/fs/hfs/hfs_fs.h +++ b/fs/hfs/hfs_fs.h @@ -213,7 +213,8 @@ extern int hfs_part_find(struct super_block *, sector_t *, sector_t *); /* string.c */ extern const struct dentry_operations hfs_dentry_operations; -extern int hfs_hash_dentry(struct dentry *, struct qstr *); +extern int hfs_hash_dentry(const struct dentry *, const struct inode *, + struct qstr *); extern int hfs_strcmp(const unsigned char *, unsigned int, const unsigned char *, unsigned int); extern int hfs_compare_dentry(const struct dentry *parent, diff --git a/fs/hfs/string.c b/fs/hfs/string.c index aaf90d0d6940..495a976a3cc9 100644 --- a/fs/hfs/string.c +++ b/fs/hfs/string.c @@ -51,7 +51,8 @@ static unsigned char caseorder[256] = { /* * Hash a string to an integer in a case-independent way */ -int hfs_hash_dentry(struct dentry *dentry, struct qstr *this) +int hfs_hash_dentry(const struct dentry *dentry, const struct inode *inode, + struct qstr *this) { const unsigned char *name = this->name; unsigned int hash, len = this->len; diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index 7aa96eefe483..a5308f491e3e 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -379,7 +379,8 @@ int hfsplus_strcasecmp(const struct hfsplus_unistr *, const struct hfsplus_unist int hfsplus_strcmp(const struct hfsplus_unistr *, const struct hfsplus_unistr *); int hfsplus_uni2asc(struct super_block *, const struct hfsplus_unistr *, char *, int *); int hfsplus_asc2uni(struct super_block *, struct hfsplus_unistr *, const char *, int); -int hfsplus_hash_dentry(struct dentry *dentry, struct qstr *str); +int hfsplus_hash_dentry(const struct dentry *dentry, const struct inode *inode, + struct qstr *str); int hfsplus_compare_dentry(const struct dentry *parent, const struct inode *pinode, const struct dentry *dentry, const struct inode *inode, diff --git a/fs/hfsplus/unicode.c b/fs/hfsplus/unicode.c index b178c997efa8..d800aa0f2c80 100644 --- a/fs/hfsplus/unicode.c +++ b/fs/hfsplus/unicode.c @@ -320,7 +320,8 @@ int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr, * Composed unicode characters are decomposed and case-folding is performed * if the appropriate bits are (un)set on the superblock. */ -int hfsplus_hash_dentry(struct dentry *dentry, struct qstr *str) +int hfsplus_hash_dentry(const struct dentry *dentry, const struct inode *inode, + struct qstr *str) { struct super_block *sb = dentry->d_sb; const char *astr; diff --git a/fs/hpfs/dentry.c b/fs/hpfs/dentry.c index dd9b1e74a734..35526df1fd32 100644 --- a/fs/hpfs/dentry.c +++ b/fs/hpfs/dentry.c @@ -12,7 +12,8 @@ * Note: the dentry argument is the parent dentry. */ -static int hpfs_hash_dentry(struct dentry *dentry, struct qstr *qstr) +static int hpfs_hash_dentry(const struct dentry *dentry, const struct inode *inode, + struct qstr *qstr) { unsigned long hash; int i; diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 7b0fbc61af81..d204ee4235fd 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -26,8 +26,10 @@ #define BEQUIET -static int isofs_hashi(struct dentry *parent, struct qstr *qstr); -static int isofs_hash(struct dentry *parent, struct qstr *qstr); +static int isofs_hashi(const struct dentry *parent, const struct inode *inode, + struct qstr *qstr); +static int isofs_hash(const struct dentry *parent, const struct inode *inode, + struct qstr *qstr); static int isofs_dentry_cmpi(const struct dentry *parent, const struct inode *pinode, const struct dentry *dentry, const struct inode *inode, @@ -38,8 +40,10 @@ static int isofs_dentry_cmp(const struct dentry *parent, unsigned int len, const char *str, const struct qstr *name); #ifdef CONFIG_JOLIET -static int isofs_hashi_ms(struct dentry *parent, struct qstr *qstr); -static int isofs_hash_ms(struct dentry *parent, struct qstr *qstr); +static int isofs_hashi_ms(const struct dentry *parent, const struct inode *inode, + struct qstr *qstr); +static int isofs_hash_ms(const struct dentry *parent, const struct inode *inode, + struct qstr *qstr); static int isofs_dentry_cmpi_ms(const struct dentry *parent, const struct inode *pinode, const struct dentry *dentry, const struct inode *inode, @@ -172,7 +176,7 @@ struct iso9660_options{ * Compute the hash for the isofs name corresponding to the dentry. */ static int -isofs_hash_common(struct dentry *dentry, struct qstr *qstr, int ms) +isofs_hash_common(const struct dentry *dentry, struct qstr *qstr, int ms) { const char *name; int len; @@ -193,7 +197,7 @@ isofs_hash_common(struct dentry *dentry, struct qstr *qstr, int ms) * Compute the hash for the isofs name corresponding to the dentry. */ static int -isofs_hashi_common(struct dentry *dentry, struct qstr *qstr, int ms) +isofs_hashi_common(const struct dentry *dentry, struct qstr *qstr, int ms) { const char *name; int len; @@ -248,13 +252,15 @@ static int isofs_dentry_cmp_common( } static int -isofs_hash(struct dentry *dentry, struct qstr *qstr) +isofs_hash(const struct dentry *dentry, const struct inode *inode, + struct qstr *qstr) { return isofs_hash_common(dentry, qstr, 0); } static int -isofs_hashi(struct dentry *dentry, struct qstr *qstr) +isofs_hashi(const struct dentry *dentry, const struct inode *inode, + struct qstr *qstr) { return isofs_hashi_common(dentry, qstr, 0); } @@ -277,13 +283,15 @@ isofs_dentry_cmpi(const struct dentry *parent, const struct inode *pinode, #ifdef CONFIG_JOLIET static int -isofs_hash_ms(struct dentry *dentry, struct qstr *qstr) +isofs_hash_ms(const struct dentry *dentry, const struct inode *inode, + struct qstr *qstr) { return isofs_hash_common(dentry, qstr, 1); } static int -isofs_hashi_ms(struct dentry *dentry, struct qstr *qstr) +isofs_hashi_ms(const struct dentry *dentry, const struct inode *inode, + struct qstr *qstr) { return isofs_hashi_common(dentry, qstr, 1); } diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 92129016cd79..57f90dad8919 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -1574,7 +1574,8 @@ const struct file_operations jfs_dir_operations = { .llseek = generic_file_llseek, }; -static int jfs_ci_hash(struct dentry *dir, struct qstr *this) +static int jfs_ci_hash(const struct dentry *dir, const struct inode *inode, + struct qstr *this) { unsigned long hash; int i; diff --git a/fs/namei.c b/fs/namei.c index 4ff7ca530533..f3b5ca404659 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -731,7 +731,8 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, * to use its own hash.. */ if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash) { - int err = nd->path.dentry->d_op->d_hash(nd->path.dentry, name); + int err = nd->path.dentry->d_op->d_hash(nd->path.dentry, + nd->path.dentry->d_inode, name); if (err < 0) return err; } @@ -1134,7 +1135,7 @@ static struct dentry *__lookup_hash(struct qstr *name, * to use its own hash.. */ if (base->d_op && base->d_op->d_hash) { - err = base->d_op->d_hash(base, name); + err = base->d_op->d_hash(base, inode, name); dentry = ERR_PTR(err); if (err < 0) goto out; diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index 3bcc68aed416..bbbf7922f422 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -74,7 +74,8 @@ const struct inode_operations ncp_dir_inode_operations = * Dentry operations routines */ static int ncp_lookup_validate(struct dentry *, struct nameidata *); -static int ncp_hash_dentry(struct dentry *, struct qstr *); +static int ncp_hash_dentry(const struct dentry *, const struct inode *, + struct qstr *); static int ncp_compare_dentry(const struct dentry *, const struct inode *, const struct dentry *, const struct inode *, unsigned int, const char *, const struct qstr *); @@ -129,9 +130,10 @@ static inline int ncp_case_sensitive(const struct inode *i) * is case-sensitive. */ static int -ncp_hash_dentry(struct dentry *dentry, struct qstr *this) +ncp_hash_dentry(const struct dentry *dentry, const struct inode *inode, + struct qstr *this) { - if (!ncp_case_sensitive(dentry->d_inode)) { + if (!ncp_case_sensitive(inode)) { struct super_block *sb = dentry->d_sb; struct nls_table *t; unsigned long hash; @@ -597,7 +599,7 @@ ncp_fill_cache(struct file *filp, void *dirent, filldir_t filldir, qname.hash = full_name_hash(qname.name, qname.len); if (dentry->d_op && dentry->d_op->d_hash) - if (dentry->d_op->d_hash(dentry, &qname) != 0) + if (dentry->d_op->d_hash(dentry, dentry->d_inode, &qname) != 0) goto end_advance; newdent = d_lookup(dentry, &qname); diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c index 11e7f7d11cd0..7507aeb4c90e 100644 --- a/fs/sysv/namei.c +++ b/fs/sysv/namei.c @@ -27,7 +27,8 @@ static int add_nondir(struct dentry *dentry, struct inode *inode) return err; } -static int sysv_hash(struct dentry *dentry, struct qstr *qstr) +static int sysv_hash(const struct dentry *dentry, const struct inode *inode, + struct qstr *qstr) { /* Truncate the name in place, avoids having to define a compare function. */ diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 75a072bf2a34..1149e706f04d 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -133,7 +133,8 @@ enum dentry_d_lock_class struct dentry_operations { int (*d_revalidate)(struct dentry *, struct nameidata *); - int (*d_hash)(struct dentry *, struct qstr *); + int (*d_hash)(const struct dentry *, const struct inode *, + struct qstr *); int (*d_compare)(const struct dentry *, const struct inode *, const struct dentry *, const struct inode *, unsigned int, const char *, const struct qstr *); -- cgit v1.2.3 From b7ab39f631f505edc2bbdb86620d5493f995c9da Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:32 +1100 Subject: fs: dcache scale dentry refcount Make d_count non-atomic and protect it with d_lock. This allows us to ensure a 0 refcount dentry remains 0 without dcache_lock. It is also fairly natural when we start protecting many other dentry members with d_lock. Signed-off-by: Nick Piggin --- arch/powerpc/platforms/cell/spufs/inode.c | 2 +- drivers/infiniband/hw/ipath/ipath_fs.c | 2 +- drivers/infiniband/hw/qib/qib_fs.c | 2 +- fs/autofs4/expire.c | 8 +-- fs/autofs4/root.c | 6 +- fs/ceph/dir.c | 4 +- fs/ceph/inode.c | 4 +- fs/ceph/mds_client.c | 2 +- fs/coda/dir.c | 2 +- fs/configfs/dir.c | 3 +- fs/configfs/inode.c | 2 +- fs/dcache.c | 106 +++++++++++++++++++++++------- fs/ecryptfs/inode.c | 2 +- fs/locks.c | 2 +- fs/namei.c | 2 +- fs/nfs/dir.c | 6 +- fs/nfs/unlink.c | 2 +- fs/nfsd/vfs.c | 5 +- fs/nilfs2/super.c | 2 +- include/linux/dcache.h | 29 ++++---- kernel/cgroup.c | 2 - 21 files changed, 126 insertions(+), 69 deletions(-) (limited to 'drivers') diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 3532b92de983..29a406a77547 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -162,7 +162,7 @@ static void spufs_prune_dir(struct dentry *dir) spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); if (!(d_unhashed(dentry)) && dentry->d_inode) { - dget_locked(dentry); + dget_locked_dlock(dentry); __d_drop(dentry); spin_unlock(&dentry->d_lock); simple_unlink(dir->d_inode, dentry); diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c index 8c8afc716b98..18aee04a8411 100644 --- a/drivers/infiniband/hw/ipath/ipath_fs.c +++ b/drivers/infiniband/hw/ipath/ipath_fs.c @@ -280,7 +280,7 @@ static int remove_file(struct dentry *parent, char *name) spin_lock(&dcache_lock); spin_lock(&tmp->d_lock); if (!(d_unhashed(tmp) && tmp->d_inode)) { - dget_locked(tmp); + dget_locked_dlock(tmp); __d_drop(tmp); spin_unlock(&tmp->d_lock); spin_unlock(&dcache_lock); diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c index f99bddc01716..fe4b242f0094 100644 --- a/drivers/infiniband/hw/qib/qib_fs.c +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -456,7 +456,7 @@ static int remove_file(struct dentry *parent, char *name) spin_lock(&dcache_lock); spin_lock(&tmp->d_lock); if (!(d_unhashed(tmp) && tmp->d_inode)) { - dget_locked(tmp); + dget_locked_dlock(tmp); __d_drop(tmp); spin_unlock(&tmp->d_lock); spin_unlock(&dcache_lock); diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index a796c9417fb1..413b5642e6cf 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -198,7 +198,7 @@ static int autofs4_tree_busy(struct vfsmount *mnt, else ino_count++; - if (atomic_read(&p->d_count) > ino_count) { + if (p->d_count > ino_count) { top_ino->last_used = jiffies; dput(p); return 1; @@ -347,7 +347,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, /* Path walk currently on this dentry? */ ino_count = atomic_read(&ino->count) + 2; - if (atomic_read(&dentry->d_count) > ino_count) + if (dentry->d_count > ino_count) goto next; /* Can we umount this guy */ @@ -369,7 +369,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, if (!exp_leaves) { /* Path walk currently on this dentry? */ ino_count = atomic_read(&ino->count) + 1; - if (atomic_read(&dentry->d_count) > ino_count) + if (dentry->d_count > ino_count) goto next; if (!autofs4_tree_busy(mnt, dentry, timeout, do_now)) { @@ -383,7 +383,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, } else { /* Path walk currently on this dentry? */ ino_count = atomic_read(&ino->count) + 1; - if (atomic_read(&dentry->d_count) > ino_count) + if (dentry->d_count > ino_count) goto next; expired = autofs4_check_leaves(mnt, dentry, timeout, do_now); diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index d34896cfb19f..7922509b5b2b 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -436,7 +436,7 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry) spin_lock(&active->d_lock); /* Already gone? */ - if (atomic_read(&active->d_count) == 0) + if (active->d_count == 0) goto next; qstr = &active->d_name; @@ -452,7 +452,7 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry) goto next; if (d_unhashed(active)) { - dget(active); + dget_dlock(active); spin_unlock(&active->d_lock); spin_unlock(&sbi->lookup_lock); spin_unlock(&dcache_lock); @@ -507,7 +507,7 @@ static struct dentry *autofs4_lookup_expiring(struct dentry *dentry) goto next; if (d_unhashed(expiring)) { - dget(expiring); + dget_dlock(expiring); spin_unlock(&expiring->d_lock); spin_unlock(&sbi->lookup_lock); spin_unlock(&dcache_lock); diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index d902948a90d8..3ecf915a4550 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -150,7 +150,9 @@ more: di = ceph_dentry(dentry); } - atomic_inc(&dentry->d_count); + spin_lock(&dentry->d_lock); + dentry->d_count++; + spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, filp->f_pos, diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index bf1286588f26..bb68c799074d 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -879,8 +879,8 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in, } else if (realdn) { dout("dn %p (%d) spliced with %p (%d) " "inode %p ino %llx.%llx\n", - dn, atomic_read(&dn->d_count), - realdn, atomic_read(&realdn->d_count), + dn, dn->d_count, + realdn, realdn->d_count, realdn->d_inode, ceph_vinop(realdn->d_inode)); dput(dn); dn = realdn; diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 38800eaa81d0..a50fca1e03be 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1486,7 +1486,7 @@ retry: *base = ceph_ino(temp->d_inode); *plen = len; dout("build_path on %p %d built %llx '%.*s'\n", - dentry, atomic_read(&dentry->d_count), *base, len, path); + dentry, dentry->d_count, *base, len, path); return path; } diff --git a/fs/coda/dir.c b/fs/coda/dir.c index 4cce3b07d9d7..9e37e8bc9b89 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -559,7 +559,7 @@ static int coda_dentry_revalidate(struct dentry *de, struct nameidata *nd) if (cii->c_flags & C_FLUSH) coda_flag_inode_children(inode, C_FLUSH); - if (atomic_read(&de->d_count) > 1) + if (de->d_count > 1) /* pretend it's valid, but don't change the flags */ goto out; diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 20024a9ef5a7..e9acea440ffc 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -394,8 +394,7 @@ static void remove_dir(struct dentry * d) if (d->d_inode) simple_rmdir(parent->d_inode,d); - pr_debug(" o %s removing done (%d)\n",d->d_name.name, - atomic_read(&d->d_count)); + pr_debug(" o %s removing done (%d)\n",d->d_name.name, d->d_count); dput(parent); } diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index 253476d78ed8..79b37765d8ff 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c @@ -253,7 +253,7 @@ void configfs_drop_dentry(struct configfs_dirent * sd, struct dentry * parent) spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); if (!(d_unhashed(dentry) && dentry->d_inode)) { - dget_locked(dentry); + dget_locked_dlock(dentry); __d_drop(dentry); spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); diff --git a/fs/dcache.c b/fs/dcache.c index 3d3c843c36ed..81e91502b294 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -45,6 +45,7 @@ * - d_flags * - d_name * - d_lru + * - d_count * * Ordering: * dcache_lock @@ -125,6 +126,7 @@ static void __d_free(struct rcu_head *head) */ static void d_free(struct dentry *dentry) { + BUG_ON(dentry->d_count); this_cpu_dec(nr_dentry); if (dentry->d_op && dentry->d_op->d_release) dentry->d_op->d_release(dentry); @@ -222,8 +224,11 @@ static struct dentry *d_kill(struct dentry *dentry) struct dentry *parent; list_del(&dentry->d_u.d_child); - /*drops the locks, at that point nobody can reach this dentry */ dentry_iput(dentry); + /* + * dentry_iput drops the locks, at which point nobody (except + * transient RCU lookups) can reach this dentry. + */ if (IS_ROOT(dentry)) parent = NULL; else @@ -303,13 +308,23 @@ void dput(struct dentry *dentry) return; repeat: - if (atomic_read(&dentry->d_count) == 1) + if (dentry->d_count == 1) might_sleep(); - if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock)) - return; - spin_lock(&dentry->d_lock); - if (atomic_read(&dentry->d_count)) { + if (dentry->d_count == 1) { + if (!spin_trylock(&dcache_lock)) { + /* + * Something of a livelock possibility we could avoid + * by taking dcache_lock and trying again, but we + * want to reduce dcache_lock anyway so this will + * get improved. + */ + spin_unlock(&dentry->d_lock); + goto repeat; + } + } + dentry->d_count--; + if (dentry->d_count) { spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); return; @@ -389,7 +404,7 @@ int d_invalidate(struct dentry * dentry) * working directory or similar). */ spin_lock(&dentry->d_lock); - if (atomic_read(&dentry->d_count) > 1) { + if (dentry->d_count > 1) { if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) { spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); @@ -404,29 +419,61 @@ int d_invalidate(struct dentry * dentry) } EXPORT_SYMBOL(d_invalidate); -/* This should be called _only_ with dcache_lock held */ +/* This must be called with dcache_lock and d_lock held */ static inline struct dentry * __dget_locked_dlock(struct dentry *dentry) { - atomic_inc(&dentry->d_count); + dentry->d_count++; dentry_lru_del(dentry); return dentry; } +/* This should be called _only_ with dcache_lock held */ static inline struct dentry * __dget_locked(struct dentry *dentry) { - atomic_inc(&dentry->d_count); spin_lock(&dentry->d_lock); - dentry_lru_del(dentry); + __dget_locked_dlock(dentry); spin_unlock(&dentry->d_lock); return dentry; } +struct dentry * dget_locked_dlock(struct dentry *dentry) +{ + return __dget_locked_dlock(dentry); +} + struct dentry * dget_locked(struct dentry *dentry) { return __dget_locked(dentry); } EXPORT_SYMBOL(dget_locked); +struct dentry *dget_parent(struct dentry *dentry) +{ + struct dentry *ret; + +repeat: + spin_lock(&dentry->d_lock); + ret = dentry->d_parent; + if (!ret) + goto out; + if (dentry == ret) { + ret->d_count++; + goto out; + } + if (!spin_trylock(&ret->d_lock)) { + spin_unlock(&dentry->d_lock); + cpu_relax(); + goto repeat; + } + BUG_ON(!ret->d_count); + ret->d_count++; + spin_unlock(&ret->d_lock); +out: + spin_unlock(&dentry->d_lock); + return ret; +} +EXPORT_SYMBOL(dget_parent); + /** * d_find_alias - grab a hashed alias of inode * @inode: inode in question @@ -495,7 +542,7 @@ restart: spin_lock(&dcache_lock); list_for_each_entry(dentry, &inode->i_dentry, d_alias) { spin_lock(&dentry->d_lock); - if (!atomic_read(&dentry->d_count)) { + if (!dentry->d_count) { __dget_locked_dlock(dentry); __d_drop(dentry); spin_unlock(&dentry->d_lock); @@ -530,7 +577,10 @@ static void prune_one_dentry(struct dentry * dentry) */ while (dentry) { spin_lock(&dcache_lock); - if (!atomic_dec_and_lock(&dentry->d_count, &dentry->d_lock)) { + spin_lock(&dentry->d_lock); + dentry->d_count--; + if (dentry->d_count) { + spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); return; } @@ -562,7 +612,7 @@ static void shrink_dentry_list(struct list_head *list) * the LRU because of laziness during lookup. Do not free * it - just keep it off the LRU list. */ - if (atomic_read(&dentry->d_count)) { + if (dentry->d_count) { spin_unlock(&dentry->d_lock); continue; } @@ -783,7 +833,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) do { struct inode *inode; - if (atomic_read(&dentry->d_count) != 0) { + if (dentry->d_count != 0) { printk(KERN_ERR "BUG: Dentry %p{i=%lx,n=%s}" " still in use (%d)" @@ -792,7 +842,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) dentry->d_inode ? dentry->d_inode->i_ino : 0UL, dentry->d_name.name, - atomic_read(&dentry->d_count), + dentry->d_count, dentry->d_sb->s_type->name, dentry->d_sb->s_id); BUG(); @@ -802,7 +852,9 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) parent = NULL; else { parent = dentry->d_parent; - atomic_dec(&parent->d_count); + spin_lock(&parent->d_lock); + parent->d_count--; + spin_unlock(&parent->d_lock); } list_del(&dentry->d_u.d_child); @@ -853,7 +905,9 @@ void shrink_dcache_for_umount(struct super_block *sb) dentry = sb->s_root; sb->s_root = NULL; - atomic_dec(&dentry->d_count); + spin_lock(&dentry->d_lock); + dentry->d_count--; + spin_unlock(&dentry->d_lock); shrink_dcache_for_umount_subtree(dentry); while (!hlist_empty(&sb->s_anon)) { @@ -950,7 +1004,7 @@ resume: * move only zero ref count dentries to the end * of the unused list for prune_dcache */ - if (!atomic_read(&dentry->d_count)) { + if (!dentry->d_count) { dentry_lru_move_tail(dentry); found++; } else { @@ -1068,7 +1122,7 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) memcpy(dname, name->name, name->len); dname[name->len] = 0; - atomic_set(&dentry->d_count, 1); + dentry->d_count = 1; dentry->d_flags = DCACHE_UNHASHED; spin_lock_init(&dentry->d_lock); dentry->d_inode = NULL; @@ -1556,7 +1610,7 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) goto next; } - atomic_inc(&dentry->d_count); + dentry->d_count++; found = dentry; spin_unlock(&dentry->d_lock); break; @@ -1653,7 +1707,7 @@ void d_delete(struct dentry * dentry) spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); isdir = S_ISDIR(dentry->d_inode->i_mode); - if (atomic_read(&dentry->d_count) == 1) { + if (dentry->d_count == 1) { dentry->d_flags &= ~DCACHE_CANT_MOUNT; dentry_iput(dentry); fsnotify_nameremove(dentry, isdir); @@ -2494,11 +2548,15 @@ resume: this_parent = dentry; goto repeat; } - atomic_dec(&dentry->d_count); + spin_lock(&dentry->d_lock); + dentry->d_count--; + spin_unlock(&dentry->d_lock); } if (this_parent != root) { next = this_parent->d_u.d_child.next; - atomic_dec(&this_parent->d_count); + spin_lock(&this_parent->d_lock); + this_parent->d_count--; + spin_unlock(&this_parent->d_lock); this_parent = this_parent->d_parent; goto resume; } diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index a1ed7a7cb173..5e5c7ec1fc98 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -260,7 +260,7 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry, ecryptfs_dentry->d_parent)); lower_inode = lower_dentry->d_inode; fsstack_copy_attr_atime(ecryptfs_dir_inode, lower_dir_dentry->d_inode); - BUG_ON(!atomic_read(&lower_dentry->d_count)); + BUG_ON(!lower_dentry->d_count); ecryptfs_set_dentry_private(ecryptfs_dentry, kmem_cache_alloc(ecryptfs_dentry_info_cache, GFP_KERNEL)); diff --git a/fs/locks.c b/fs/locks.c index 8729347bcd1a..08415b2a6d36 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1389,7 +1389,7 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp) if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0)) goto out; if ((arg == F_WRLCK) - && ((atomic_read(&dentry->d_count) > 1) + && ((dentry->d_count > 1) || (atomic_read(&inode->i_count) > 1))) goto out; } diff --git a/fs/namei.c b/fs/namei.c index f3b5ca404659..cbfa5fb31072 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2133,7 +2133,7 @@ void dentry_unhash(struct dentry *dentry) shrink_dcache_parent(dentry); spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); - if (atomic_read(&dentry->d_count) == 2) + if (dentry->d_count == 2) __d_drop(dentry); spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 9184c7c80f78..12de824edb5c 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1720,7 +1720,7 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry) spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); - if (atomic_read(&dentry->d_count) > 1) { + if (dentry->d_count > 1) { spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); /* Start asynchronous writeout of the inode */ @@ -1868,7 +1868,7 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, dfprintk(VFS, "NFS: rename(%s/%s -> %s/%s, ct=%d)\n", old_dentry->d_parent->d_name.name, old_dentry->d_name.name, new_dentry->d_parent->d_name.name, new_dentry->d_name.name, - atomic_read(&new_dentry->d_count)); + new_dentry->d_count); /* * For non-directories, check whether the target is busy and if so, @@ -1886,7 +1886,7 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, rehash = new_dentry; } - if (atomic_read(&new_dentry->d_count) > 2) { + if (new_dentry->d_count > 2) { int err; /* copy the target dentry's name */ diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 7bdec8531400..8fe9eb47a97f 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -496,7 +496,7 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry) dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n", dentry->d_parent->d_name.name, dentry->d_name.name, - atomic_read(&dentry->d_count)); + dentry->d_count); nfs_inc_stats(dir, NFSIOS_SILLYRENAME); /* diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 184938fcff04..3a359023c9f7 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1756,8 +1756,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, goto out_dput_new; if (svc_msnfs(ffhp) && - ((atomic_read(&odentry->d_count) > 1) - || (atomic_read(&ndentry->d_count) > 1))) { + ((odentry->d_count > 1) || (ndentry->d_count > 1))) { host_err = -EPERM; goto out_dput_new; } @@ -1843,7 +1842,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, if (type != S_IFDIR) { /* It's UNLINK */ #ifdef MSNFS if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) && - (atomic_read(&rdentry->d_count) > 1)) { + (rdentry->d_count > 1)) { host_err = -EPERM; } else #endif diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index f804d41ec9d3..d36fc7ee615f 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -838,7 +838,7 @@ static int nilfs_attach_snapshot(struct super_block *s, __u64 cno, static int nilfs_tree_was_touched(struct dentry *root_dentry) { - return atomic_read(&root_dentry->d_count) > 1; + return root_dentry->d_count > 1; } /** diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 2feb624b67f1..b0ade2d46805 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -87,7 +87,7 @@ full_name_hash(const unsigned char *name, unsigned int len) #endif struct dentry { - atomic_t d_count; + unsigned int d_count; /* protected by d_lock */ unsigned int d_flags; /* protected by d_lock */ spinlock_t d_lock; /* per dentry lock */ int d_mounted; @@ -297,17 +297,28 @@ extern char *dentry_path(struct dentry *, char *, int); * needs and they take necessary precautions) you should hold dcache_lock * and call dget_locked() instead of dget(). */ - +static inline struct dentry *dget_dlock(struct dentry *dentry) +{ + if (dentry) { + BUG_ON(!dentry->d_count); + dentry->d_count++; + } + return dentry; +} static inline struct dentry *dget(struct dentry *dentry) { if (dentry) { - BUG_ON(!atomic_read(&dentry->d_count)); - atomic_inc(&dentry->d_count); + spin_lock(&dentry->d_lock); + dget_dlock(dentry); + spin_unlock(&dentry->d_lock); } return dentry; } extern struct dentry * dget_locked(struct dentry *); +extern struct dentry * dget_locked_dlock(struct dentry *); + +extern struct dentry *dget_parent(struct dentry *dentry); /** * d_unhashed - is dentry hashed @@ -338,16 +349,6 @@ static inline void dont_mount(struct dentry *dentry) spin_unlock(&dentry->d_lock); } -static inline struct dentry *dget_parent(struct dentry *dentry) -{ - struct dentry *ret; - - spin_lock(&dentry->d_lock); - ret = dget(dentry->d_parent); - spin_unlock(&dentry->d_lock); - return ret; -} - extern void dput(struct dentry *); static inline int d_mountpoint(struct dentry *dentry) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 746055b214d7..eb7af39350c6 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -3655,9 +3655,7 @@ again: list_del(&cgrp->sibling); cgroup_unlock_hierarchy(cgrp->root); - spin_lock(&cgrp->dentry->d_lock); d = dget(cgrp->dentry); - spin_unlock(&d->d_lock); cgroup_d_remove_dir(d); dput(d); -- cgit v1.2.3 From da5029563a0a026c64821b09e8e7b4fd81d3fe1b Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:33 +1100 Subject: fs: dcache scale d_unhashed Protect d_unhashed(dentry) condition with d_lock. This means keeping DCACHE_UNHASHED bit in synch with hash manipulations. Signed-off-by: Nick Piggin --- arch/powerpc/platforms/cell/spufs/inode.c | 3 ++ drivers/usb/core/inode.c | 3 ++ fs/autofs4/autofs_i.h | 13 ------ fs/autofs4/expire.c | 21 ++++++--- fs/ceph/dir.c | 5 ++- fs/configfs/configfs_internal.h | 2 + fs/dcache.c | 74 +++++++++++++++++++++---------- fs/libfs.c | 29 ++++++++---- fs/ocfs2/dcache.c | 5 ++- security/tomoyo/realpath.c | 1 + 10 files changed, 102 insertions(+), 54 deletions(-) (limited to 'drivers') diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 29a406a77547..5aef1a7f5e4b 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -166,6 +166,9 @@ static void spufs_prune_dir(struct dentry *dir) __d_drop(dentry); spin_unlock(&dentry->d_lock); simple_unlink(dir->d_inode, dentry); + /* XXX: what is dcache_lock protecting here? Other + * filesystems (IB, configfs) release dcache_lock + * before unlink */ spin_unlock(&dcache_lock); dput(dentry); } else { diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c index b690aa35df9a..e3ab4437ea96 100644 --- a/drivers/usb/core/inode.c +++ b/drivers/usb/core/inode.c @@ -347,10 +347,13 @@ static int usbfs_empty (struct dentry *dentry) list_for_each(list, &dentry->d_subdirs) { struct dentry *de = list_entry(list, struct dentry, d_u.d_child); + spin_lock(&de->d_lock); if (usbfs_positive(de)) { + spin_unlock(&de->d_lock); spin_unlock(&dcache_lock); return 0; } + spin_unlock(&de->d_lock); } spin_unlock(&dcache_lock); diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index 3d283abf67d7..3912dcf047e5 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -254,19 +254,6 @@ static inline int simple_positive(struct dentry *dentry) return dentry->d_inode && !d_unhashed(dentry); } -static inline int __simple_empty(struct dentry *dentry) -{ - struct dentry *child; - int ret = 0; - - list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child) - if (simple_positive(child)) - goto out; - ret = 1; -out: - return ret; -} - static inline void autofs4_add_expiring(struct dentry *dentry) { struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 413b5642e6cf..ee6402050f13 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -160,14 +160,18 @@ static int autofs4_tree_busy(struct vfsmount *mnt, spin_lock(&dcache_lock); for (p = top; p; p = next_dentry(p, top)) { + spin_lock(&p->d_lock); /* Negative dentry - give up */ - if (!simple_positive(p)) + if (!simple_positive(p)) { + spin_unlock(&p->d_lock); continue; + } DPRINTK("dentry %p %.*s", p, (int) p->d_name.len, p->d_name.name); - p = dget(p); + p = dget_dlock(p); + spin_unlock(&p->d_lock); spin_unlock(&dcache_lock); /* @@ -228,14 +232,18 @@ static struct dentry *autofs4_check_leaves(struct vfsmount *mnt, spin_lock(&dcache_lock); for (p = parent; p; p = next_dentry(p, parent)) { + spin_lock(&p->d_lock); /* Negative dentry - give up */ - if (!simple_positive(p)) + if (!simple_positive(p)) { + spin_unlock(&p->d_lock); continue; + } DPRINTK("dentry %p %.*s", p, (int) p->d_name.len, p->d_name.name); - p = dget(p); + p = dget_dlock(p); + spin_unlock(&p->d_lock); spin_unlock(&dcache_lock); if (d_mountpoint(p)) { @@ -324,12 +332,15 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child); /* Negative dentry - give up */ + spin_lock(&dentry->d_lock); if (!simple_positive(dentry)) { next = next->next; + spin_unlock(&dentry->d_lock); continue; } - dentry = dget(dentry); + dentry = dget_dlock(dentry); + spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); spin_lock(&sbi->fs_lock); diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 3ecf915a4550..571f270dca0f 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -136,6 +136,7 @@ more: fi->at_end = 1; goto out_unlock; } + spin_lock(&dentry->d_lock); if (!d_unhashed(dentry) && dentry->d_inode && ceph_snap(dentry->d_inode) != CEPH_SNAPDIR && ceph_ino(dentry->d_inode) != CEPH_INO_CEPH && @@ -145,13 +146,13 @@ more: dentry->d_name.len, dentry->d_name.name, di->offset, filp->f_pos, d_unhashed(dentry) ? " unhashed" : "", !dentry->d_inode ? " null" : ""); + spin_unlock(&dentry->d_lock); p = p->prev; dentry = list_entry(p, struct dentry, d_u.d_child); di = ceph_dentry(dentry); } - spin_lock(&dentry->d_lock); - dentry->d_count++; + dget_dlock(dentry); spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h index da6061a6df40..e58b4c30e216 100644 --- a/fs/configfs/configfs_internal.h +++ b/fs/configfs/configfs_internal.h @@ -121,6 +121,7 @@ static inline struct config_item *configfs_get_config_item(struct dentry *dentry struct config_item * item = NULL; spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); if (!d_unhashed(dentry)) { struct configfs_dirent * sd = dentry->d_fsdata; if (sd->s_type & CONFIGFS_ITEM_LINK) { @@ -129,6 +130,7 @@ static inline struct config_item *configfs_get_config_item(struct dentry *dentry } else item = config_item_get(sd->s_element); } + spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); return item; diff --git a/fs/dcache.c b/fs/dcache.c index 81e91502b294..ee127f9ab274 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -46,6 +46,7 @@ * - d_name * - d_lru * - d_count + * - d_unhashed() * * Ordering: * dcache_lock @@ -53,6 +54,13 @@ * dcache_lru_lock * dcache_hash_lock * + * If there is an ancestor relationship: + * dentry->d_parent->...->d_parent->d_lock + * ... + * dentry->d_parent->d_lock + * dentry->d_lock + * + * If no ancestor relationship: * if (dentry1 < dentry2) * dentry1->d_lock * dentry2->d_lock @@ -379,7 +387,9 @@ int d_invalidate(struct dentry * dentry) * If it's already been dropped, return OK. */ spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); if (d_unhashed(dentry)) { + spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); return 0; } @@ -388,9 +398,11 @@ int d_invalidate(struct dentry * dentry) * to get rid of unused child entries. */ if (!list_empty(&dentry->d_subdirs)) { + spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); shrink_dcache_parent(dentry); spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); } /* @@ -403,7 +415,6 @@ int d_invalidate(struct dentry * dentry) * we might still populate it if it was a * working directory or similar). */ - spin_lock(&dentry->d_lock); if (dentry->d_count > 1) { if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) { spin_unlock(&dentry->d_lock); @@ -490,35 +501,44 @@ EXPORT_SYMBOL(dget_parent); * any other hashed alias over that one unless @want_discon is set, * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias. */ - -static struct dentry * __d_find_alias(struct inode *inode, int want_discon) +static struct dentry *__d_find_alias(struct inode *inode, int want_discon) { - struct list_head *head, *next, *tmp; - struct dentry *alias, *discon_alias=NULL; + struct dentry *alias, *discon_alias; - head = &inode->i_dentry; - next = inode->i_dentry.next; - while (next != head) { - tmp = next; - next = tmp->next; - prefetch(next); - alias = list_entry(tmp, struct dentry, d_alias); +again: + discon_alias = NULL; + list_for_each_entry(alias, &inode->i_dentry, d_alias) { + spin_lock(&alias->d_lock); if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) { if (IS_ROOT(alias) && - (alias->d_flags & DCACHE_DISCONNECTED)) + (alias->d_flags & DCACHE_DISCONNECTED)) { discon_alias = alias; - else if (!want_discon) { - __dget_locked(alias); + } else if (!want_discon) { + __dget_locked_dlock(alias); + spin_unlock(&alias->d_lock); + return alias; + } + } + spin_unlock(&alias->d_lock); + } + if (discon_alias) { + alias = discon_alias; + spin_lock(&alias->d_lock); + if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) { + if (IS_ROOT(alias) && + (alias->d_flags & DCACHE_DISCONNECTED)) { + __dget_locked_dlock(alias); + spin_unlock(&alias->d_lock); return alias; } } + spin_unlock(&alias->d_lock); + goto again; } - if (discon_alias) - __dget_locked(discon_alias); - return discon_alias; + return NULL; } -struct dentry * d_find_alias(struct inode *inode) +struct dentry *d_find_alias(struct inode *inode) { struct dentry *de = NULL; @@ -801,8 +821,8 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); dentry_lru_del(dentry); - spin_unlock(&dentry->d_lock); __d_drop(dentry); + spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); for (;;) { @@ -817,8 +837,8 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) d_u.d_child) { spin_lock(&loop->d_lock); dentry_lru_del(loop); - spin_unlock(&loop->d_lock); __d_drop(loop); + spin_unlock(&loop->d_lock); cond_resched_lock(&dcache_lock); } spin_unlock(&dcache_lock); @@ -1863,7 +1883,10 @@ static void d_move_locked(struct dentry * dentry, struct dentry * target) /* * XXXX: do we really need to take target->d_lock? */ - if (target < dentry) { + if (d_ancestor(dentry, target)) { + spin_lock(&dentry->d_lock); + spin_lock_nested(&target->d_lock, DENTRY_D_LOCK_NESTED); + } else if (d_ancestor(target, dentry) || target < dentry) { spin_lock(&target->d_lock); spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); } else { @@ -2542,13 +2565,16 @@ resume: struct list_head *tmp = next; struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); next = tmp->next; - if (d_unhashed(dentry)||!dentry->d_inode) + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); + if (d_unhashed(dentry) || !dentry->d_inode) { + spin_unlock(&dentry->d_lock); continue; + } if (!list_empty(&dentry->d_subdirs)) { + spin_unlock(&dentry->d_lock); this_parent = dentry; goto repeat; } - spin_lock(&dentry->d_lock); dentry->d_count--; spin_unlock(&dentry->d_lock); } diff --git a/fs/libfs.c b/fs/libfs.c index b9d25d83e228..433e7139c23a 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -16,6 +16,11 @@ #include +static inline int simple_positive(struct dentry *dentry) +{ + return dentry->d_inode && !d_unhashed(dentry); +} + int simple_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { @@ -100,8 +105,10 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin) while (n && p != &file->f_path.dentry->d_subdirs) { struct dentry *next; next = list_entry(p, struct dentry, d_u.d_child); - if (!d_unhashed(next) && next->d_inode) + spin_lock(&next->d_lock); + if (simple_positive(next)) n--; + spin_unlock(&next->d_lock); p = p->next; } list_add_tail(&cursor->d_u.d_child, p); @@ -155,9 +162,13 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir) for (p=q->next; p != &dentry->d_subdirs; p=p->next) { struct dentry *next; next = list_entry(p, struct dentry, d_u.d_child); - if (d_unhashed(next) || !next->d_inode) + spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED); + if (!simple_positive(next)) { + spin_unlock(&next->d_lock); continue; + } + spin_unlock(&next->d_lock); spin_unlock(&dcache_lock); if (filldir(dirent, next->d_name.name, next->d_name.len, filp->f_pos, @@ -259,20 +270,20 @@ int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *den return 0; } -static inline int simple_positive(struct dentry *dentry) -{ - return dentry->d_inode && !d_unhashed(dentry); -} - int simple_empty(struct dentry *dentry) { struct dentry *child; int ret = 0; spin_lock(&dcache_lock); - list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child) - if (simple_positive(child)) + list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child) { + spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED); + if (simple_positive(child)) { + spin_unlock(&child->d_lock); goto out; + } + spin_unlock(&child->d_lock); + } ret = 1; out: spin_unlock(&dcache_lock); diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c index 895532ac4d98..35e5f5a9ef59 100644 --- a/fs/ocfs2/dcache.c +++ b/fs/ocfs2/dcache.c @@ -174,13 +174,16 @@ struct dentry *ocfs2_find_local_alias(struct inode *inode, list_for_each(p, &inode->i_dentry) { dentry = list_entry(p, struct dentry, d_alias); + spin_lock(&dentry->d_lock); if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) { mlog(0, "dentry found: %.*s\n", dentry->d_name.len, dentry->d_name.name); - dget_locked(dentry); + dget_locked_dlock(dentry); + spin_unlock(&dentry->d_lock); break; } + spin_unlock(&dentry->d_lock); dentry = NULL; } diff --git a/security/tomoyo/realpath.c b/security/tomoyo/realpath.c index 1d0bf8fa1922..d1e05b047715 100644 --- a/security/tomoyo/realpath.c +++ b/security/tomoyo/realpath.c @@ -14,6 +14,7 @@ #include #include #include "common.h" +#include "../../fs/internal.h" /** * tomoyo_encode: Convert binary string to ascii string. -- cgit v1.2.3 From 2fd6b7f50797f2e993eea59e0a0b8c6399c811dc Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:34 +1100 Subject: fs: dcache scale subdirs Protect d_subdirs and d_child with d_lock, except in filesystems that aren't using dcache_lock for these anyway (eg. using i_mutex). Note: if we change the locking rule in future so that ->d_child protection is provided only with ->d_parent->d_lock, it may allow us to reduce some locking. But it would be an exception to an otherwise regular locking scheme, so we'd have to see some good results. Probably not worthwhile. Signed-off-by: Nick Piggin --- drivers/staging/smbfs/cache.c | 4 + drivers/usb/core/inode.c | 8 +- fs/autofs4/autofs_i.h | 11 ++ fs/autofs4/expire.c | 127 ++++++++++----------- fs/autofs4/root.c | 18 ++- fs/ceph/dir.c | 6 +- fs/ceph/inode.c | 8 +- fs/coda/cache.c | 2 + fs/dcache.c | 248 +++++++++++++++++++++++++++++------------- fs/libfs.c | 24 +++- fs/ncpfs/dir.c | 3 + fs/ncpfs/ncplib_kernel.h | 4 + fs/notify/fsnotify.c | 4 +- include/linux/dcache.h | 1 + kernel/cgroup.c | 19 +++- security/selinux/selinuxfs.c | 12 +- 16 files changed, 339 insertions(+), 160 deletions(-) (limited to 'drivers') diff --git a/drivers/staging/smbfs/cache.c b/drivers/staging/smbfs/cache.c index 0beded260b00..920434b6c071 100644 --- a/drivers/staging/smbfs/cache.c +++ b/drivers/staging/smbfs/cache.c @@ -63,6 +63,7 @@ smb_invalidate_dircache_entries(struct dentry *parent) struct dentry *dentry; spin_lock(&dcache_lock); + spin_lock(&parent->d_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { dentry = list_entry(next, struct dentry, d_u.d_child); @@ -70,6 +71,7 @@ smb_invalidate_dircache_entries(struct dentry *parent) smb_age_dentry(server, dentry); next = next->next; } + spin_unlock(&parent->d_lock); spin_unlock(&dcache_lock); } @@ -97,6 +99,7 @@ smb_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos) /* If a pointer is invalid, we search the dentry. */ spin_lock(&dcache_lock); + spin_lock(&parent->d_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { dent = list_entry(next, struct dentry, d_u.d_child); @@ -111,6 +114,7 @@ smb_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos) } dent = NULL; out_unlock: + spin_unlock(&parent->d_lock); spin_unlock(&dcache_lock); return dent; } diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c index e3ab4437ea96..89a0e8366585 100644 --- a/drivers/usb/core/inode.c +++ b/drivers/usb/core/inode.c @@ -344,18 +344,20 @@ static int usbfs_empty (struct dentry *dentry) struct list_head *list; spin_lock(&dcache_lock); - + spin_lock(&dentry->d_lock); list_for_each(list, &dentry->d_subdirs) { struct dentry *de = list_entry(list, struct dentry, d_u.d_child); - spin_lock(&de->d_lock); + + spin_lock_nested(&de->d_lock, DENTRY_D_LOCK_NESTED); if (usbfs_positive(de)) { spin_unlock(&de->d_lock); + spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); return 0; } spin_unlock(&de->d_lock); } - + spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); return 1; } diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index 3912dcf047e5..9d2ae9b30d9f 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -254,6 +254,17 @@ static inline int simple_positive(struct dentry *dentry) return dentry->d_inode && !d_unhashed(dentry); } +static inline void __autofs4_add_expiring(struct dentry *dentry) +{ + struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); + struct autofs_info *ino = autofs4_dentry_ino(dentry); + if (ino) { + if (list_empty(&ino->expiring)) + list_add(&ino->expiring, &sbi->expiring_list); + } + return; +} + static inline void autofs4_add_expiring(struct dentry *dentry) { struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index ee6402050f13..968c1434af62 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -91,24 +91,64 @@ done: } /* - * Calculate next entry in top down tree traversal. - * From next_mnt in namespace.c - elegant. + * Calculate and dget next entry in top down tree traversal. */ -static struct dentry *next_dentry(struct dentry *p, struct dentry *root) +static struct dentry *get_next_positive_dentry(struct dentry *prev, + struct dentry *root) { - struct list_head *next = p->d_subdirs.next; + struct list_head *next; + struct dentry *p, *ret; + + if (prev == NULL) + return dget(prev); + spin_lock(&dcache_lock); +relock: + p = prev; + spin_lock(&p->d_lock); +again: + next = p->d_subdirs.next; if (next == &p->d_subdirs) { while (1) { - if (p == root) + struct dentry *parent; + + if (p == root) { + spin_unlock(&p->d_lock); + spin_unlock(&dcache_lock); + dput(prev); return NULL; + } + + parent = p->d_parent; + if (!spin_trylock(&parent->d_lock)) { + spin_unlock(&p->d_lock); + cpu_relax(); + goto relock; + } + spin_unlock(&p->d_lock); next = p->d_u.d_child.next; - if (next != &p->d_parent->d_subdirs) + p = parent; + if (next != &parent->d_subdirs) break; - p = p->d_parent; } } - return list_entry(next, struct dentry, d_u.d_child); + ret = list_entry(next, struct dentry, d_u.d_child); + + spin_lock_nested(&ret->d_lock, DENTRY_D_LOCK_NESTED); + /* Negative dentry - try next */ + if (!simple_positive(ret)) { + spin_unlock(&ret->d_lock); + p = ret; + goto again; + } + dget_dlock(ret); + spin_unlock(&ret->d_lock); + spin_unlock(&p->d_lock); + spin_unlock(&dcache_lock); + + dput(prev); + + return ret; } /* @@ -158,22 +198,11 @@ static int autofs4_tree_busy(struct vfsmount *mnt, if (!simple_positive(top)) return 1; - spin_lock(&dcache_lock); - for (p = top; p; p = next_dentry(p, top)) { - spin_lock(&p->d_lock); - /* Negative dentry - give up */ - if (!simple_positive(p)) { - spin_unlock(&p->d_lock); - continue; - } - + p = NULL; + while ((p = get_next_positive_dentry(p, top))) { DPRINTK("dentry %p %.*s", p, (int) p->d_name.len, p->d_name.name); - p = dget_dlock(p); - spin_unlock(&p->d_lock); - spin_unlock(&dcache_lock); - /* * Is someone visiting anywhere in the subtree ? * If there's no mount we need to check the usage @@ -208,10 +237,7 @@ static int autofs4_tree_busy(struct vfsmount *mnt, return 1; } } - dput(p); - spin_lock(&dcache_lock); } - spin_unlock(&dcache_lock); /* Timeout of a tree mount is ultimately determined by its top dentry */ if (!autofs4_can_expire(top, timeout, do_now)) @@ -230,36 +256,21 @@ static struct dentry *autofs4_check_leaves(struct vfsmount *mnt, DPRINTK("parent %p %.*s", parent, (int)parent->d_name.len, parent->d_name.name); - spin_lock(&dcache_lock); - for (p = parent; p; p = next_dentry(p, parent)) { - spin_lock(&p->d_lock); - /* Negative dentry - give up */ - if (!simple_positive(p)) { - spin_unlock(&p->d_lock); - continue; - } - + p = NULL; + while ((p = get_next_positive_dentry(p, parent))) { DPRINTK("dentry %p %.*s", p, (int) p->d_name.len, p->d_name.name); - p = dget_dlock(p); - spin_unlock(&p->d_lock); - spin_unlock(&dcache_lock); - if (d_mountpoint(p)) { /* Can we umount this guy */ if (autofs4_mount_busy(mnt, p)) - goto cont; + continue; /* Can we expire this guy */ if (autofs4_can_expire(p, timeout, do_now)) return p; } -cont: - dput(p); - spin_lock(&dcache_lock); } - spin_unlock(&dcache_lock); return NULL; } @@ -310,8 +321,8 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, { unsigned long timeout; struct dentry *root = sb->s_root; + struct dentry *dentry; struct dentry *expired = NULL; - struct list_head *next; int do_now = how & AUTOFS_EXP_IMMEDIATE; int exp_leaves = how & AUTOFS_EXP_LEAVES; struct autofs_info *ino; @@ -323,26 +334,8 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, now = jiffies; timeout = sbi->exp_timeout; - spin_lock(&dcache_lock); - next = root->d_subdirs.next; - - /* On exit from the loop expire is set to a dgot dentry - * to expire or it's NULL */ - while ( next != &root->d_subdirs ) { - struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child); - - /* Negative dentry - give up */ - spin_lock(&dentry->d_lock); - if (!simple_positive(dentry)) { - next = next->next; - spin_unlock(&dentry->d_lock); - continue; - } - - dentry = dget_dlock(dentry); - spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); - + dentry = NULL; + while ((dentry = get_next_positive_dentry(dentry, root))) { spin_lock(&sbi->fs_lock); ino = autofs4_dentry_ino(dentry); @@ -405,11 +398,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, } next: spin_unlock(&sbi->fs_lock); - dput(dentry); - spin_lock(&dcache_lock); - next = next->next; } - spin_unlock(&dcache_lock); return NULL; found: @@ -420,7 +409,11 @@ found: init_completion(&ino->expire_complete); spin_unlock(&sbi->fs_lock); spin_lock(&dcache_lock); + spin_lock(&expired->d_parent->d_lock); + spin_lock_nested(&expired->d_lock, DENTRY_D_LOCK_NESTED); list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child); + spin_unlock(&expired->d_lock); + spin_unlock(&expired->d_parent->d_lock); spin_unlock(&dcache_lock); return expired; } diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 7922509b5b2b..7a9ed6b88291 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -143,10 +143,13 @@ static int autofs4_dir_open(struct inode *inode, struct file *file) * it. */ spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); if (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) { + spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); return -ENOENT; } + spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); out: @@ -253,7 +256,9 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) lookup_type = autofs4_need_mount(nd->flags); spin_lock(&sbi->fs_lock); spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); if (!(lookup_type || ino->flags & AUTOFS_INF_PENDING)) { + spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); spin_unlock(&sbi->fs_lock); goto follow; @@ -266,6 +271,7 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) */ if (ino->flags & AUTOFS_INF_PENDING || (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs))) { + spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); spin_unlock(&sbi->fs_lock); @@ -275,6 +281,7 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) goto follow; } + spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); spin_unlock(&sbi->fs_lock); follow: @@ -347,10 +354,12 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd) /* Check for a non-mountpoint directory with no contents */ spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); if (S_ISDIR(dentry->d_inode->i_mode) && !d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) { DPRINTK("dentry=%p %.*s, emptydir", dentry, dentry->d_name.len, dentry->d_name.name); + spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); /* The daemon never causes a mount to trigger */ @@ -367,6 +376,7 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd) return status; } + spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); return 1; @@ -776,12 +786,16 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry) return -EACCES; spin_lock(&dcache_lock); + spin_lock(&sbi->lookup_lock); + spin_lock(&dentry->d_lock); if (!list_empty(&dentry->d_subdirs)) { + spin_unlock(&dentry->d_lock); + spin_unlock(&sbi->lookup_lock); spin_unlock(&dcache_lock); return -ENOTEMPTY; } - autofs4_add_expiring(dentry); - spin_lock(&dentry->d_lock); + __autofs4_add_expiring(dentry); + spin_unlock(&sbi->lookup_lock); __d_drop(dentry); spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 571f270dca0f..2c924e8d85fe 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -113,6 +113,7 @@ static int __dcache_readdir(struct file *filp, last); spin_lock(&dcache_lock); + spin_lock(&parent->d_lock); /* start at beginning? */ if (filp->f_pos == 2 || last == NULL || @@ -136,7 +137,7 @@ more: fi->at_end = 1; goto out_unlock; } - spin_lock(&dentry->d_lock); + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); if (!d_unhashed(dentry) && dentry->d_inode && ceph_snap(dentry->d_inode) != CEPH_SNAPDIR && ceph_ino(dentry->d_inode) != CEPH_INO_CEPH && @@ -154,6 +155,7 @@ more: dget_dlock(dentry); spin_unlock(&dentry->d_lock); + spin_unlock(&parent->d_lock); spin_unlock(&dcache_lock); dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, filp->f_pos, @@ -188,10 +190,12 @@ more: } spin_lock(&dcache_lock); + spin_lock(&parent->d_lock); p = p->prev; /* advance to next dentry */ goto more; out_unlock: + spin_unlock(&parent->d_lock); spin_unlock(&dcache_lock); out: if (last) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index bb68c799074d..2c6944473366 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -842,11 +842,13 @@ static void ceph_set_dentry_offset(struct dentry *dn) spin_unlock(&inode->i_lock); spin_lock(&dcache_lock); - spin_lock(&dn->d_lock); + spin_lock(&dir->d_lock); + spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED); list_move(&dn->d_u.d_child, &dir->d_subdirs); dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset, dn->d_u.d_child.prev, dn->d_u.d_child.next); spin_unlock(&dn->d_lock); + spin_unlock(&dir->d_lock); spin_unlock(&dcache_lock); } @@ -1232,9 +1234,11 @@ retry_lookup: } else { /* reorder parent's d_subdirs */ spin_lock(&dcache_lock); - spin_lock(&dn->d_lock); + spin_lock(&parent->d_lock); + spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED); list_move(&dn->d_u.d_child, &parent->d_subdirs); spin_unlock(&dn->d_lock); + spin_unlock(&parent->d_lock); spin_unlock(&dcache_lock); } diff --git a/fs/coda/cache.c b/fs/coda/cache.c index 9060f08e70cf..859393fca2b7 100644 --- a/fs/coda/cache.c +++ b/fs/coda/cache.c @@ -94,6 +94,7 @@ static void coda_flag_children(struct dentry *parent, int flag) struct dentry *de; spin_lock(&dcache_lock); + spin_lock(&parent->d_lock); list_for_each(child, &parent->d_subdirs) { de = list_entry(child, struct dentry, d_u.d_child); @@ -102,6 +103,7 @@ static void coda_flag_children(struct dentry *parent, int flag) continue; coda_flag_inode(de->d_inode, flag); } + spin_unlock(&parent->d_lock); spin_unlock(&dcache_lock); return; } diff --git a/fs/dcache.c b/fs/dcache.c index ee127f9ab274..a661247a20d5 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -47,6 +47,8 @@ * - d_lru * - d_count * - d_unhashed() + * - d_parent and d_subdirs + * - childrens' d_child and d_parent * * Ordering: * dcache_lock @@ -223,24 +225,22 @@ static void dentry_lru_move_tail(struct dentry *dentry) * * If this is the root of the dentry tree, return NULL. * - * dcache_lock and d_lock must be held by caller, are dropped by d_kill. + * dcache_lock and d_lock and d_parent->d_lock must be held by caller, and + * are dropped by d_kill. */ -static struct dentry *d_kill(struct dentry *dentry) +static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) __releases(dentry->d_lock) + __releases(parent->d_lock) __releases(dcache_lock) { - struct dentry *parent; - list_del(&dentry->d_u.d_child); + if (parent) + spin_unlock(&parent->d_lock); dentry_iput(dentry); /* * dentry_iput drops the locks, at which point nobody (except * transient RCU lookups) can reach this dentry. */ - if (IS_ROOT(dentry)) - parent = NULL; - else - parent = dentry->d_parent; d_free(dentry); return parent; } @@ -312,6 +312,7 @@ EXPORT_SYMBOL(d_drop); void dput(struct dentry *dentry) { + struct dentry *parent; if (!dentry) return; @@ -319,6 +320,10 @@ repeat: if (dentry->d_count == 1) might_sleep(); spin_lock(&dentry->d_lock); + if (IS_ROOT(dentry)) + parent = NULL; + else + parent = dentry->d_parent; if (dentry->d_count == 1) { if (!spin_trylock(&dcache_lock)) { /* @@ -330,10 +335,17 @@ repeat: spin_unlock(&dentry->d_lock); goto repeat; } + if (parent && !spin_trylock(&parent->d_lock)) { + spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_lock); + goto repeat; + } } dentry->d_count--; if (dentry->d_count) { spin_unlock(&dentry->d_lock); + if (parent) + spin_unlock(&parent->d_lock); spin_unlock(&dcache_lock); return; } @@ -355,6 +367,8 @@ repeat: dentry_lru_add(dentry); spin_unlock(&dentry->d_lock); + if (parent) + spin_unlock(&parent->d_lock); spin_unlock(&dcache_lock); return; @@ -363,7 +377,7 @@ unhash_it: kill_it: /* if dentry was on the d_lru list delete it from there */ dentry_lru_del(dentry); - dentry = d_kill(dentry); + dentry = d_kill(dentry, parent); if (dentry) goto repeat; } @@ -584,12 +598,13 @@ EXPORT_SYMBOL(d_prune_aliases); * quadratic behavior of shrink_dcache_parent(), but is also expected * to be beneficial in reducing dentry cache fragmentation. */ -static void prune_one_dentry(struct dentry * dentry) +static void prune_one_dentry(struct dentry *dentry, struct dentry *parent) __releases(dentry->d_lock) + __releases(parent->d_lock) __releases(dcache_lock) { __d_drop(dentry); - dentry = d_kill(dentry); + dentry = d_kill(dentry, parent); /* * Prune ancestors. Locking is simpler than in dput(), @@ -597,9 +612,20 @@ static void prune_one_dentry(struct dentry * dentry) */ while (dentry) { spin_lock(&dcache_lock); +again: spin_lock(&dentry->d_lock); + if (IS_ROOT(dentry)) + parent = NULL; + else + parent = dentry->d_parent; + if (parent && !spin_trylock(&parent->d_lock)) { + spin_unlock(&dentry->d_lock); + goto again; + } dentry->d_count--; if (dentry->d_count) { + if (parent) + spin_unlock(&parent->d_lock); spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); return; @@ -607,7 +633,7 @@ static void prune_one_dentry(struct dentry * dentry) dentry_lru_del(dentry); __d_drop(dentry); - dentry = d_kill(dentry); + dentry = d_kill(dentry, parent); } } @@ -616,29 +642,40 @@ static void shrink_dentry_list(struct list_head *list) struct dentry *dentry; while (!list_empty(list)) { + struct dentry *parent; + dentry = list_entry(list->prev, struct dentry, d_lru); if (!spin_trylock(&dentry->d_lock)) { +relock: spin_unlock(&dcache_lru_lock); cpu_relax(); spin_lock(&dcache_lru_lock); continue; } - __dentry_lru_del(dentry); - /* * We found an inuse dentry which was not removed from * the LRU because of laziness during lookup. Do not free * it - just keep it off the LRU list. */ if (dentry->d_count) { + __dentry_lru_del(dentry); spin_unlock(&dentry->d_lock); continue; } + if (IS_ROOT(dentry)) + parent = NULL; + else + parent = dentry->d_parent; + if (parent && !spin_trylock(&parent->d_lock)) { + spin_unlock(&dentry->d_lock); + goto relock; + } + __dentry_lru_del(dentry); spin_unlock(&dcache_lru_lock); - prune_one_dentry(dentry); + prune_one_dentry(dentry, parent); /* dcache_lock and dentry->d_lock dropped */ spin_lock(&dcache_lock); spin_lock(&dcache_lru_lock); @@ -833,14 +870,16 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) /* this is a branch with children - detach all of them * from the system in one go */ spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); list_for_each_entry(loop, &dentry->d_subdirs, d_u.d_child) { - spin_lock(&loop->d_lock); + spin_lock_nested(&loop->d_lock, + DENTRY_D_LOCK_NESTED); dentry_lru_del(loop); __d_drop(loop); spin_unlock(&loop->d_lock); - cond_resched_lock(&dcache_lock); } + spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); /* move to the first child */ @@ -868,16 +907,17 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) BUG(); } - if (IS_ROOT(dentry)) + if (IS_ROOT(dentry)) { parent = NULL; - else { + list_del(&dentry->d_u.d_child); + } else { parent = dentry->d_parent; spin_lock(&parent->d_lock); parent->d_count--; + list_del(&dentry->d_u.d_child); spin_unlock(&parent->d_lock); } - list_del(&dentry->d_u.d_child); detached++; inode = dentry->d_inode; @@ -958,6 +998,7 @@ int have_submounts(struct dentry *parent) spin_lock(&dcache_lock); if (d_mountpoint(parent)) goto positive; + spin_lock(&this_parent->d_lock); repeat: next = this_parent->d_subdirs.next; resume: @@ -965,22 +1006,34 @@ resume: struct list_head *tmp = next; struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); next = tmp->next; + + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); /* Have we found a mount point ? */ - if (d_mountpoint(dentry)) + if (d_mountpoint(dentry)) { + spin_unlock(&dentry->d_lock); + spin_unlock(&this_parent->d_lock); goto positive; + } if (!list_empty(&dentry->d_subdirs)) { + spin_unlock(&this_parent->d_lock); + spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_); this_parent = dentry; + spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_); goto repeat; } + spin_unlock(&dentry->d_lock); } /* * All done at this level ... ascend and resume the search. */ if (this_parent != parent) { next = this_parent->d_u.d_child.next; + spin_unlock(&this_parent->d_lock); this_parent = this_parent->d_parent; + spin_lock(&this_parent->d_lock); goto resume; } + spin_unlock(&this_parent->d_lock); spin_unlock(&dcache_lock); return 0; /* No mount points found in tree */ positive: @@ -1010,6 +1063,7 @@ static int select_parent(struct dentry * parent) int found = 0; spin_lock(&dcache_lock); + spin_lock(&this_parent->d_lock); repeat: next = this_parent->d_subdirs.next; resume: @@ -1017,8 +1071,9 @@ resume: struct list_head *tmp = next; struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); next = tmp->next; + BUG_ON(this_parent == dentry); - spin_lock(&dentry->d_lock); + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); /* * move only zero ref count dentries to the end @@ -1031,33 +1086,44 @@ resume: dentry_lru_del(dentry); } - spin_unlock(&dentry->d_lock); - /* * We can return to the caller if we have found some (this * ensures forward progress). We'll be coming back to find * the rest. */ - if (found && need_resched()) + if (found && need_resched()) { + spin_unlock(&dentry->d_lock); goto out; + } /* * Descend a level if the d_subdirs list is non-empty. */ if (!list_empty(&dentry->d_subdirs)) { + spin_unlock(&this_parent->d_lock); + spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_); this_parent = dentry; + spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_); goto repeat; } + + spin_unlock(&dentry->d_lock); } /* * All done at this level ... ascend and resume the search. */ if (this_parent != parent) { + struct dentry *tmp; next = this_parent->d_u.d_child.next; - this_parent = this_parent->d_parent; + tmp = this_parent->d_parent; + spin_unlock(&this_parent->d_lock); + BUG_ON(tmp == this_parent); + this_parent = tmp; + spin_lock(&this_parent->d_lock); goto resume; } out: + spin_unlock(&this_parent->d_lock); spin_unlock(&dcache_lock); return found; } @@ -1155,18 +1221,19 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) INIT_LIST_HEAD(&dentry->d_lru); INIT_LIST_HEAD(&dentry->d_subdirs); INIT_LIST_HEAD(&dentry->d_alias); + INIT_LIST_HEAD(&dentry->d_u.d_child); if (parent) { - dentry->d_parent = dget(parent); + spin_lock(&dcache_lock); + spin_lock(&parent->d_lock); + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); + dentry->d_parent = dget_dlock(parent); dentry->d_sb = parent->d_sb; - } else { - INIT_LIST_HEAD(&dentry->d_u.d_child); - } - - spin_lock(&dcache_lock); - if (parent) list_add(&dentry->d_u.d_child, &parent->d_subdirs); - spin_unlock(&dcache_lock); + spin_unlock(&dentry->d_lock); + spin_unlock(&parent->d_lock); + spin_unlock(&dcache_lock); + } this_cpu_inc(nr_dentry); @@ -1684,13 +1751,18 @@ int d_validate(struct dentry *dentry, struct dentry *dparent) struct dentry *child; spin_lock(&dcache_lock); + spin_lock(&dparent->d_lock); list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) { if (dentry == child) { - __dget_locked(dentry); + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); + __dget_locked_dlock(dentry); + spin_unlock(&dentry->d_lock); + spin_unlock(&dparent->d_lock); spin_unlock(&dcache_lock); return 1; } } + spin_unlock(&dparent->d_lock); spin_unlock(&dcache_lock); return 0; @@ -1802,17 +1874,6 @@ void dentry_update_name_case(struct dentry *dentry, struct qstr *name) } EXPORT_SYMBOL(dentry_update_name_case); -/* - * When switching names, the actual string doesn't strictly have to - * be preserved in the target - because we're dropping the target - * anyway. As such, we can just do a simple memcpy() to copy over - * the new name before we switch. - * - * Note that we have to be a lot more careful about getting the hash - * switched - we have to switch the hash value properly even if it - * then no longer matches the actual (corrupted) string of the target. - * The hash value has to match the hash queue that the dentry is on.. - */ static void switch_names(struct dentry *dentry, struct dentry *target) { if (dname_external(target)) { @@ -1854,18 +1915,53 @@ static void switch_names(struct dentry *dentry, struct dentry *target) swap(dentry->d_name.len, target->d_name.len); } +static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target) +{ + /* + * XXXX: do we really need to take target->d_lock? + */ + if (IS_ROOT(dentry) || dentry->d_parent == target->d_parent) + spin_lock(&target->d_parent->d_lock); + else { + if (d_ancestor(dentry->d_parent, target->d_parent)) { + spin_lock(&dentry->d_parent->d_lock); + spin_lock_nested(&target->d_parent->d_lock, + DENTRY_D_LOCK_NESTED); + } else { + spin_lock(&target->d_parent->d_lock); + spin_lock_nested(&dentry->d_parent->d_lock, + DENTRY_D_LOCK_NESTED); + } + } + if (target < dentry) { + spin_lock_nested(&target->d_lock, 2); + spin_lock_nested(&dentry->d_lock, 3); + } else { + spin_lock_nested(&dentry->d_lock, 2); + spin_lock_nested(&target->d_lock, 3); + } +} + +static void dentry_unlock_parents_for_move(struct dentry *dentry, + struct dentry *target) +{ + if (target->d_parent != dentry->d_parent) + spin_unlock(&dentry->d_parent->d_lock); + if (target->d_parent != target) + spin_unlock(&target->d_parent->d_lock); +} + /* - * We cannibalize "target" when moving dentry on top of it, - * because it's going to be thrown away anyway. We could be more - * polite about it, though. - * - * This forceful removal will result in ugly /proc output if - * somebody holds a file open that got deleted due to a rename. - * We could be nicer about the deleted file, and let it show - * up under the name it had before it was deleted rather than - * under the original name of the file that was moved on top of it. + * When switching names, the actual string doesn't strictly have to + * be preserved in the target - because we're dropping the target + * anyway. As such, we can just do a simple memcpy() to copy over + * the new name before we switch. + * + * Note that we have to be a lot more careful about getting the hash + * switched - we have to switch the hash value properly even if it + * then no longer matches the actual (corrupted) string of the target. + * The hash value has to match the hash queue that the dentry is on.. */ - /* * d_move_locked - move a dentry * @dentry: entry to move @@ -1879,20 +1975,12 @@ static void d_move_locked(struct dentry * dentry, struct dentry * target) if (!dentry->d_inode) printk(KERN_WARNING "VFS: moving negative dcache entry\n"); + BUG_ON(d_ancestor(dentry, target)); + BUG_ON(d_ancestor(target, dentry)); + write_seqlock(&rename_lock); - /* - * XXXX: do we really need to take target->d_lock? - */ - if (d_ancestor(dentry, target)) { - spin_lock(&dentry->d_lock); - spin_lock_nested(&target->d_lock, DENTRY_D_LOCK_NESTED); - } else if (d_ancestor(target, dentry) || target < dentry) { - spin_lock(&target->d_lock); - spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); - } else { - spin_lock(&dentry->d_lock); - spin_lock_nested(&target->d_lock, DENTRY_D_LOCK_NESTED); - } + + dentry_lock_for_move(dentry, target); /* Move the dentry to the target hash queue, if on different bucket */ spin_lock(&dcache_hash_lock); @@ -1924,6 +2012,8 @@ static void d_move_locked(struct dentry * dentry, struct dentry * target) } list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs); + + dentry_unlock_parents_for_move(dentry, target); spin_unlock(&target->d_lock); fsnotify_d_move(dentry); spin_unlock(&dentry->d_lock); @@ -2013,17 +2103,20 @@ out_err: /* * Prepare an anonymous dentry for life in the superblock's dentry tree as a * named dentry in place of the dentry to be replaced. + * returns with anon->d_lock held! */ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon) { struct dentry *dparent, *aparent; - switch_names(dentry, anon); - swap(dentry->d_name.hash, anon->d_name.hash); + dentry_lock_for_move(anon, dentry); dparent = dentry->d_parent; aparent = anon->d_parent; + switch_names(dentry, anon); + swap(dentry->d_name.hash, anon->d_name.hash); + dentry->d_parent = (aparent == anon) ? dentry : aparent; list_del(&dentry->d_u.d_child); if (!IS_ROOT(dentry)) @@ -2038,6 +2131,10 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon) else INIT_LIST_HEAD(&anon->d_u.d_child); + dentry_unlock_parents_for_move(anon, dentry); + spin_unlock(&dentry->d_lock); + + /* anon->d_lock still locked, returns locked */ anon->d_flags &= ~DCACHE_DISCONNECTED; } @@ -2073,7 +2170,6 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) /* Is this an anonymous mountpoint that we could splice * into our tree? */ if (IS_ROOT(alias)) { - spin_lock(&alias->d_lock); __d_materialise_dentry(dentry, alias); __d_drop(alias); goto found; @@ -2558,6 +2654,7 @@ void d_genocide(struct dentry *root) struct list_head *next; spin_lock(&dcache_lock); + spin_lock(&this_parent->d_lock); repeat: next = this_parent->d_subdirs.next; resume: @@ -2571,8 +2668,10 @@ resume: continue; } if (!list_empty(&dentry->d_subdirs)) { - spin_unlock(&dentry->d_lock); + spin_unlock(&this_parent->d_lock); + spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_); this_parent = dentry; + spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_); goto repeat; } dentry->d_count--; @@ -2580,12 +2679,13 @@ resume: } if (this_parent != root) { next = this_parent->d_u.d_child.next; - spin_lock(&this_parent->d_lock); this_parent->d_count--; spin_unlock(&this_parent->d_lock); this_parent = this_parent->d_parent; + spin_lock(&this_parent->d_lock); goto resume; } + spin_unlock(&this_parent->d_lock); spin_unlock(&dcache_lock); } diff --git a/fs/libfs.c b/fs/libfs.c index 433e7139c23a..cc4794914b52 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -81,7 +81,8 @@ int dcache_dir_close(struct inode *inode, struct file *file) loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin) { - mutex_lock(&file->f_path.dentry->d_inode->i_mutex); + struct dentry *dentry = file->f_path.dentry; + mutex_lock(&dentry->d_inode->i_mutex); switch (origin) { case 1: offset += file->f_pos; @@ -89,7 +90,7 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin) if (offset >= 0) break; default: - mutex_unlock(&file->f_path.dentry->d_inode->i_mutex); + mutex_unlock(&dentry->d_inode->i_mutex); return -EINVAL; } if (offset != file->f_pos) { @@ -100,22 +101,25 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin) loff_t n = file->f_pos - 2; spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); + /* d_lock not required for cursor */ list_del(&cursor->d_u.d_child); - p = file->f_path.dentry->d_subdirs.next; - while (n && p != &file->f_path.dentry->d_subdirs) { + p = dentry->d_subdirs.next; + while (n && p != &dentry->d_subdirs) { struct dentry *next; next = list_entry(p, struct dentry, d_u.d_child); - spin_lock(&next->d_lock); + spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED); if (simple_positive(next)) n--; spin_unlock(&next->d_lock); p = p->next; } list_add_tail(&cursor->d_u.d_child, p); + spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); } } - mutex_unlock(&file->f_path.dentry->d_inode->i_mutex); + mutex_unlock(&dentry->d_inode->i_mutex); return offset; } @@ -156,6 +160,7 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir) /* fallthrough */ default: spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); if (filp->f_pos == 2) list_move(q, &dentry->d_subdirs); @@ -169,6 +174,7 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir) } spin_unlock(&next->d_lock); + spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); if (filldir(dirent, next->d_name.name, next->d_name.len, filp->f_pos, @@ -176,11 +182,15 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir) dt_type(next->d_inode)) < 0) return 0; spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); + spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED); /* next is still alive */ list_move(q, p); + spin_unlock(&next->d_lock); p = q; filp->f_pos++; } + spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); } return 0; @@ -276,6 +286,7 @@ int simple_empty(struct dentry *dentry) int ret = 0; spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child) { spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED); if (simple_positive(child)) { @@ -286,6 +297,7 @@ int simple_empty(struct dentry *dentry) } ret = 1; out: + spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); return ret; } diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index bbbf7922f422..102278ed38bd 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -392,6 +392,7 @@ ncp_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos) /* If a pointer is invalid, we search the dentry. */ spin_lock(&dcache_lock); + spin_lock(&parent->d_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { dent = list_entry(next, struct dentry, d_u.d_child); @@ -400,11 +401,13 @@ ncp_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos) dget_locked(dent); else dent = NULL; + spin_unlock(&parent->d_lock); spin_unlock(&dcache_lock); goto out; } next = next->next; } + spin_unlock(&parent->d_lock); spin_unlock(&dcache_lock); return NULL; diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h index 244d1b73fda7..c4b718ff9a6b 100644 --- a/fs/ncpfs/ncplib_kernel.h +++ b/fs/ncpfs/ncplib_kernel.h @@ -194,6 +194,7 @@ ncp_renew_dentries(struct dentry *parent) struct dentry *dentry; spin_lock(&dcache_lock); + spin_lock(&parent->d_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { dentry = list_entry(next, struct dentry, d_u.d_child); @@ -205,6 +206,7 @@ ncp_renew_dentries(struct dentry *parent) next = next->next; } + spin_unlock(&parent->d_lock); spin_unlock(&dcache_lock); } @@ -216,6 +218,7 @@ ncp_invalidate_dircache_entries(struct dentry *parent) struct dentry *dentry; spin_lock(&dcache_lock); + spin_lock(&parent->d_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { dentry = list_entry(next, struct dentry, d_u.d_child); @@ -223,6 +226,7 @@ ncp_invalidate_dircache_entries(struct dentry *parent) ncp_age_dentry(server, dentry); next = next->next; } + spin_unlock(&parent->d_lock); spin_unlock(&dcache_lock); } diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 20dc218707ca..aa4f25e803f6 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -68,17 +68,19 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode) /* run all of the children of the original inode and fix their * d_flags to indicate parental interest (their parent is the * original inode) */ + spin_lock(&alias->d_lock); list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) { if (!child->d_inode) continue; - spin_lock(&child->d_lock); + spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED); if (watched) child->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED; else child->d_flags &= ~DCACHE_FSNOTIFY_PARENT_WATCHED; spin_unlock(&child->d_lock); } + spin_unlock(&alias->d_lock); } spin_unlock(&dcache_lock); } diff --git a/include/linux/dcache.h b/include/linux/dcache.h index b0ade2d46805..ddf4f55624f7 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -305,6 +305,7 @@ static inline struct dentry *dget_dlock(struct dentry *dentry) } return dentry; } + static inline struct dentry *dget(struct dentry *dentry) { if (dentry) { diff --git a/kernel/cgroup.c b/kernel/cgroup.c index eb7af39350c6..7b4705b51d4a 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -877,23 +877,31 @@ static void cgroup_clear_directory(struct dentry *dentry) BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex)); spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); node = dentry->d_subdirs.next; while (node != &dentry->d_subdirs) { struct dentry *d = list_entry(node, struct dentry, d_u.d_child); + + spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED); list_del_init(node); if (d->d_inode) { /* This should never be called on a cgroup * directory with child cgroups */ BUG_ON(d->d_inode->i_mode & S_IFDIR); - d = dget_locked(d); + dget_locked_dlock(d); + spin_unlock(&d->d_lock); + spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); d_delete(d); simple_unlink(dentry->d_inode, d); dput(d); spin_lock(&dcache_lock); - } + spin_lock(&dentry->d_lock); + } else + spin_unlock(&d->d_lock); node = dentry->d_subdirs.next; } + spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); } @@ -902,10 +910,17 @@ static void cgroup_clear_directory(struct dentry *dentry) */ static void cgroup_d_remove_dir(struct dentry *dentry) { + struct dentry *parent; + cgroup_clear_directory(dentry); spin_lock(&dcache_lock); + parent = dentry->d_parent; + spin_lock(&parent->d_lock); + spin_lock(&dentry->d_lock); list_del_init(&dentry->d_u.d_child); + spin_unlock(&dentry->d_lock); + spin_unlock(&parent->d_lock); spin_unlock(&dcache_lock); remove_dir(dentry); } diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 073fd5b0a53a..017ec096446e 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -1146,22 +1146,30 @@ static void sel_remove_entries(struct dentry *de) struct list_head *node; spin_lock(&dcache_lock); + spin_lock(&de->d_lock); node = de->d_subdirs.next; while (node != &de->d_subdirs) { struct dentry *d = list_entry(node, struct dentry, d_u.d_child); + + spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED); list_del_init(node); if (d->d_inode) { - d = dget_locked(d); + dget_locked_dlock(d); + spin_unlock(&de->d_lock); + spin_unlock(&d->d_lock); spin_unlock(&dcache_lock); d_delete(d); simple_unlink(de->d_inode, d); dput(d); spin_lock(&dcache_lock); - } + spin_lock(&de->d_lock); + } else + spin_unlock(&d->d_lock); node = de->d_subdirs.next; } + spin_unlock(&de->d_lock); spin_unlock(&dcache_lock); } -- cgit v1.2.3 From 949854d02455080d20cd3e1db28a3a18daf7599d Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:37 +1100 Subject: fs: Use rename lock and RCU for multi-step operations The remaining usages for dcache_lock is to allow atomic, multi-step read-side operations over the directory tree by excluding modifications to the tree. Also, to walk in the leaf->root direction in the tree where we don't have a natural d_lock ordering. This could be accomplished by taking every d_lock, but this would mean a huge number of locks and actually gets very tricky. Solve this instead by using the rename seqlock for multi-step read-side operations, retry in case of a rename so we don't walk up the wrong parent. Concurrent dentry insertions are not serialised against. Concurrent deletes are tricky when walking up the directory: our parent might have been deleted when dropping locks so also need to check and retry for that. We can also use the rename lock in cases where livelock is a worry (and it is introduced in subsequent patch). Signed-off-by: Nick Piggin --- drivers/staging/pohmelfs/path_entry.c | 15 +++- fs/autofs4/waitq.c | 18 ++++- fs/dcache.c | 134 ++++++++++++++++++++++++++++------ fs/nfs/namespace.c | 14 +++- include/linux/dcache.h | 1 + 5 files changed, 152 insertions(+), 30 deletions(-) (limited to 'drivers') diff --git a/drivers/staging/pohmelfs/path_entry.c b/drivers/staging/pohmelfs/path_entry.c index 8ec83d2dffb7..bbe42f42ca8f 100644 --- a/drivers/staging/pohmelfs/path_entry.c +++ b/drivers/staging/pohmelfs/path_entry.c @@ -83,10 +83,11 @@ out: int pohmelfs_path_length(struct pohmelfs_inode *pi) { struct dentry *d, *root, *first; - int len = 1; /* Root slash */ + int len; + unsigned seq; - first = d = d_find_alias(&pi->vfs_inode); - if (!d) { + first = d_find_alias(&pi->vfs_inode); + if (!first) { dprintk("%s: ino: %llu, mode: %o.\n", __func__, pi->ino, pi->vfs_inode.i_mode); return -ENOENT; } @@ -95,6 +96,11 @@ int pohmelfs_path_length(struct pohmelfs_inode *pi) root = dget(current->fs->root.dentry); spin_unlock(¤t->fs->lock); +rename_retry: + len = 1; /* Root slash */ + d = first; + seq = read_seqbegin(&rename_lock); + rcu_read_lock(); spin_lock(&dcache_lock); if (!IS_ROOT(d) && d_unhashed(d)) @@ -105,6 +111,9 @@ int pohmelfs_path_length(struct pohmelfs_inode *pi) d = d->d_parent; } spin_unlock(&dcache_lock); + rcu_read_unlock(); + if (read_seqretry(&rename_lock, seq)) + goto rename_retry; dput(root); dput(first); diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index 2341375386f8..4be8f778a418 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -186,16 +186,25 @@ static int autofs4_getpath(struct autofs_sb_info *sbi, { struct dentry *root = sbi->sb->s_root; struct dentry *tmp; - char *buf = *name; + char *buf; char *p; - int len = 0; - + int len; + unsigned seq; + +rename_retry: + buf = *name; + len = 0; + seq = read_seqbegin(&rename_lock); + rcu_read_lock(); spin_lock(&dcache_lock); for (tmp = dentry ; tmp != root ; tmp = tmp->d_parent) len += tmp->d_name.len + 1; if (!len || --len > NAME_MAX) { spin_unlock(&dcache_lock); + rcu_read_unlock(); + if (read_seqretry(&rename_lock, seq)) + goto rename_retry; return 0; } @@ -209,6 +218,9 @@ static int autofs4_getpath(struct autofs_sb_info *sbi, strncpy(p, tmp->d_name.name, tmp->d_name.len); } spin_unlock(&dcache_lock); + rcu_read_unlock(); + if (read_seqretry(&rename_lock, seq)) + goto rename_retry; return len; } diff --git a/fs/dcache.c b/fs/dcache.c index a09f0771fd27..a9bc4ecc21e1 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -80,6 +80,7 @@ static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock); __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock); __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); +EXPORT_SYMBOL(rename_lock); EXPORT_SYMBOL(dcache_inode_lock); EXPORT_SYMBOL(dcache_lock); @@ -243,6 +244,7 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) __releases(dcache_inode_lock) __releases(dcache_lock) { + dentry->d_parent = NULL; list_del(&dentry->d_u.d_child); if (parent) spin_unlock(&parent->d_lock); @@ -1017,11 +1019,15 @@ void shrink_dcache_for_umount(struct super_block *sb) * Return true if the parent or its subdirectories contain * a mount point */ - int have_submounts(struct dentry *parent) { - struct dentry *this_parent = parent; + struct dentry *this_parent; struct list_head *next; + unsigned seq; + +rename_retry: + this_parent = parent; + seq = read_seqbegin(&rename_lock); spin_lock(&dcache_lock); if (d_mountpoint(parent)) @@ -1055,17 +1061,37 @@ resume: * All done at this level ... ascend and resume the search. */ if (this_parent != parent) { - next = this_parent->d_u.d_child.next; + struct dentry *tmp; + struct dentry *child; + + tmp = this_parent->d_parent; + rcu_read_lock(); spin_unlock(&this_parent->d_lock); - this_parent = this_parent->d_parent; + child = this_parent; + this_parent = tmp; spin_lock(&this_parent->d_lock); + /* might go back up the wrong parent if we have had a rename + * or deletion */ + if (this_parent != child->d_parent || + read_seqretry(&rename_lock, seq)) { + spin_unlock(&this_parent->d_lock); + spin_unlock(&dcache_lock); + rcu_read_unlock(); + goto rename_retry; + } + rcu_read_unlock(); + next = child->d_u.d_child.next; goto resume; } spin_unlock(&this_parent->d_lock); spin_unlock(&dcache_lock); + if (read_seqretry(&rename_lock, seq)) + goto rename_retry; return 0; /* No mount points found in tree */ positive: spin_unlock(&dcache_lock); + if (read_seqretry(&rename_lock, seq)) + goto rename_retry; return 1; } EXPORT_SYMBOL(have_submounts); @@ -1086,10 +1112,15 @@ EXPORT_SYMBOL(have_submounts); */ static int select_parent(struct dentry * parent) { - struct dentry *this_parent = parent; + struct dentry *this_parent; struct list_head *next; + unsigned seq; int found = 0; +rename_retry: + this_parent = parent; + seq = read_seqbegin(&rename_lock); + spin_lock(&dcache_lock); spin_lock(&this_parent->d_lock); repeat: @@ -1099,7 +1130,6 @@ resume: struct list_head *tmp = next; struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); next = tmp->next; - BUG_ON(this_parent == dentry); spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); @@ -1142,17 +1172,32 @@ resume: */ if (this_parent != parent) { struct dentry *tmp; - next = this_parent->d_u.d_child.next; + struct dentry *child; + tmp = this_parent->d_parent; + rcu_read_lock(); spin_unlock(&this_parent->d_lock); - BUG_ON(tmp == this_parent); + child = this_parent; this_parent = tmp; spin_lock(&this_parent->d_lock); + /* might go back up the wrong parent if we have had a rename + * or deletion */ + if (this_parent != child->d_parent || + read_seqretry(&rename_lock, seq)) { + spin_unlock(&this_parent->d_lock); + spin_unlock(&dcache_lock); + rcu_read_unlock(); + goto rename_retry; + } + rcu_read_unlock(); + next = child->d_u.d_child.next; goto resume; } out: spin_unlock(&this_parent->d_lock); spin_unlock(&dcache_lock); + if (read_seqretry(&rename_lock, seq)) + goto rename_retry; return found; } @@ -1654,7 +1699,7 @@ EXPORT_SYMBOL(d_add_ci); struct dentry * d_lookup(struct dentry * parent, struct qstr * name) { struct dentry * dentry = NULL; - unsigned long seq; + unsigned seq; do { seq = read_seqbegin(&rename_lock); @@ -2290,7 +2335,7 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name) * @buffer: pointer to the end of the buffer * @buflen: pointer to buffer length * - * Caller holds the dcache_lock. + * Caller holds the rename_lock. * * If path is not reachable from the supplied root, then the value of * root is changed (without modifying refcounts). @@ -2377,7 +2422,9 @@ char *__d_path(const struct path *path, struct path *root, prepend(&res, &buflen, "\0", 1); spin_lock(&dcache_lock); + write_seqlock(&rename_lock); error = prepend_path(path, root, &res, &buflen); + write_sequnlock(&rename_lock); spin_unlock(&dcache_lock); if (error) @@ -2441,10 +2488,12 @@ char *d_path(const struct path *path, char *buf, int buflen) get_fs_root(current->fs, &root); spin_lock(&dcache_lock); + write_seqlock(&rename_lock); tmp = root; error = path_with_deleted(path, &tmp, &res, &buflen); if (error) res = ERR_PTR(error); + write_sequnlock(&rename_lock); spin_unlock(&dcache_lock); path_put(&root); return res; @@ -2472,10 +2521,12 @@ char *d_path_with_unreachable(const struct path *path, char *buf, int buflen) get_fs_root(current->fs, &root); spin_lock(&dcache_lock); + write_seqlock(&rename_lock); tmp = root; error = path_with_deleted(path, &tmp, &res, &buflen); if (!error && !path_equal(&tmp, &root)) error = prepend_unreachable(&res, &buflen); + write_sequnlock(&rename_lock); spin_unlock(&dcache_lock); path_put(&root); if (error) @@ -2544,7 +2595,9 @@ char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen) char *retval; spin_lock(&dcache_lock); + write_seqlock(&rename_lock); retval = __dentry_path(dentry, buf, buflen); + write_sequnlock(&rename_lock); spin_unlock(&dcache_lock); return retval; @@ -2557,6 +2610,7 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen) char *retval; spin_lock(&dcache_lock); + write_seqlock(&rename_lock); if (d_unlinked(dentry)) { p = buf + buflen; if (prepend(&p, &buflen, "//deleted", 10) != 0) @@ -2564,6 +2618,7 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen) buflen++; } retval = __dentry_path(dentry, buf, buflen); + write_sequnlock(&rename_lock); spin_unlock(&dcache_lock); if (!IS_ERR(retval) && p) *p = '/'; /* restore '/' overriden with '\0' */ @@ -2604,6 +2659,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) error = -ENOENT; spin_lock(&dcache_lock); + write_seqlock(&rename_lock); if (!d_unlinked(pwd.dentry)) { unsigned long len; struct path tmp = root; @@ -2612,6 +2668,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) prepend(&cwd, &buflen, "\0", 1); error = prepend_path(&pwd, &tmp, &cwd, &buflen); + write_sequnlock(&rename_lock); spin_unlock(&dcache_lock); if (error) @@ -2631,8 +2688,10 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) if (copy_to_user(buf, cwd, len)) error = -EFAULT; } - } else + } else { + write_sequnlock(&rename_lock); spin_unlock(&dcache_lock); + } out: path_put(&pwd); @@ -2660,25 +2719,25 @@ out: int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry) { int result; - unsigned long seq; + unsigned seq; if (new_dentry == old_dentry) return 1; - /* - * Need rcu_readlock to protect against the d_parent trashing - * due to d_move - */ - rcu_read_lock(); do { /* for restarting inner loop in case of seq retry */ seq = read_seqbegin(&rename_lock); + /* + * Need rcu_readlock to protect against the d_parent trashing + * due to d_move + */ + rcu_read_lock(); if (d_ancestor(old_dentry, new_dentry)) result = 1; else result = 0; + rcu_read_unlock(); } while (read_seqretry(&rename_lock, seq)); - rcu_read_unlock(); return result; } @@ -2710,9 +2769,13 @@ EXPORT_SYMBOL(path_is_under); void d_genocide(struct dentry *root) { - struct dentry *this_parent = root; + struct dentry *this_parent; struct list_head *next; + unsigned seq; +rename_retry: + this_parent = root; + seq = read_seqbegin(&rename_lock); spin_lock(&dcache_lock); spin_lock(&this_parent->d_lock); repeat: @@ -2722,6 +2785,7 @@ resume: struct list_head *tmp = next; struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); next = tmp->next; + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); if (d_unhashed(dentry) || !dentry->d_inode) { spin_unlock(&dentry->d_lock); @@ -2734,19 +2798,43 @@ resume: spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_); goto repeat; } - dentry->d_count--; + if (!(dentry->d_flags & DCACHE_GENOCIDE)) { + dentry->d_flags |= DCACHE_GENOCIDE; + dentry->d_count--; + } spin_unlock(&dentry->d_lock); } if (this_parent != root) { - next = this_parent->d_u.d_child.next; - this_parent->d_count--; + struct dentry *tmp; + struct dentry *child; + + tmp = this_parent->d_parent; + if (!(this_parent->d_flags & DCACHE_GENOCIDE)) { + this_parent->d_flags |= DCACHE_GENOCIDE; + this_parent->d_count--; + } + rcu_read_lock(); spin_unlock(&this_parent->d_lock); - this_parent = this_parent->d_parent; + child = this_parent; + this_parent = tmp; spin_lock(&this_parent->d_lock); + /* might go back up the wrong parent if we have had a rename + * or deletion */ + if (this_parent != child->d_parent || + read_seqretry(&rename_lock, seq)) { + spin_unlock(&this_parent->d_lock); + spin_unlock(&dcache_lock); + rcu_read_unlock(); + goto rename_retry; + } + rcu_read_unlock(); + next = child->d_u.d_child.next; goto resume; } spin_unlock(&this_parent->d_lock); spin_unlock(&dcache_lock); + if (read_seqretry(&rename_lock, seq)) + goto rename_retry; } /** diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index db6aa3673cf3..78c0ebb0b07c 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -49,11 +49,17 @@ char *nfs_path(const char *base, const struct dentry *dentry, char *buffer, ssize_t buflen) { - char *end = buffer+buflen; + char *end; int namelen; + unsigned seq; +rename_retry: + end = buffer+buflen; *--end = '\0'; buflen--; + + seq = read_seqbegin(&rename_lock); + rcu_read_lock(); spin_lock(&dcache_lock); while (!IS_ROOT(dentry) && dentry != droot) { namelen = dentry->d_name.len; @@ -66,6 +72,9 @@ char *nfs_path(const char *base, dentry = dentry->d_parent; } spin_unlock(&dcache_lock); + rcu_read_unlock(); + if (read_seqretry(&rename_lock, seq)) + goto rename_retry; if (*end != '/') { if (--buflen < 0) goto Elong; @@ -83,6 +92,9 @@ char *nfs_path(const char *base, return end; Elong_unlock: spin_unlock(&dcache_lock); + rcu_read_unlock(); + if (read_seqretry(&rename_lock, seq)) + goto rename_retry; Elong: return ERR_PTR(-ENAMETOOLONG); } diff --git a/include/linux/dcache.h b/include/linux/dcache.h index bda5ec0b077d..c963ebada922 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -180,6 +180,7 @@ struct dentry_operations { #define DCACHE_FSNOTIFY_PARENT_WATCHED 0x0080 /* Parent inode is watched by some fsnotify listener */ #define DCACHE_CANT_MOUNT 0x0100 +#define DCACHE_GENOCIDE 0x0200 extern spinlock_t dcache_inode_lock; extern spinlock_t dcache_lock; -- cgit v1.2.3 From b5c84bf6f6fa3a7dfdcb556023a62953574b60ee Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:38 +1100 Subject: fs: dcache remove dcache_lock dcache_lock no longer protects anything. remove it. Signed-off-by: Nick Piggin --- Documentation/filesystems/Locking | 16 +-- Documentation/filesystems/dentry-locking.txt | 40 ++++--- Documentation/filesystems/porting | 8 +- arch/powerpc/platforms/cell/spufs/inode.c | 5 +- drivers/infiniband/hw/ipath/ipath_fs.c | 6 +- drivers/infiniband/hw/qib/qib_fs.c | 3 - drivers/staging/pohmelfs/path_entry.c | 2 - drivers/staging/smbfs/cache.c | 4 - drivers/usb/core/inode.c | 3 - fs/9p/vfs_inode.c | 2 - fs/affs/amigaffs.c | 2 - fs/autofs4/autofs_i.h | 3 + fs/autofs4/expire.c | 10 +- fs/autofs4/root.c | 44 ++++---- fs/autofs4/waitq.c | 7 +- fs/ceph/dir.c | 6 +- fs/ceph/inode.c | 4 - fs/cifs/inode.c | 3 - fs/coda/cache.c | 2 - fs/configfs/configfs_internal.h | 2 - fs/configfs/inode.c | 6 +- fs/dcache.c | 160 ++++----------------------- fs/exportfs/expfs.c | 4 - fs/libfs.c | 8 -- fs/namei.c | 9 +- fs/ncpfs/dir.c | 3 - fs/ncpfs/ncplib_kernel.h | 4 - fs/nfs/dir.c | 3 - fs/nfs/getroot.c | 2 - fs/nfs/namespace.c | 3 - fs/notify/fsnotify.c | 2 - fs/ocfs2/dcache.c | 2 - include/linux/dcache.h | 5 +- include/linux/fs.h | 6 +- include/linux/fsnotify.h | 2 - include/linux/fsnotify_backend.h | 11 +- include/linux/namei.h | 1 - kernel/cgroup.c | 6 - mm/filemap.c | 3 - security/selinux/selinuxfs.c | 4 - 40 files changed, 109 insertions(+), 307 deletions(-) (limited to 'drivers') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index a15ee207b449..bdad6414dfa0 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -21,14 +21,14 @@ prototypes: char *(*d_dname)((struct dentry *dentry, char *buffer, int buflen); locking rules: - dcache_lock rename_lock ->d_lock may block -d_revalidate: no no no yes -d_hash no no no no -d_compare: no yes no no -d_delete: yes no yes no -d_release: no no no yes -d_iput: no no no yes -d_dname: no no no no + rename_lock ->d_lock may block +d_revalidate: no no yes +d_hash no no no +d_compare: yes no no +d_delete: no yes no +d_release: no no yes +d_iput: no no yes +d_dname: no no no --------------------------- inode_operations --------------------------- prototypes: diff --git a/Documentation/filesystems/dentry-locking.txt b/Documentation/filesystems/dentry-locking.txt index 79334ed5daa7..30b6a40f5650 100644 --- a/Documentation/filesystems/dentry-locking.txt +++ b/Documentation/filesystems/dentry-locking.txt @@ -31,6 +31,7 @@ significant change is the way d_lookup traverses the hash chain, it doesn't acquire the dcache_lock for this and rely on RCU to ensure that the dentry has not been *freed*. +dcache_lock no longer exists, dentry locking is explained in fs/dcache.c Dcache locking details ====================== @@ -50,14 +51,12 @@ Safe lock-free look-up of dcache hash table Dcache is a complex data structure with the hash table entries also linked together in other lists. In 2.4 kernel, dcache_lock protected -all the lists. We applied RCU only on hash chain walking. The rest of -the lists are still protected by dcache_lock. Some of the important -changes are : +all the lists. RCU dentry hash walking works like this: 1. The deletion from hash chain is done using hlist_del_rcu() macro which doesn't initialize next pointer of the deleted dentry and this allows us to walk safely lock-free while a deletion is - happening. + happening. This is a standard hlist_rcu iteration. 2. Insertion of a dentry into the hash table is done using hlist_add_head_rcu() which take care of ordering the writes - the @@ -66,19 +65,18 @@ changes are : which has since been replaced by hlist_for_each_entry_rcu(), while walking the hash chain. The only requirement is that all initialization to the dentry must be done before - hlist_add_head_rcu() since we don't have dcache_lock protection - while traversing the hash chain. This isn't different from the - existing code. - -3. The dentry looked up without holding dcache_lock by cannot be - returned for walking if it is unhashed. It then may have a NULL - d_inode or other bogosity since RCU doesn't protect the other - fields in the dentry. We therefore use a flag DCACHE_UNHASHED to - indicate unhashed dentries and use this in conjunction with a - per-dentry lock (d_lock). Once looked up without the dcache_lock, - we acquire the per-dentry lock (d_lock) and check if the dentry is - unhashed. If so, the look-up is failed. If not, the reference count - of the dentry is increased and the dentry is returned. + hlist_add_head_rcu() since we don't have lock protection + while traversing the hash chain. + +3. The dentry looked up without holding locks cannot be returned for + walking if it is unhashed. It then may have a NULL d_inode or other + bogosity since RCU doesn't protect the other fields in the dentry. We + therefore use a flag DCACHE_UNHASHED to indicate unhashed dentries + and use this in conjunction with a per-dentry lock (d_lock). Once + looked up without locks, we acquire the per-dentry lock (d_lock) and + check if the dentry is unhashed. If so, the look-up is failed. If not, + the reference count of the dentry is increased and the dentry is + returned. 4. Once a dentry is looked up, it must be ensured during the path walk for that component it doesn't go away. In pre-2.5.10 code, this was @@ -86,10 +84,10 @@ changes are : In some sense, dcache_rcu path walking looks like the pre-2.5.10 version. -5. All dentry hash chain updates must take the dcache_lock as well as - the per-dentry lock in that order. dput() does this to ensure that - a dentry that has just been looked up in another CPU doesn't get - deleted before dget() can be done on it. +5. All dentry hash chain updates must take the per-dentry lock (see + fs/dcache.c). This excludes dput() to ensure that a dentry that has + been looked up concurrently does not get deleted before dget() can + take a ref. 6. There are several ways to do reference counting of RCU protected objects. One such example is in ipv4 route cache where deferred diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index 9fd31940a8ef..1eb76959d096 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -216,7 +216,6 @@ had ->revalidate()) add calls in ->follow_link()/->readlink(). ->d_parent changes are not protected by BKL anymore. Read access is safe if at least one of the following is true: * filesystem has no cross-directory rename() - * dcache_lock is held * we know that parent had been locked (e.g. we are looking at ->d_parent of ->lookup() argument). * we are called from ->rename(). @@ -340,3 +339,10 @@ look at examples of other filesystems) for guidance. .d_hash() calling convention and locking rules are significantly changed. Read updated documentation in Documentation/filesystems/vfs.txt (and look at examples of other filesystems) for guidance. + +--- +[mandatory] + dcache_lock is gone, replaced by fine grained locks. See fs/dcache.c +for details of what locks to replace dcache_lock with in order to protect +particular things. Most of the time, a filesystem only needs ->d_lock, which +protects *all* the dcache state of a given dentry. diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 5aef1a7f5e4b..2662b50ea8d4 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -159,21 +159,18 @@ static void spufs_prune_dir(struct dentry *dir) mutex_lock(&dir->d_inode->i_mutex); list_for_each_entry_safe(dentry, tmp, &dir->d_subdirs, d_u.d_child) { - spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); if (!(d_unhashed(dentry)) && dentry->d_inode) { dget_locked_dlock(dentry); __d_drop(dentry); spin_unlock(&dentry->d_lock); simple_unlink(dir->d_inode, dentry); - /* XXX: what is dcache_lock protecting here? Other + /* XXX: what was dcache_lock protecting here? Other * filesystems (IB, configfs) release dcache_lock * before unlink */ - spin_unlock(&dcache_lock); dput(dentry); } else { spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); } } shrink_dcache_parent(dir); diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c index 18aee04a8411..925e88227ded 100644 --- a/drivers/infiniband/hw/ipath/ipath_fs.c +++ b/drivers/infiniband/hw/ipath/ipath_fs.c @@ -277,18 +277,14 @@ static int remove_file(struct dentry *parent, char *name) goto bail; } - spin_lock(&dcache_lock); spin_lock(&tmp->d_lock); if (!(d_unhashed(tmp) && tmp->d_inode)) { dget_locked_dlock(tmp); __d_drop(tmp); spin_unlock(&tmp->d_lock); - spin_unlock(&dcache_lock); simple_unlink(parent->d_inode, tmp); - } else { + } else spin_unlock(&tmp->d_lock); - spin_unlock(&dcache_lock); - } ret = 0; bail: diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c index fe4b242f0094..49af4a6538ba 100644 --- a/drivers/infiniband/hw/qib/qib_fs.c +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -453,17 +453,14 @@ static int remove_file(struct dentry *parent, char *name) goto bail; } - spin_lock(&dcache_lock); spin_lock(&tmp->d_lock); if (!(d_unhashed(tmp) && tmp->d_inode)) { dget_locked_dlock(tmp); __d_drop(tmp); spin_unlock(&tmp->d_lock); - spin_unlock(&dcache_lock); simple_unlink(parent->d_inode, tmp); } else { spin_unlock(&tmp->d_lock); - spin_unlock(&dcache_lock); } ret = 0; diff --git a/drivers/staging/pohmelfs/path_entry.c b/drivers/staging/pohmelfs/path_entry.c index bbe42f42ca8f..400a9fc386ad 100644 --- a/drivers/staging/pohmelfs/path_entry.c +++ b/drivers/staging/pohmelfs/path_entry.c @@ -101,7 +101,6 @@ rename_retry: d = first; seq = read_seqbegin(&rename_lock); rcu_read_lock(); - spin_lock(&dcache_lock); if (!IS_ROOT(d) && d_unhashed(d)) len += UNHASHED_OBSCURE_STRING_SIZE; /* Obscure " (deleted)" string */ @@ -110,7 +109,6 @@ rename_retry: len += d->d_name.len + 1; /* Plus slash */ d = d->d_parent; } - spin_unlock(&dcache_lock); rcu_read_unlock(); if (read_seqretry(&rename_lock, seq)) goto rename_retry; diff --git a/drivers/staging/smbfs/cache.c b/drivers/staging/smbfs/cache.c index 920434b6c071..75dfd403fb90 100644 --- a/drivers/staging/smbfs/cache.c +++ b/drivers/staging/smbfs/cache.c @@ -62,7 +62,6 @@ smb_invalidate_dircache_entries(struct dentry *parent) struct list_head *next; struct dentry *dentry; - spin_lock(&dcache_lock); spin_lock(&parent->d_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { @@ -72,7 +71,6 @@ smb_invalidate_dircache_entries(struct dentry *parent) next = next->next; } spin_unlock(&parent->d_lock); - spin_unlock(&dcache_lock); } /* @@ -98,7 +96,6 @@ smb_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos) } /* If a pointer is invalid, we search the dentry. */ - spin_lock(&dcache_lock); spin_lock(&parent->d_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { @@ -115,7 +112,6 @@ smb_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos) dent = NULL; out_unlock: spin_unlock(&parent->d_lock); - spin_unlock(&dcache_lock); return dent; } diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c index 89a0e8366585..1b125c224dcf 100644 --- a/drivers/usb/core/inode.c +++ b/drivers/usb/core/inode.c @@ -343,7 +343,6 @@ static int usbfs_empty (struct dentry *dentry) { struct list_head *list; - spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); list_for_each(list, &dentry->d_subdirs) { struct dentry *de = list_entry(list, struct dentry, d_u.d_child); @@ -352,13 +351,11 @@ static int usbfs_empty (struct dentry *dentry) if (usbfs_positive(de)) { spin_unlock(&de->d_lock); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); return 0; } spin_unlock(&de->d_lock); } spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); return 1; } diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 47dfd5d29a6b..1073bca8488c 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -270,13 +270,11 @@ static struct dentry *v9fs_dentry_from_dir_inode(struct inode *inode) { struct dentry *dentry; - spin_lock(&dcache_lock); spin_lock(&dcache_inode_lock); /* Directory should have only one entry. */ BUG_ON(S_ISDIR(inode->i_mode) && !list_is_singular(&inode->i_dentry)); dentry = list_entry(inode->i_dentry.next, struct dentry, d_alias); spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); return dentry; } diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c index 2321cc92d44f..600101a21ba2 100644 --- a/fs/affs/amigaffs.c +++ b/fs/affs/amigaffs.c @@ -128,7 +128,6 @@ affs_fix_dcache(struct dentry *dentry, u32 entry_ino) void *data = dentry->d_fsdata; struct list_head *head, *next; - spin_lock(&dcache_lock); spin_lock(&dcache_inode_lock); head = &inode->i_dentry; next = head->next; @@ -141,7 +140,6 @@ affs_fix_dcache(struct dentry *dentry, u32 entry_ino) next = next->next; } spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); } diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index 9d2ae9b30d9f..0fffe1c24cec 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -16,6 +16,7 @@ #include #include #include +#include #include /* This is the range of ioctl() numbers we claim as ours */ @@ -60,6 +61,8 @@ do { \ current->pid, __func__, ##args); \ } while (0) +extern spinlock_t autofs4_lock; + /* Unified info structure. This is pointed to by both the dentry and inode structures. Each file in the filesystem has an instance of this structure. It holds a reference to the dentry, so dentries are never diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 968c1434af62..2f7951d67d16 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -102,7 +102,7 @@ static struct dentry *get_next_positive_dentry(struct dentry *prev, if (prev == NULL) return dget(prev); - spin_lock(&dcache_lock); + spin_lock(&autofs4_lock); relock: p = prev; spin_lock(&p->d_lock); @@ -114,7 +114,7 @@ again: if (p == root) { spin_unlock(&p->d_lock); - spin_unlock(&dcache_lock); + spin_unlock(&autofs4_lock); dput(prev); return NULL; } @@ -144,7 +144,7 @@ again: dget_dlock(ret); spin_unlock(&ret->d_lock); spin_unlock(&p->d_lock); - spin_unlock(&dcache_lock); + spin_unlock(&autofs4_lock); dput(prev); @@ -408,13 +408,13 @@ found: ino->flags |= AUTOFS_INF_EXPIRING; init_completion(&ino->expire_complete); spin_unlock(&sbi->fs_lock); - spin_lock(&dcache_lock); + spin_lock(&autofs4_lock); spin_lock(&expired->d_parent->d_lock); spin_lock_nested(&expired->d_lock, DENTRY_D_LOCK_NESTED); list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child); spin_unlock(&expired->d_lock); spin_unlock(&expired->d_parent->d_lock); - spin_unlock(&dcache_lock); + spin_unlock(&autofs4_lock); return expired; } diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 7a9ed6b88291..10ca68a96dc7 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -23,6 +23,8 @@ #include "autofs_i.h" +DEFINE_SPINLOCK(autofs4_lock); + static int autofs4_dir_symlink(struct inode *,struct dentry *,const char *); static int autofs4_dir_unlink(struct inode *,struct dentry *); static int autofs4_dir_rmdir(struct inode *,struct dentry *); @@ -142,15 +144,15 @@ static int autofs4_dir_open(struct inode *inode, struct file *file) * autofs file system so just let the libfs routines handle * it. */ - spin_lock(&dcache_lock); + spin_lock(&autofs4_lock); spin_lock(&dentry->d_lock); if (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) { spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); + spin_unlock(&autofs4_lock); return -ENOENT; } spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); + spin_unlock(&autofs4_lock); out: return dcache_dir_open(inode, file); @@ -255,11 +257,11 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) /* We trigger a mount for almost all flags */ lookup_type = autofs4_need_mount(nd->flags); spin_lock(&sbi->fs_lock); - spin_lock(&dcache_lock); + spin_lock(&autofs4_lock); spin_lock(&dentry->d_lock); if (!(lookup_type || ino->flags & AUTOFS_INF_PENDING)) { spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); + spin_unlock(&autofs4_lock); spin_unlock(&sbi->fs_lock); goto follow; } @@ -272,7 +274,7 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) if (ino->flags & AUTOFS_INF_PENDING || (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs))) { spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); + spin_unlock(&autofs4_lock); spin_unlock(&sbi->fs_lock); status = try_to_fill_dentry(dentry, nd->flags); @@ -282,7 +284,7 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) goto follow; } spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); + spin_unlock(&autofs4_lock); spin_unlock(&sbi->fs_lock); follow: /* @@ -353,14 +355,14 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd) return 0; /* Check for a non-mountpoint directory with no contents */ - spin_lock(&dcache_lock); + spin_lock(&autofs4_lock); spin_lock(&dentry->d_lock); if (S_ISDIR(dentry->d_inode->i_mode) && !d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) { DPRINTK("dentry=%p %.*s, emptydir", dentry, dentry->d_name.len, dentry->d_name.name); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); + spin_unlock(&autofs4_lock); /* The daemon never causes a mount to trigger */ if (oz_mode) @@ -377,7 +379,7 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd) return status; } spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); + spin_unlock(&autofs4_lock); return 1; } @@ -432,7 +434,7 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry) const unsigned char *str = name->name; struct list_head *p, *head; - spin_lock(&dcache_lock); + spin_lock(&autofs4_lock); spin_lock(&sbi->lookup_lock); head = &sbi->active_list; list_for_each(p, head) { @@ -465,14 +467,14 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry) dget_dlock(active); spin_unlock(&active->d_lock); spin_unlock(&sbi->lookup_lock); - spin_unlock(&dcache_lock); + spin_unlock(&autofs4_lock); return active; } next: spin_unlock(&active->d_lock); } spin_unlock(&sbi->lookup_lock); - spin_unlock(&dcache_lock); + spin_unlock(&autofs4_lock); return NULL; } @@ -487,7 +489,7 @@ static struct dentry *autofs4_lookup_expiring(struct dentry *dentry) const unsigned char *str = name->name; struct list_head *p, *head; - spin_lock(&dcache_lock); + spin_lock(&autofs4_lock); spin_lock(&sbi->lookup_lock); head = &sbi->expiring_list; list_for_each(p, head) { @@ -520,14 +522,14 @@ static struct dentry *autofs4_lookup_expiring(struct dentry *dentry) dget_dlock(expiring); spin_unlock(&expiring->d_lock); spin_unlock(&sbi->lookup_lock); - spin_unlock(&dcache_lock); + spin_unlock(&autofs4_lock); return expiring; } next: spin_unlock(&expiring->d_lock); } spin_unlock(&sbi->lookup_lock); - spin_unlock(&dcache_lock); + spin_unlock(&autofs4_lock); return NULL; } @@ -763,12 +765,12 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry) dir->i_mtime = CURRENT_TIME; - spin_lock(&dcache_lock); + spin_lock(&autofs4_lock); autofs4_add_expiring(dentry); spin_lock(&dentry->d_lock); __d_drop(dentry); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); + spin_unlock(&autofs4_lock); return 0; } @@ -785,20 +787,20 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry) if (!autofs4_oz_mode(sbi)) return -EACCES; - spin_lock(&dcache_lock); + spin_lock(&autofs4_lock); spin_lock(&sbi->lookup_lock); spin_lock(&dentry->d_lock); if (!list_empty(&dentry->d_subdirs)) { spin_unlock(&dentry->d_lock); spin_unlock(&sbi->lookup_lock); - spin_unlock(&dcache_lock); + spin_unlock(&autofs4_lock); return -ENOTEMPTY; } __autofs4_add_expiring(dentry); spin_unlock(&sbi->lookup_lock); __d_drop(dentry); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); + spin_unlock(&autofs4_lock); if (atomic_dec_and_test(&ino->count)) { p_ino = autofs4_dentry_ino(dentry->d_parent); diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index 4be8f778a418..c5f8459c905e 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -194,14 +194,15 @@ static int autofs4_getpath(struct autofs_sb_info *sbi, rename_retry: buf = *name; len = 0; + seq = read_seqbegin(&rename_lock); rcu_read_lock(); - spin_lock(&dcache_lock); + spin_lock(&autofs4_lock); for (tmp = dentry ; tmp != root ; tmp = tmp->d_parent) len += tmp->d_name.len + 1; if (!len || --len > NAME_MAX) { - spin_unlock(&dcache_lock); + spin_unlock(&autofs4_lock); rcu_read_unlock(); if (read_seqretry(&rename_lock, seq)) goto rename_retry; @@ -217,7 +218,7 @@ rename_retry: p -= tmp->d_name.len; strncpy(p, tmp->d_name.name, tmp->d_name.len); } - spin_unlock(&dcache_lock); + spin_unlock(&autofs4_lock); rcu_read_unlock(); if (read_seqretry(&rename_lock, seq)) goto rename_retry; diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 2c924e8d85fe..58abc3da6111 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -112,7 +112,6 @@ static int __dcache_readdir(struct file *filp, dout("__dcache_readdir %p at %llu (last %p)\n", dir, filp->f_pos, last); - spin_lock(&dcache_lock); spin_lock(&parent->d_lock); /* start at beginning? */ @@ -156,7 +155,6 @@ more: dget_dlock(dentry); spin_unlock(&dentry->d_lock); spin_unlock(&parent->d_lock); - spin_unlock(&dcache_lock); dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, filp->f_pos, dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode); @@ -182,21 +180,19 @@ more: filp->f_pos++; - /* make sure a dentry wasn't dropped while we didn't have dcache_lock */ + /* make sure a dentry wasn't dropped while we didn't have parent lock */ if (!ceph_i_test(dir, CEPH_I_COMPLETE)) { dout(" lost I_COMPLETE on %p; falling back to mds\n", dir); err = -EAGAIN; goto out; } - spin_lock(&dcache_lock); spin_lock(&parent->d_lock); p = p->prev; /* advance to next dentry */ goto more; out_unlock: spin_unlock(&parent->d_lock); - spin_unlock(&dcache_lock); out: if (last) dput(last); diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 2c6944473366..2a48cafc174b 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -841,7 +841,6 @@ static void ceph_set_dentry_offset(struct dentry *dn) di->offset = ceph_inode(inode)->i_max_offset++; spin_unlock(&inode->i_lock); - spin_lock(&dcache_lock); spin_lock(&dir->d_lock); spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED); list_move(&dn->d_u.d_child, &dir->d_subdirs); @@ -849,7 +848,6 @@ static void ceph_set_dentry_offset(struct dentry *dn) dn->d_u.d_child.prev, dn->d_u.d_child.next); spin_unlock(&dn->d_lock); spin_unlock(&dir->d_lock); - spin_unlock(&dcache_lock); } /* @@ -1233,13 +1231,11 @@ retry_lookup: goto retry_lookup; } else { /* reorder parent's d_subdirs */ - spin_lock(&dcache_lock); spin_lock(&parent->d_lock); spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED); list_move(&dn->d_u.d_child, &parent->d_subdirs); spin_unlock(&dn->d_lock); spin_unlock(&parent->d_lock); - spin_unlock(&dcache_lock); } di = dn->d_fsdata; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 003698365ece..99b9a2cc14b7 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -809,17 +809,14 @@ inode_has_hashed_dentries(struct inode *inode) { struct dentry *dentry; - spin_lock(&dcache_lock); spin_lock(&dcache_inode_lock); list_for_each_entry(dentry, &inode->i_dentry, d_alias) { if (!d_unhashed(dentry) || IS_ROOT(dentry)) { spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); return true; } } spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); return false; } diff --git a/fs/coda/cache.c b/fs/coda/cache.c index 859393fca2b7..5525e1c660fd 100644 --- a/fs/coda/cache.c +++ b/fs/coda/cache.c @@ -93,7 +93,6 @@ static void coda_flag_children(struct dentry *parent, int flag) struct list_head *child; struct dentry *de; - spin_lock(&dcache_lock); spin_lock(&parent->d_lock); list_for_each(child, &parent->d_subdirs) { @@ -104,7 +103,6 @@ static void coda_flag_children(struct dentry *parent, int flag) coda_flag_inode(de->d_inode, flag); } spin_unlock(&parent->d_lock); - spin_unlock(&dcache_lock); return; } diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h index e58b4c30e216..026cf68553a4 100644 --- a/fs/configfs/configfs_internal.h +++ b/fs/configfs/configfs_internal.h @@ -120,7 +120,6 @@ static inline struct config_item *configfs_get_config_item(struct dentry *dentry { struct config_item * item = NULL; - spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); if (!d_unhashed(dentry)) { struct configfs_dirent * sd = dentry->d_fsdata; @@ -131,7 +130,6 @@ static inline struct config_item *configfs_get_config_item(struct dentry *dentry item = config_item_get(sd->s_element); } spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); return item; } diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index 79b37765d8ff..fb3a55fff82a 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c @@ -250,18 +250,14 @@ void configfs_drop_dentry(struct configfs_dirent * sd, struct dentry * parent) struct dentry * dentry = sd->s_dentry; if (dentry) { - spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); if (!(d_unhashed(dentry) && dentry->d_inode)) { dget_locked_dlock(dentry); __d_drop(dentry); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); simple_unlink(parent->d_inode, dentry); - } else { + } else spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); - } } } diff --git a/fs/dcache.c b/fs/dcache.c index a9bc4ecc21e1..0dbae053b664 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -54,11 +54,10 @@ * - d_alias, d_inode * * Ordering: - * dcache_lock - * dcache_inode_lock - * dentry->d_lock - * dcache_lru_lock - * dcache_hash_lock + * dcache_inode_lock + * dentry->d_lock + * dcache_lru_lock + * dcache_hash_lock * * If there is an ancestor relationship: * dentry->d_parent->...->d_parent->d_lock @@ -77,12 +76,10 @@ EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_inode_lock); static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_hash_lock); static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock); -__cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock); __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); EXPORT_SYMBOL(rename_lock); EXPORT_SYMBOL(dcache_inode_lock); -EXPORT_SYMBOL(dcache_lock); static struct kmem_cache *dentry_cache __read_mostly; @@ -139,7 +136,7 @@ static void __d_free(struct rcu_head *head) } /* - * no dcache_lock, please. + * no locks, please. */ static void d_free(struct dentry *dentry) { @@ -162,7 +159,6 @@ static void d_free(struct dentry *dentry) static void dentry_iput(struct dentry * dentry) __releases(dentry->d_lock) __releases(dcache_inode_lock) - __releases(dcache_lock) { struct inode *inode = dentry->d_inode; if (inode) { @@ -170,7 +166,6 @@ static void dentry_iput(struct dentry * dentry) list_del_init(&dentry->d_alias); spin_unlock(&dentry->d_lock); spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); if (!inode->i_nlink) fsnotify_inoderemove(inode); if (dentry->d_op && dentry->d_op->d_iput) @@ -180,7 +175,6 @@ static void dentry_iput(struct dentry * dentry) } else { spin_unlock(&dentry->d_lock); spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); } } @@ -235,14 +229,13 @@ static void dentry_lru_move_tail(struct dentry *dentry) * * If this is the root of the dentry tree, return NULL. * - * dcache_lock and d_lock and d_parent->d_lock must be held by caller, and - * are dropped by d_kill. + * dentry->d_lock and parent->d_lock must be held by caller, and are dropped by + * d_kill. */ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) __releases(dentry->d_lock) __releases(parent->d_lock) __releases(dcache_inode_lock) - __releases(dcache_lock) { dentry->d_parent = NULL; list_del(&dentry->d_u.d_child); @@ -285,11 +278,9 @@ EXPORT_SYMBOL(__d_drop); void d_drop(struct dentry *dentry) { - spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); __d_drop(dentry); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); } EXPORT_SYMBOL(d_drop); @@ -337,21 +328,10 @@ repeat: else parent = dentry->d_parent; if (dentry->d_count == 1) { - if (!spin_trylock(&dcache_lock)) { - /* - * Something of a livelock possibility we could avoid - * by taking dcache_lock and trying again, but we - * want to reduce dcache_lock anyway so this will - * get improved. - */ -drop1: - spin_unlock(&dentry->d_lock); - goto repeat; - } if (!spin_trylock(&dcache_inode_lock)) { drop2: - spin_unlock(&dcache_lock); - goto drop1; + spin_unlock(&dentry->d_lock); + goto repeat; } if (parent && !spin_trylock(&parent->d_lock)) { spin_unlock(&dcache_inode_lock); @@ -363,7 +343,6 @@ drop2: spin_unlock(&dentry->d_lock); if (parent) spin_unlock(&parent->d_lock); - spin_unlock(&dcache_lock); return; } @@ -387,7 +366,6 @@ drop2: if (parent) spin_unlock(&parent->d_lock); spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); return; unhash_it: @@ -418,11 +396,9 @@ int d_invalidate(struct dentry * dentry) /* * If it's already been dropped, return OK. */ - spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); if (d_unhashed(dentry)) { spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); return 0; } /* @@ -431,9 +407,7 @@ int d_invalidate(struct dentry * dentry) */ if (!list_empty(&dentry->d_subdirs)) { spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); shrink_dcache_parent(dentry); - spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); } @@ -450,19 +424,17 @@ int d_invalidate(struct dentry * dentry) if (dentry->d_count > 1) { if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) { spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); return -EBUSY; } } __d_drop(dentry); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); return 0; } EXPORT_SYMBOL(d_invalidate); -/* This must be called with dcache_lock and d_lock held */ +/* This must be called with d_lock held */ static inline struct dentry * __dget_locked_dlock(struct dentry *dentry) { dentry->d_count++; @@ -470,7 +442,7 @@ static inline struct dentry * __dget_locked_dlock(struct dentry *dentry) return dentry; } -/* This should be called _only_ with dcache_lock held */ +/* This must be called with d_lock held */ static inline struct dentry * __dget_locked(struct dentry *dentry) { spin_lock(&dentry->d_lock); @@ -575,11 +547,9 @@ struct dentry *d_find_alias(struct inode *inode) struct dentry *de = NULL; if (!list_empty(&inode->i_dentry)) { - spin_lock(&dcache_lock); spin_lock(&dcache_inode_lock); de = __d_find_alias(inode, 0); spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); } return de; } @@ -593,7 +563,6 @@ void d_prune_aliases(struct inode *inode) { struct dentry *dentry; restart: - spin_lock(&dcache_lock); spin_lock(&dcache_inode_lock); list_for_each_entry(dentry, &inode->i_dentry, d_alias) { spin_lock(&dentry->d_lock); @@ -602,14 +571,12 @@ restart: __d_drop(dentry); spin_unlock(&dentry->d_lock); spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); dput(dentry); goto restart; } spin_unlock(&dentry->d_lock); } spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); } EXPORT_SYMBOL(d_prune_aliases); @@ -625,17 +592,14 @@ static void prune_one_dentry(struct dentry *dentry, struct dentry *parent) __releases(dentry->d_lock) __releases(parent->d_lock) __releases(dcache_inode_lock) - __releases(dcache_lock) { __d_drop(dentry); dentry = d_kill(dentry, parent); /* - * Prune ancestors. Locking is simpler than in dput(), - * because dcache_lock needs to be taken anyway. + * Prune ancestors. */ while (dentry) { - spin_lock(&dcache_lock); spin_lock(&dcache_inode_lock); again: spin_lock(&dentry->d_lock); @@ -653,7 +617,6 @@ again: spin_unlock(&parent->d_lock); spin_unlock(&dentry->d_lock); spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); return; } @@ -702,8 +665,7 @@ relock: spin_unlock(&dcache_lru_lock); prune_one_dentry(dentry, parent); - /* dcache_lock, dcache_inode_lock and dentry->d_lock dropped */ - spin_lock(&dcache_lock); + /* dcache_inode_lock and dentry->d_lock dropped */ spin_lock(&dcache_inode_lock); spin_lock(&dcache_lru_lock); } @@ -725,7 +687,6 @@ static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags) LIST_HEAD(tmp); int cnt = *count; - spin_lock(&dcache_lock); spin_lock(&dcache_inode_lock); relock: spin_lock(&dcache_lru_lock); @@ -766,7 +727,6 @@ relock: list_splice(&referenced, &sb->s_dentry_lru); spin_unlock(&dcache_lru_lock); spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); } /** @@ -788,7 +748,6 @@ static void prune_dcache(int count) if (unused == 0 || count == 0) return; - spin_lock(&dcache_lock); if (count >= unused) prune_ratio = 1; else @@ -825,11 +784,9 @@ static void prune_dcache(int count) if (down_read_trylock(&sb->s_umount)) { if ((sb->s_root != NULL) && (!list_empty(&sb->s_dentry_lru))) { - spin_unlock(&dcache_lock); __shrink_dcache_sb(sb, &w_count, DCACHE_REFERENCED); pruned -= w_count; - spin_lock(&dcache_lock); } up_read(&sb->s_umount); } @@ -845,7 +802,6 @@ static void prune_dcache(int count) if (p) __put_super(p); spin_unlock(&sb_lock); - spin_unlock(&dcache_lock); } /** @@ -859,7 +815,6 @@ void shrink_dcache_sb(struct super_block *sb) { LIST_HEAD(tmp); - spin_lock(&dcache_lock); spin_lock(&dcache_inode_lock); spin_lock(&dcache_lru_lock); while (!list_empty(&sb->s_dentry_lru)) { @@ -868,7 +823,6 @@ void shrink_dcache_sb(struct super_block *sb) } spin_unlock(&dcache_lru_lock); spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); } EXPORT_SYMBOL(shrink_dcache_sb); @@ -885,12 +839,10 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) BUG_ON(!IS_ROOT(dentry)); /* detach this root from the system */ - spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); dentry_lru_del(dentry); __d_drop(dentry); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); for (;;) { /* descend to the first leaf in the current subtree */ @@ -899,7 +851,6 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) /* this is a branch with children - detach all of them * from the system in one go */ - spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); list_for_each_entry(loop, &dentry->d_subdirs, d_u.d_child) { @@ -910,7 +861,6 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) spin_unlock(&loop->d_lock); } spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); /* move to the first child */ dentry = list_entry(dentry->d_subdirs.next, @@ -977,8 +927,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) /* * destroy the dentries attached to a superblock on unmounting - * - we don't need to use dentry->d_lock, and only need dcache_lock when - * removing the dentry from the system lists and hashes because: + * - we don't need to use dentry->d_lock because: * - the superblock is detached from all mountings and open files, so the * dentry trees will not be rearranged by the VFS * - s_umount is write-locked, so the memory pressure shrinker will ignore @@ -1029,7 +978,6 @@ rename_retry: this_parent = parent; seq = read_seqbegin(&rename_lock); - spin_lock(&dcache_lock); if (d_mountpoint(parent)) goto positive; spin_lock(&this_parent->d_lock); @@ -1075,7 +1023,6 @@ resume: if (this_parent != child->d_parent || read_seqretry(&rename_lock, seq)) { spin_unlock(&this_parent->d_lock); - spin_unlock(&dcache_lock); rcu_read_unlock(); goto rename_retry; } @@ -1084,12 +1031,10 @@ resume: goto resume; } spin_unlock(&this_parent->d_lock); - spin_unlock(&dcache_lock); if (read_seqretry(&rename_lock, seq)) goto rename_retry; return 0; /* No mount points found in tree */ positive: - spin_unlock(&dcache_lock); if (read_seqretry(&rename_lock, seq)) goto rename_retry; return 1; @@ -1121,7 +1066,6 @@ rename_retry: this_parent = parent; seq = read_seqbegin(&rename_lock); - spin_lock(&dcache_lock); spin_lock(&this_parent->d_lock); repeat: next = this_parent->d_subdirs.next; @@ -1185,7 +1129,6 @@ resume: if (this_parent != child->d_parent || read_seqretry(&rename_lock, seq)) { spin_unlock(&this_parent->d_lock); - spin_unlock(&dcache_lock); rcu_read_unlock(); goto rename_retry; } @@ -1195,7 +1138,6 @@ resume: } out: spin_unlock(&this_parent->d_lock); - spin_unlock(&dcache_lock); if (read_seqretry(&rename_lock, seq)) goto rename_retry; return found; @@ -1297,7 +1239,6 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) INIT_LIST_HEAD(&dentry->d_u.d_child); if (parent) { - spin_lock(&dcache_lock); spin_lock(&parent->d_lock); spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); dentry->d_parent = dget_dlock(parent); @@ -1305,7 +1246,6 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) list_add(&dentry->d_u.d_child, &parent->d_subdirs); spin_unlock(&dentry->d_lock); spin_unlock(&parent->d_lock); - spin_unlock(&dcache_lock); } this_cpu_inc(nr_dentry); @@ -1325,7 +1265,6 @@ struct dentry *d_alloc_name(struct dentry *parent, const char *name) } EXPORT_SYMBOL(d_alloc_name); -/* the caller must hold dcache_lock */ static void __d_instantiate(struct dentry *dentry, struct inode *inode) { spin_lock(&dentry->d_lock); @@ -1354,11 +1293,9 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode) void d_instantiate(struct dentry *entry, struct inode * inode) { BUG_ON(!list_empty(&entry->d_alias)); - spin_lock(&dcache_lock); spin_lock(&dcache_inode_lock); __d_instantiate(entry, inode); spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); security_d_instantiate(entry, inode); } EXPORT_SYMBOL(d_instantiate); @@ -1422,11 +1359,9 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode) BUG_ON(!list_empty(&entry->d_alias)); - spin_lock(&dcache_lock); spin_lock(&dcache_inode_lock); result = __d_instantiate_unique(entry, inode); spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); if (!result) { security_d_instantiate(entry, inode); @@ -1515,12 +1450,11 @@ struct dentry *d_obtain_alias(struct inode *inode) } tmp->d_parent = tmp; /* make sure dput doesn't croak */ - spin_lock(&dcache_lock); + spin_lock(&dcache_inode_lock); res = __d_find_alias(inode, 0); if (res) { spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); dput(tmp); goto out_iput; } @@ -1538,7 +1472,6 @@ struct dentry *d_obtain_alias(struct inode *inode) spin_unlock(&tmp->d_lock); spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); return tmp; out_iput: @@ -1568,21 +1501,18 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) struct dentry *new = NULL; if (inode && S_ISDIR(inode->i_mode)) { - spin_lock(&dcache_lock); spin_lock(&dcache_inode_lock); new = __d_find_alias(inode, 1); if (new) { BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED)); spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); security_d_instantiate(new, inode); d_move(new, dentry); iput(inode); } else { - /* already taking dcache_lock, so d_add() by hand */ + /* already taking dcache_inode_lock, so d_add() by hand */ __d_instantiate(dentry, inode); spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); security_d_instantiate(dentry, inode); d_rehash(dentry); } @@ -1655,12 +1585,10 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode, * Negative dentry: instantiate it unless the inode is a directory and * already has a dentry. */ - spin_lock(&dcache_lock); spin_lock(&dcache_inode_lock); if (!S_ISDIR(inode->i_mode) || list_empty(&inode->i_dentry)) { __d_instantiate(found, inode); spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); security_d_instantiate(found, inode); return found; } @@ -1672,7 +1600,6 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode, new = list_entry(inode->i_dentry.next, struct dentry, d_alias); dget_locked(new); spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); security_d_instantiate(found, inode); d_move(new, found); iput(inode); @@ -1843,7 +1770,6 @@ int d_validate(struct dentry *dentry, struct dentry *dparent) { struct dentry *child; - spin_lock(&dcache_lock); spin_lock(&dparent->d_lock); list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) { if (dentry == child) { @@ -1851,12 +1777,10 @@ int d_validate(struct dentry *dentry, struct dentry *dparent) __dget_locked_dlock(dentry); spin_unlock(&dentry->d_lock); spin_unlock(&dparent->d_lock); - spin_unlock(&dcache_lock); return 1; } } spin_unlock(&dparent->d_lock); - spin_unlock(&dcache_lock); return 0; } @@ -1889,7 +1813,6 @@ void d_delete(struct dentry * dentry) /* * Are we the only user? */ - spin_lock(&dcache_lock); spin_lock(&dcache_inode_lock); spin_lock(&dentry->d_lock); isdir = S_ISDIR(dentry->d_inode->i_mode); @@ -1905,7 +1828,6 @@ void d_delete(struct dentry * dentry) spin_unlock(&dentry->d_lock); spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); fsnotify_nameremove(dentry, isdir); } @@ -1932,13 +1854,11 @@ static void _d_rehash(struct dentry * entry) void d_rehash(struct dentry * entry) { - spin_lock(&dcache_lock); spin_lock(&entry->d_lock); spin_lock(&dcache_hash_lock); _d_rehash(entry); spin_unlock(&dcache_hash_lock); spin_unlock(&entry->d_lock); - spin_unlock(&dcache_lock); } EXPORT_SYMBOL(d_rehash); @@ -1961,11 +1881,9 @@ void dentry_update_name_case(struct dentry *dentry, struct qstr *name) BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex)); BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */ - spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); memcpy((unsigned char *)dentry->d_name.name, name->name, name->len); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); } EXPORT_SYMBOL(dentry_update_name_case); @@ -2058,14 +1976,14 @@ static void dentry_unlock_parents_for_move(struct dentry *dentry, * The hash value has to match the hash queue that the dentry is on.. */ /* - * d_move_locked - move a dentry + * d_move - move a dentry * @dentry: entry to move * @target: new dentry * * Update the dcache to reflect the move of a file name. Negative * dcache entries should not be moved in this way. */ -static void d_move_locked(struct dentry * dentry, struct dentry * target) +void d_move(struct dentry * dentry, struct dentry * target) { if (!dentry->d_inode) printk(KERN_WARNING "VFS: moving negative dcache entry\n"); @@ -2114,22 +2032,6 @@ static void d_move_locked(struct dentry * dentry, struct dentry * target) spin_unlock(&dentry->d_lock); write_sequnlock(&rename_lock); } - -/** - * d_move - move a dentry - * @dentry: entry to move - * @target: new dentry - * - * Update the dcache to reflect the move of a file name. Negative - * dcache entries should not be moved in this way. - */ - -void d_move(struct dentry * dentry, struct dentry * target) -{ - spin_lock(&dcache_lock); - d_move_locked(dentry, target); - spin_unlock(&dcache_lock); -} EXPORT_SYMBOL(d_move); /** @@ -2155,13 +2057,12 @@ struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2) * This helper attempts to cope with remotely renamed directories * * It assumes that the caller is already holding - * dentry->d_parent->d_inode->i_mutex and the dcache_lock + * dentry->d_parent->d_inode->i_mutex and the dcache_inode_lock * * Note: If ever the locking in lock_rename() changes, then please * remember to update this too... */ static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias) - __releases(dcache_lock) __releases(dcache_inode_lock) { struct mutex *m1 = NULL, *m2 = NULL; @@ -2185,11 +2086,10 @@ static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias) goto out_err; m2 = &alias->d_parent->d_inode->i_mutex; out_unalias: - d_move_locked(alias, dentry); + d_move(alias, dentry); ret = alias; out_err: spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); if (m2) mutex_unlock(m2); if (m1) @@ -2249,7 +2149,6 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) BUG_ON(!d_unhashed(dentry)); - spin_lock(&dcache_lock); spin_lock(&dcache_inode_lock); if (!inode) { @@ -2295,7 +2194,6 @@ found: spin_unlock(&dcache_hash_lock); spin_unlock(&actual->d_lock); spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); out_nolock: if (actual == dentry) { security_d_instantiate(dentry, inode); @@ -2307,7 +2205,6 @@ out_nolock: shouldnt_be_hashed: spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); BUG(); } EXPORT_SYMBOL_GPL(d_materialise_unique); @@ -2421,11 +2318,9 @@ char *__d_path(const struct path *path, struct path *root, int error; prepend(&res, &buflen, "\0", 1); - spin_lock(&dcache_lock); write_seqlock(&rename_lock); error = prepend_path(path, root, &res, &buflen); write_sequnlock(&rename_lock); - spin_unlock(&dcache_lock); if (error) return ERR_PTR(error); @@ -2487,14 +2382,12 @@ char *d_path(const struct path *path, char *buf, int buflen) return path->dentry->d_op->d_dname(path->dentry, buf, buflen); get_fs_root(current->fs, &root); - spin_lock(&dcache_lock); write_seqlock(&rename_lock); tmp = root; error = path_with_deleted(path, &tmp, &res, &buflen); if (error) res = ERR_PTR(error); write_sequnlock(&rename_lock); - spin_unlock(&dcache_lock); path_put(&root); return res; } @@ -2520,14 +2413,12 @@ char *d_path_with_unreachable(const struct path *path, char *buf, int buflen) return path->dentry->d_op->d_dname(path->dentry, buf, buflen); get_fs_root(current->fs, &root); - spin_lock(&dcache_lock); write_seqlock(&rename_lock); tmp = root; error = path_with_deleted(path, &tmp, &res, &buflen); if (!error && !path_equal(&tmp, &root)) error = prepend_unreachable(&res, &buflen); write_sequnlock(&rename_lock); - spin_unlock(&dcache_lock); path_put(&root); if (error) res = ERR_PTR(error); @@ -2594,11 +2485,9 @@ char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen) { char *retval; - spin_lock(&dcache_lock); write_seqlock(&rename_lock); retval = __dentry_path(dentry, buf, buflen); write_sequnlock(&rename_lock); - spin_unlock(&dcache_lock); return retval; } @@ -2609,7 +2498,6 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen) char *p = NULL; char *retval; - spin_lock(&dcache_lock); write_seqlock(&rename_lock); if (d_unlinked(dentry)) { p = buf + buflen; @@ -2619,12 +2507,10 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen) } retval = __dentry_path(dentry, buf, buflen); write_sequnlock(&rename_lock); - spin_unlock(&dcache_lock); if (!IS_ERR(retval) && p) *p = '/'; /* restore '/' overriden with '\0' */ return retval; Elong: - spin_unlock(&dcache_lock); return ERR_PTR(-ENAMETOOLONG); } @@ -2658,7 +2544,6 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) get_fs_root_and_pwd(current->fs, &root, &pwd); error = -ENOENT; - spin_lock(&dcache_lock); write_seqlock(&rename_lock); if (!d_unlinked(pwd.dentry)) { unsigned long len; @@ -2669,7 +2554,6 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) prepend(&cwd, &buflen, "\0", 1); error = prepend_path(&pwd, &tmp, &cwd, &buflen); write_sequnlock(&rename_lock); - spin_unlock(&dcache_lock); if (error) goto out; @@ -2690,7 +2574,6 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) } } else { write_sequnlock(&rename_lock); - spin_unlock(&dcache_lock); } out: @@ -2776,7 +2659,6 @@ void d_genocide(struct dentry *root) rename_retry: this_parent = root; seq = read_seqbegin(&rename_lock); - spin_lock(&dcache_lock); spin_lock(&this_parent->d_lock); repeat: next = this_parent->d_subdirs.next; @@ -2823,7 +2705,6 @@ resume: if (this_parent != child->d_parent || read_seqretry(&rename_lock, seq)) { spin_unlock(&this_parent->d_lock); - spin_unlock(&dcache_lock); rcu_read_unlock(); goto rename_retry; } @@ -2832,7 +2713,6 @@ resume: goto resume; } spin_unlock(&this_parent->d_lock); - spin_unlock(&dcache_lock); if (read_seqretry(&rename_lock, seq)) goto rename_retry; } diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 84b8c460a781..53a5c08fb63c 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -47,24 +47,20 @@ find_acceptable_alias(struct dentry *result, if (acceptable(context, result)) return result; - spin_lock(&dcache_lock); spin_lock(&dcache_inode_lock); list_for_each_entry(dentry, &result->d_inode->i_dentry, d_alias) { dget_locked(dentry); spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); if (toput) dput(toput); if (dentry != result && acceptable(context, dentry)) { dput(result); return dentry; } - spin_lock(&dcache_lock); spin_lock(&dcache_inode_lock); toput = dentry; } spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); if (toput) dput(toput); diff --git a/fs/libfs.c b/fs/libfs.c index cc4794914b52..28b36663c44e 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -100,7 +100,6 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin) struct dentry *cursor = file->private_data; loff_t n = file->f_pos - 2; - spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); /* d_lock not required for cursor */ list_del(&cursor->d_u.d_child); @@ -116,7 +115,6 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin) } list_add_tail(&cursor->d_u.d_child, p); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); } } mutex_unlock(&dentry->d_inode->i_mutex); @@ -159,7 +157,6 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir) i++; /* fallthrough */ default: - spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); if (filp->f_pos == 2) list_move(q, &dentry->d_subdirs); @@ -175,13 +172,11 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir) spin_unlock(&next->d_lock); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); if (filldir(dirent, next->d_name.name, next->d_name.len, filp->f_pos, next->d_inode->i_ino, dt_type(next->d_inode)) < 0) return 0; - spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED); /* next is still alive */ @@ -191,7 +186,6 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir) filp->f_pos++; } spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); } return 0; } @@ -285,7 +279,6 @@ int simple_empty(struct dentry *dentry) struct dentry *child; int ret = 0; - spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child) { spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED); @@ -298,7 +291,6 @@ int simple_empty(struct dentry *dentry) ret = 1; out: spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); return ret; } diff --git a/fs/namei.c b/fs/namei.c index cbfa5fb31072..5642bc2be418 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -612,8 +612,8 @@ int follow_up(struct path *path) return 1; } -/* no need for dcache_lock, as serialization is taken care in - * namespace.c +/* + * serialization is taken care of in namespace.c */ static int __follow_mount(struct path *path) { @@ -645,9 +645,6 @@ static void follow_mount(struct path *path) } } -/* no need for dcache_lock, as serialization is taken care in - * namespace.c - */ int follow_down(struct path *path) { struct vfsmount *mounted; @@ -2131,12 +2128,10 @@ void dentry_unhash(struct dentry *dentry) { dget(dentry); shrink_dcache_parent(dentry); - spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); if (dentry->d_count == 2) __d_drop(dentry); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); } int vfs_rmdir(struct inode *dir, struct dentry *dentry) diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index 102278ed38bd..de15c533311c 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -391,7 +391,6 @@ ncp_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos) } /* If a pointer is invalid, we search the dentry. */ - spin_lock(&dcache_lock); spin_lock(&parent->d_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { @@ -402,13 +401,11 @@ ncp_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos) else dent = NULL; spin_unlock(&parent->d_lock); - spin_unlock(&dcache_lock); goto out; } next = next->next; } spin_unlock(&parent->d_lock); - spin_unlock(&dcache_lock); return NULL; out: diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h index c4b718ff9a6b..1220df75ff22 100644 --- a/fs/ncpfs/ncplib_kernel.h +++ b/fs/ncpfs/ncplib_kernel.h @@ -193,7 +193,6 @@ ncp_renew_dentries(struct dentry *parent) struct list_head *next; struct dentry *dentry; - spin_lock(&dcache_lock); spin_lock(&parent->d_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { @@ -207,7 +206,6 @@ ncp_renew_dentries(struct dentry *parent) next = next->next; } spin_unlock(&parent->d_lock); - spin_unlock(&dcache_lock); } static inline void @@ -217,7 +215,6 @@ ncp_invalidate_dircache_entries(struct dentry *parent) struct list_head *next; struct dentry *dentry; - spin_lock(&dcache_lock); spin_lock(&parent->d_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { @@ -227,7 +224,6 @@ ncp_invalidate_dircache_entries(struct dentry *parent) next = next->next; } spin_unlock(&parent->d_lock); - spin_unlock(&dcache_lock); } struct ncp_cache_head { diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 12de824edb5c..eb77471b8823 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1718,11 +1718,9 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry) dfprintk(VFS, "NFS: unlink(%s/%ld, %s)\n", dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); - spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); if (dentry->d_count > 1) { spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); /* Start asynchronous writeout of the inode */ write_inode_now(dentry->d_inode, 0); error = nfs_sillyrename(dir, dentry); @@ -1733,7 +1731,6 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry) need_rehash = 1; } spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); error = nfs_safe_remove(dentry); if (!error || error == -ENOENT) { nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index 850f67d5f0ac..b3e36c3430de 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -63,13 +63,11 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i * This again causes shrink_dcache_for_umount_subtree() to * Oops, since the test for IS_ROOT() will fail. */ - spin_lock(&dcache_lock); spin_lock(&dcache_inode_lock); spin_lock(&sb->s_root->d_lock); list_del_init(&sb->s_root->d_alias); spin_unlock(&sb->s_root->d_lock); spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); } return 0; } diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 78c0ebb0b07c..74aaf3963c10 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -60,7 +60,6 @@ rename_retry: seq = read_seqbegin(&rename_lock); rcu_read_lock(); - spin_lock(&dcache_lock); while (!IS_ROOT(dentry) && dentry != droot) { namelen = dentry->d_name.len; buflen -= namelen + 1; @@ -71,7 +70,6 @@ rename_retry: *--end = '/'; dentry = dentry->d_parent; } - spin_unlock(&dcache_lock); rcu_read_unlock(); if (read_seqretry(&rename_lock, seq)) goto rename_retry; @@ -91,7 +89,6 @@ rename_retry: memcpy(end, base, namelen); return end; Elong_unlock: - spin_unlock(&dcache_lock); rcu_read_unlock(); if (read_seqretry(&rename_lock, seq)) goto rename_retry; diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index ae769fc9b66c..9be6ec1f36d8 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -59,7 +59,6 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode) /* determine if the children should tell inode about their events */ watched = fsnotify_inode_watches_children(inode); - spin_lock(&dcache_lock); spin_lock(&dcache_inode_lock); /* run all of the dentries associated with this inode. Since this is a * directory, there damn well better only be one item on this list */ @@ -84,7 +83,6 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode) spin_unlock(&alias->d_lock); } spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); } /* Notify this dentry's parent about a child's events. */ diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c index c31b5c647ac7..b7de749bdd12 100644 --- a/fs/ocfs2/dcache.c +++ b/fs/ocfs2/dcache.c @@ -169,7 +169,6 @@ struct dentry *ocfs2_find_local_alias(struct inode *inode, struct list_head *p; struct dentry *dentry = NULL; - spin_lock(&dcache_lock); spin_lock(&dcache_inode_lock); list_for_each(p, &inode->i_dentry) { dentry = list_entry(p, struct dentry, d_alias); @@ -189,7 +188,6 @@ struct dentry *ocfs2_find_local_alias(struct inode *inode, } spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); return dentry; } diff --git a/include/linux/dcache.h b/include/linux/dcache.h index c963ebada922..a2ceb94b0e38 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -183,7 +183,6 @@ struct dentry_operations { #define DCACHE_GENOCIDE 0x0200 extern spinlock_t dcache_inode_lock; -extern spinlock_t dcache_lock; extern seqlock_t rename_lock; static inline int dname_external(struct dentry *dentry) @@ -296,8 +295,8 @@ extern char *dentry_path(struct dentry *, char *, int); * destroyed when it has references. dget() should never be * called for dentries with zero reference counter. For these cases * (preferably none, functions in dcache.c are sufficient for normal - * needs and they take necessary precautions) you should hold dcache_lock - * and call dget_locked() instead of dget(). + * needs and they take necessary precautions) you should hold d_lock + * and call dget_dlock() instead of dget(). */ static inline struct dentry *dget_dlock(struct dentry *dentry) { diff --git a/include/linux/fs.h b/include/linux/fs.h index 090f0eacde29..296cf2fde945 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1378,7 +1378,7 @@ struct super_block { #else struct list_head s_files; #endif - /* s_dentry_lru and s_nr_dentry_unused are protected by dcache_lock */ + /* s_dentry_lru, s_nr_dentry_unused protected by dcache.c lru locks */ struct list_head s_dentry_lru; /* unused dentry lru */ int s_nr_dentry_unused; /* # of dentry on lru */ @@ -2446,6 +2446,10 @@ static inline ino_t parent_ino(struct dentry *dentry) { ino_t res; + /* + * Don't strictly need d_lock here? If the parent ino could change + * then surely we'd have a deeper race in the caller? + */ spin_lock(&dentry->d_lock); res = dentry->d_parent->d_inode->i_ino; spin_unlock(&dentry->d_lock); diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index b10bcdeaef76..2a53f10712b3 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -17,7 +17,6 @@ /* * fsnotify_d_instantiate - instantiate a dentry for inode - * Called with dcache_lock held. */ static inline void fsnotify_d_instantiate(struct dentry *dentry, struct inode *inode) @@ -62,7 +61,6 @@ static inline int fsnotify_perm(struct file *file, int mask) /* * fsnotify_d_move - dentry has been moved - * Called with dcache_lock and dentry->d_lock held. */ static inline void fsnotify_d_move(struct dentry *dentry) { diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 7380763595d3..69ad89b50489 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -329,9 +329,15 @@ static inline void __fsnotify_update_dcache_flags(struct dentry *dentry) { struct dentry *parent; - assert_spin_locked(&dcache_lock); assert_spin_locked(&dentry->d_lock); + /* + * Serialisation of setting PARENT_WATCHED on the dentries is provided + * by d_lock. If inotify_inode_watched changes after we have taken + * d_lock, the following __fsnotify_update_child_dentry_flags call will + * find our entry, so it will spin until we complete here, and update + * us with the new state. + */ parent = dentry->d_parent; if (parent->d_inode && fsnotify_inode_watches_children(parent->d_inode)) dentry->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED; @@ -341,15 +347,12 @@ static inline void __fsnotify_update_dcache_flags(struct dentry *dentry) /* * fsnotify_d_instantiate - instantiate a dentry for inode - * Called with dcache_lock held. */ static inline void __fsnotify_d_instantiate(struct dentry *dentry, struct inode *inode) { if (!inode) return; - assert_spin_locked(&dcache_lock); - spin_lock(&dentry->d_lock); __fsnotify_update_dcache_flags(dentry); spin_unlock(&dentry->d_lock); diff --git a/include/linux/namei.h b/include/linux/namei.h index 05b441d93642..aec730b53935 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -41,7 +41,6 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND}; * - require a directory * - ending slashes ok even for nonexistent files * - internal "there are more path components" flag - * - locked when lookup done with dcache_lock held * - dentry cache is untrusted; force a real lookup */ #define LOOKUP_FOLLOW 1 diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 7b4705b51d4a..1864cb6a6a59 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -876,7 +876,6 @@ static void cgroup_clear_directory(struct dentry *dentry) struct list_head *node; BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex)); - spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); node = dentry->d_subdirs.next; while (node != &dentry->d_subdirs) { @@ -891,18 +890,15 @@ static void cgroup_clear_directory(struct dentry *dentry) dget_locked_dlock(d); spin_unlock(&d->d_lock); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); d_delete(d); simple_unlink(dentry->d_inode, d); dput(d); - spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); } else spin_unlock(&d->d_lock); node = dentry->d_subdirs.next; } spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); } /* @@ -914,14 +910,12 @@ static void cgroup_d_remove_dir(struct dentry *dentry) cgroup_clear_directory(dentry); - spin_lock(&dcache_lock); parent = dentry->d_parent; spin_lock(&parent->d_lock); spin_lock(&dentry->d_lock); list_del_init(&dentry->d_u.d_child); spin_unlock(&dentry->d_lock); spin_unlock(&parent->d_lock); - spin_unlock(&dcache_lock); remove_dir(dentry); } diff --git a/mm/filemap.c b/mm/filemap.c index 6b9aee20f242..ca389394fa2a 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -102,9 +102,6 @@ * ->inode_lock (zap_pte_range->set_page_dirty) * ->private_lock (zap_pte_range->__set_page_dirty_buffers) * - * ->task->proc_lock - * ->dcache_lock (proc_pid_lookup) - * * (code doesn't rely on that order, so you could switch it around) * ->tasklist_lock (memory_failure, collect_procs_ao) * ->i_mmap_lock diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 017ec096446e..2285d693f296 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -1145,7 +1145,6 @@ static void sel_remove_entries(struct dentry *de) { struct list_head *node; - spin_lock(&dcache_lock); spin_lock(&de->d_lock); node = de->d_subdirs.next; while (node != &de->d_subdirs) { @@ -1158,11 +1157,9 @@ static void sel_remove_entries(struct dentry *de) dget_locked_dlock(d); spin_unlock(&de->d_lock); spin_unlock(&d->d_lock); - spin_unlock(&dcache_lock); d_delete(d); simple_unlink(de->d_inode, d); dput(d); - spin_lock(&dcache_lock); spin_lock(&de->d_lock); } else spin_unlock(&d->d_lock); @@ -1170,7 +1167,6 @@ static void sel_remove_entries(struct dentry *de) } spin_unlock(&de->d_lock); - spin_unlock(&dcache_lock); } #define BOOL_DIR_NAME "booleans" -- cgit v1.2.3 From dc0474be3e27463d4d4a2793f82366eed906f223 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:43 +1100 Subject: fs: dcache rationalise dget variants dget_locked was a shortcut to avoid the lazy lru manipulation when we already held dcache_lock (lru manipulation was relatively cheap at that point). However, how that the lru lock is an innermost one, we never hold it at any caller, so the lock cost can now be avoided. We already have well working lazy dcache LRU, so it should be fine to defer LRU manipulations to scan time. Signed-off-by: Nick Piggin --- arch/powerpc/platforms/cell/spufs/inode.c | 2 +- drivers/infiniband/hw/ipath/ipath_fs.c | 2 +- drivers/infiniband/hw/qib/qib_fs.c | 2 +- drivers/staging/smbfs/cache.c | 2 +- fs/configfs/inode.c | 2 +- fs/dcache.c | 36 ++++++++++--------------------- fs/exportfs/expfs.c | 2 +- fs/ncpfs/dir.c | 2 +- fs/ocfs2/dcache.c | 2 +- include/linux/dcache.h | 15 +++---------- kernel/cgroup.c | 2 +- security/selinux/selinuxfs.c | 2 +- 12 files changed, 24 insertions(+), 47 deletions(-) (limited to 'drivers') diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 2662b50ea8d4..03185de7cd4a 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -161,7 +161,7 @@ static void spufs_prune_dir(struct dentry *dir) list_for_each_entry_safe(dentry, tmp, &dir->d_subdirs, d_u.d_child) { spin_lock(&dentry->d_lock); if (!(d_unhashed(dentry)) && dentry->d_inode) { - dget_locked_dlock(dentry); + dget_dlock(dentry); __d_drop(dentry); spin_unlock(&dentry->d_lock); simple_unlink(dir->d_inode, dentry); diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c index 925e88227ded..31ae1b108aea 100644 --- a/drivers/infiniband/hw/ipath/ipath_fs.c +++ b/drivers/infiniband/hw/ipath/ipath_fs.c @@ -279,7 +279,7 @@ static int remove_file(struct dentry *parent, char *name) spin_lock(&tmp->d_lock); if (!(d_unhashed(tmp) && tmp->d_inode)) { - dget_locked_dlock(tmp); + dget_dlock(tmp); __d_drop(tmp); spin_unlock(&tmp->d_lock); simple_unlink(parent->d_inode, tmp); diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c index 49af4a6538ba..df7fa251dcdc 100644 --- a/drivers/infiniband/hw/qib/qib_fs.c +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -455,7 +455,7 @@ static int remove_file(struct dentry *parent, char *name) spin_lock(&tmp->d_lock); if (!(d_unhashed(tmp) && tmp->d_inode)) { - dget_locked_dlock(tmp); + dget_dlock(tmp); __d_drop(tmp); spin_unlock(&tmp->d_lock); simple_unlink(parent->d_inode, tmp); diff --git a/drivers/staging/smbfs/cache.c b/drivers/staging/smbfs/cache.c index 75dfd403fb90..f2a1323ca827 100644 --- a/drivers/staging/smbfs/cache.c +++ b/drivers/staging/smbfs/cache.c @@ -102,7 +102,7 @@ smb_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos) dent = list_entry(next, struct dentry, d_u.d_child); if ((unsigned long)dent->d_fsdata == fpos) { if (dent->d_inode) - dget_locked(dent); + dget(dent); else dent = NULL; goto out_unlock; diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index fb3a55fff82a..c83f4768eeaa 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c @@ -252,7 +252,7 @@ void configfs_drop_dentry(struct configfs_dirent * sd, struct dentry * parent) if (dentry) { spin_lock(&dentry->d_lock); if (!(d_unhashed(dentry) && dentry->d_inode)) { - dget_locked_dlock(dentry); + dget_dlock(dentry); __d_drop(dentry); spin_unlock(&dentry->d_lock); simple_unlink(parent->d_inode, dentry); diff --git a/fs/dcache.c b/fs/dcache.c index 01f016799fd4..b4d2e28eef5b 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -429,32 +429,17 @@ int d_invalidate(struct dentry * dentry) EXPORT_SYMBOL(d_invalidate); /* This must be called with d_lock held */ -static inline struct dentry * __dget_locked_dlock(struct dentry *dentry) +static inline void __dget_dlock(struct dentry *dentry) { dentry->d_count++; - dentry_lru_del(dentry); - return dentry; } -/* This must be called with d_lock held */ -static inline struct dentry * __dget_locked(struct dentry *dentry) +static inline void __dget(struct dentry *dentry) { spin_lock(&dentry->d_lock); - __dget_locked_dlock(dentry); + __dget_dlock(dentry); spin_unlock(&dentry->d_lock); - return dentry; -} - -struct dentry * dget_locked_dlock(struct dentry *dentry) -{ - return __dget_locked_dlock(dentry); -} - -struct dentry * dget_locked(struct dentry *dentry) -{ - return __dget_locked(dentry); } -EXPORT_SYMBOL(dget_locked); struct dentry *dget_parent(struct dentry *dentry) { @@ -512,7 +497,7 @@ again: (alias->d_flags & DCACHE_DISCONNECTED)) { discon_alias = alias; } else if (!want_discon) { - __dget_locked_dlock(alias); + __dget_dlock(alias); spin_unlock(&alias->d_lock); return alias; } @@ -525,7 +510,7 @@ again: if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) { if (IS_ROOT(alias) && (alias->d_flags & DCACHE_DISCONNECTED)) { - __dget_locked_dlock(alias); + __dget_dlock(alias); spin_unlock(&alias->d_lock); return alias; } @@ -561,7 +546,7 @@ restart: list_for_each_entry(dentry, &inode->i_dentry, d_alias) { spin_lock(&dentry->d_lock); if (!dentry->d_count) { - __dget_locked_dlock(dentry); + __dget_dlock(dentry); __d_drop(dentry); spin_unlock(&dentry->d_lock); spin_unlock(&dcache_inode_lock); @@ -1257,7 +1242,8 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) * don't need child lock because it is not subject * to concurrency here */ - dentry->d_parent = dget_dlock(parent); + __dget_dlock(parent); + dentry->d_parent = parent; dentry->d_sb = parent->d_sb; list_add(&dentry->d_u.d_child, &parent->d_subdirs); spin_unlock(&parent->d_lock); @@ -1360,7 +1346,7 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry, continue; if (memcmp(qstr->name, name, len)) continue; - dget_locked(alias); + __dget(alias); return alias; } @@ -1613,7 +1599,7 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode, * reference to it, move it in place and use it. */ new = list_entry(inode->i_dentry.next, struct dentry, d_alias); - dget_locked(new); + __dget(new); spin_unlock(&dcache_inode_lock); security_d_instantiate(found, inode); d_move(new, found); @@ -1789,7 +1775,7 @@ int d_validate(struct dentry *dentry, struct dentry *dparent) list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) { if (dentry == child) { spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); - __dget_locked_dlock(dentry); + __dget_dlock(dentry); spin_unlock(&dentry->d_lock); spin_unlock(&dparent->d_lock); return 1; diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 53a5c08fb63c..f06a940065f6 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -49,7 +49,7 @@ find_acceptable_alias(struct dentry *result, spin_lock(&dcache_inode_lock); list_for_each_entry(dentry, &result->d_inode->i_dentry, d_alias) { - dget_locked(dentry); + dget(dentry); spin_unlock(&dcache_inode_lock); if (toput) dput(toput); diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index de15c533311c..0ba3cdc95a44 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -397,7 +397,7 @@ ncp_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos) dent = list_entry(next, struct dentry, d_u.d_child); if ((unsigned long)dent->d_fsdata == fpos) { if (dent->d_inode) - dget_locked(dent); + dget(dent); else dent = NULL; spin_unlock(&parent->d_lock); diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c index b7de749bdd12..4d54c60ceee4 100644 --- a/fs/ocfs2/dcache.c +++ b/fs/ocfs2/dcache.c @@ -178,7 +178,7 @@ struct dentry *ocfs2_find_local_alias(struct inode *inode, mlog(0, "dentry found: %.*s\n", dentry->d_name.len, dentry->d_name.name); - dget_locked_dlock(dentry); + dget_dlock(dentry); spin_unlock(&dentry->d_lock); break; } diff --git a/include/linux/dcache.h b/include/linux/dcache.h index a2ceb94b0e38..ca648685f0cc 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -287,23 +287,17 @@ extern char *dentry_path(struct dentry *, char *, int); /* Allocation counts.. */ /** - * dget, dget_locked - get a reference to a dentry + * dget, dget_dlock - get a reference to a dentry * @dentry: dentry to get a reference to * * Given a dentry or %NULL pointer increment the reference count * if appropriate and return the dentry. A dentry will not be - * destroyed when it has references. dget() should never be - * called for dentries with zero reference counter. For these cases - * (preferably none, functions in dcache.c are sufficient for normal - * needs and they take necessary precautions) you should hold d_lock - * and call dget_dlock() instead of dget(). + * destroyed when it has references. */ static inline struct dentry *dget_dlock(struct dentry *dentry) { - if (dentry) { - BUG_ON(!dentry->d_count); + if (dentry) dentry->d_count++; - } return dentry; } @@ -317,9 +311,6 @@ static inline struct dentry *dget(struct dentry *dentry) return dentry; } -extern struct dentry * dget_locked(struct dentry *); -extern struct dentry * dget_locked_dlock(struct dentry *); - extern struct dentry *dget_parent(struct dentry *dentry); /** diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 1864cb6a6a59..9f41470c3949 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -887,7 +887,7 @@ static void cgroup_clear_directory(struct dentry *dentry) /* This should never be called on a cgroup * directory with child cgroups */ BUG_ON(d->d_inode->i_mode & S_IFDIR); - dget_locked_dlock(d); + dget_dlock(d); spin_unlock(&d->d_lock); spin_unlock(&dentry->d_lock); d_delete(d); diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 2285d693f296..43deac219491 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -1154,7 +1154,7 @@ static void sel_remove_entries(struct dentry *de) list_del_init(node); if (d->d_inode) { - dget_locked_dlock(d); + dget_dlock(d); spin_unlock(&de->d_lock); spin_unlock(&d->d_lock); d_delete(d); -- cgit v1.2.3 From fa0d7e3de6d6fc5004ad9dea0dd6b286af8f03e9 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:49 +1100 Subject: fs: icache RCU free inodes RCU free the struct inode. This will allow: - Subsequent store-free path walking patch. The inode must be consulted for permissions when walking, so an RCU inode reference is a must. - sb_inode_list_lock to be moved inside i_lock because sb list walkers who want to take i_lock no longer need to take sb_inode_list_lock to walk the list in the first place. This will simplify and optimize locking. - Could remove some nested trylock loops in dcache code - Could potentially simplify things a bit in VM land. Do not need to take the page lock to follow page->mapping. The downsides of this is the performance cost of using RCU. In a simple creat/unlink microbenchmark, performance drops by about 10% due to inability to reuse cache-hot slab objects. As iterations increase and RCU freeing starts kicking over, this increases to about 20%. In cases where inode lifetimes are longer (ie. many inodes may be allocated during the average life span of a single inode), a lot of this cache reuse is not applicable, so the regression caused by this patch is smaller. The cache-hot regression could largely be avoided by using SLAB_DESTROY_BY_RCU, however this adds some complexity to list walking and store-free path walking, so I prefer to implement this at a later date, if it is shown to be a win in real situations. I haven't found a regression in any non-micro benchmark so I doubt it will be a problem. Signed-off-by: Nick Piggin --- Documentation/filesystems/porting | 14 ++++++++++++++ arch/powerpc/platforms/cell/spufs/inode.c | 10 ++++++++-- drivers/staging/pohmelfs/inode.c | 9 ++++++++- drivers/staging/smbfs/inode.c | 9 ++++++++- fs/9p/vfs_inode.c | 9 ++++++++- fs/adfs/super.c | 9 ++++++++- fs/affs/super.c | 9 ++++++++- fs/afs/super.c | 10 +++++++++- fs/befs/linuxvfs.c | 10 ++++++++-- fs/bfs/inode.c | 9 ++++++++- fs/block_dev.c | 9 ++++++++- fs/btrfs/inode.c | 9 ++++++++- fs/ceph/inode.c | 11 ++++++++++- fs/cifs/cifsfs.c | 9 ++++++++- fs/coda/inode.c | 9 ++++++++- fs/ecryptfs/super.c | 12 +++++++++++- fs/efs/super.c | 9 ++++++++- fs/exofs/super.c | 9 ++++++++- fs/ext2/super.c | 9 ++++++++- fs/ext3/super.c | 9 ++++++++- fs/ext4/super.c | 9 ++++++++- fs/fat/inode.c | 9 ++++++++- fs/freevxfs/vxfs_inode.c | 9 ++++++++- fs/fuse/inode.c | 9 ++++++++- fs/gfs2/super.c | 9 ++++++++- fs/hfs/super.c | 9 ++++++++- fs/hfsplus/super.c | 10 +++++++++- fs/hostfs/hostfs_kern.c | 9 ++++++++- fs/hpfs/super.c | 9 ++++++++- fs/hppfs/hppfs.c | 9 ++++++++- fs/hugetlbfs/inode.c | 9 ++++++++- fs/inode.c | 10 +++++++++- fs/isofs/inode.c | 9 ++++++++- fs/jffs2/super.c | 9 ++++++++- fs/jfs/super.c | 10 +++++++++- fs/logfs/inode.c | 9 ++++++++- fs/minix/inode.c | 9 ++++++++- fs/ncpfs/inode.c | 9 ++++++++- fs/nfs/inode.c | 9 ++++++++- fs/nilfs2/super.c | 10 +++++++++- fs/ntfs/inode.c | 9 ++++++++- fs/ocfs2/dlmfs/dlmfs.c | 9 ++++++++- fs/ocfs2/super.c | 9 ++++++++- fs/openpromfs/inode.c | 9 ++++++++- fs/proc/inode.c | 9 ++++++++- fs/qnx4/inode.c | 9 ++++++++- fs/reiserfs/super.c | 9 ++++++++- fs/romfs/super.c | 9 ++++++++- fs/squashfs/super.c | 9 ++++++++- fs/sysv/inode.c | 9 ++++++++- fs/ubifs/super.c | 10 +++++++++- fs/udf/super.c | 9 ++++++++- fs/ufs/super.c | 9 ++++++++- fs/xfs/xfs_iget.c | 13 ++++++++++++- include/linux/fs.h | 5 ++++- include/linux/net.h | 1 - ipc/mqueue.c | 9 ++++++++- mm/shmem.c | 9 ++++++++- net/socket.c | 16 ++++++++-------- net/sunrpc/rpc_pipe.c | 10 +++++++++- 60 files changed, 490 insertions(+), 68 deletions(-) (limited to 'drivers') diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index 1eb76959d096..ccf0ce7866b9 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -346,3 +346,17 @@ look at examples of other filesystems) for guidance. for details of what locks to replace dcache_lock with in order to protect particular things. Most of the time, a filesystem only needs ->d_lock, which protects *all* the dcache state of a given dentry. + +-- +[mandatory] + + Filesystems must RCU-free their inodes, if they can have been accessed +via rcu-walk path walk (basically, if the file can have had a path name in the +vfs namespace). + + i_dentry and i_rcu share storage in a union, and the vfs expects +i_dentry to be reinitialized before it is freed, so an: + + INIT_LIST_HEAD(&inode->i_dentry); + +must be done in the RCU callback. diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 03185de7cd4a..856e9c398068 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -71,12 +71,18 @@ spufs_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } -static void -spufs_destroy_inode(struct inode *inode) +static void spufs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(spufs_inode_cache, SPUFS_I(inode)); } +static void spufs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, spufs_i_callback); +} + static void spufs_init_once(void *p) { diff --git a/drivers/staging/pohmelfs/inode.c b/drivers/staging/pohmelfs/inode.c index 61685ccceda8..cc8d2840f9b6 100644 --- a/drivers/staging/pohmelfs/inode.c +++ b/drivers/staging/pohmelfs/inode.c @@ -826,6 +826,13 @@ const struct address_space_operations pohmelfs_aops = { .set_page_dirty = __set_page_dirty_nobuffers, }; +static void pohmelfs_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(pohmelfs_inode_cache, POHMELFS_I(inode)); +} + /* * ->detroy_inode() callback. Deletes inode from the caches * and frees private data. @@ -842,8 +849,8 @@ static void pohmelfs_destroy_inode(struct inode *inode) dprintk("%s: pi: %p, inode: %p, ino: %llu.\n", __func__, pi, &pi->vfs_inode, pi->ino); - kmem_cache_free(pohmelfs_inode_cache, pi); atomic_long_dec(&psb->total_inodes); + call_rcu(&inode->i_rcu, pohmelfs_i_callback); } /* diff --git a/drivers/staging/smbfs/inode.c b/drivers/staging/smbfs/inode.c index 540a984bb516..244319dc9702 100644 --- a/drivers/staging/smbfs/inode.c +++ b/drivers/staging/smbfs/inode.c @@ -62,11 +62,18 @@ static struct inode *smb_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } -static void smb_destroy_inode(struct inode *inode) +static void smb_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(smb_inode_cachep, SMB_I(inode)); } +static void smb_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, smb_i_callback); +} + static void init_once(void *foo) { struct smb_inode_info *ei = (struct smb_inode_info *) foo; diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 1073bca8488c..f6f9081e6d2c 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -237,10 +237,17 @@ struct inode *v9fs_alloc_inode(struct super_block *sb) * */ -void v9fs_destroy_inode(struct inode *inode) +static void v9fs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(vcookie_cache, v9fs_inode2cookie(inode)); } + +void v9fs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, v9fs_i_callback); +} #endif /** diff --git a/fs/adfs/super.c b/fs/adfs/super.c index 959dbff2d42d..47dffc513a26 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -240,11 +240,18 @@ static struct inode *adfs_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } -static void adfs_destroy_inode(struct inode *inode) +static void adfs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(adfs_inode_cachep, ADFS_I(inode)); } +static void adfs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, adfs_i_callback); +} + static void init_once(void *foo) { struct adfs_inode_info *ei = (struct adfs_inode_info *) foo; diff --git a/fs/affs/super.c b/fs/affs/super.c index 0cf7f4384cbd..4c18fcfb0a19 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -95,11 +95,18 @@ static struct inode *affs_alloc_inode(struct super_block *sb) return &i->vfs_inode; } -static void affs_destroy_inode(struct inode *inode) +static void affs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(affs_inode_cachep, AFFS_I(inode)); } +static void affs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, affs_i_callback); +} + static void init_once(void *foo) { struct affs_inode_info *ei = (struct affs_inode_info *) foo; diff --git a/fs/afs/super.c b/fs/afs/super.c index 27201cffece4..f901a9d7c111 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -498,6 +498,14 @@ static struct inode *afs_alloc_inode(struct super_block *sb) return &vnode->vfs_inode; } +static void afs_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + struct afs_vnode *vnode = AFS_FS_I(inode); + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(afs_inode_cachep, vnode); +} + /* * destroy an AFS inode struct */ @@ -511,7 +519,7 @@ static void afs_destroy_inode(struct inode *inode) ASSERTCMP(vnode->server, ==, NULL); - kmem_cache_free(afs_inode_cachep, vnode); + call_rcu(&inode->i_rcu, afs_i_callback); atomic_dec(&afs_count_active_inodes); } diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index aa4e7c7ae3c6..de93581b79a2 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -284,12 +284,18 @@ befs_alloc_inode(struct super_block *sb) return &bi->vfs_inode; } -static void -befs_destroy_inode(struct inode *inode) +static void befs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(befs_inode_cachep, BEFS_I(inode)); } +static void befs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, befs_i_callback); +} + static void init_once(void *foo) { struct befs_inode_info *bi = (struct befs_inode_info *) foo; diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index 76db6d7d49bb..a8e37f81d097 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -248,11 +248,18 @@ static struct inode *bfs_alloc_inode(struct super_block *sb) return &bi->vfs_inode; } -static void bfs_destroy_inode(struct inode *inode) +static void bfs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(bfs_inode_cachep, BFS_I(inode)); } +static void bfs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, bfs_i_callback); +} + static void init_once(void *foo) { struct bfs_inode_info *bi = foo; diff --git a/fs/block_dev.c b/fs/block_dev.c index 4230252fd689..771f23527010 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -409,13 +409,20 @@ static struct inode *bdev_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } -static void bdev_destroy_inode(struct inode *inode) +static void bdev_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); struct bdev_inode *bdi = BDEV_I(inode); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(bdev_cachep, bdi); } +static void bdev_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, bdev_i_callback); +} + static void init_once(void *foo) { struct bdev_inode *ei = (struct bdev_inode *) foo; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7ce9f8932789..f9d2994a42a2 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6495,6 +6495,13 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) return inode; } +static void btrfs_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); +} + void btrfs_destroy_inode(struct inode *inode) { struct btrfs_ordered_extent *ordered; @@ -6564,7 +6571,7 @@ void btrfs_destroy_inode(struct inode *inode) inode_tree_del(inode); btrfs_drop_extent_cache(inode, 0, (u64)-1, 0); free: - kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); + call_rcu(&inode->i_rcu, btrfs_i_callback); } int btrfs_drop_inode(struct inode *inode) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 2a48cafc174b..47f8c8baf3b5 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -368,6 +368,15 @@ struct inode *ceph_alloc_inode(struct super_block *sb) return &ci->vfs_inode; } +static void ceph_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + struct ceph_inode_info *ci = ceph_inode(inode); + + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(ceph_inode_cachep, ci); +} + void ceph_destroy_inode(struct inode *inode) { struct ceph_inode_info *ci = ceph_inode(inode); @@ -407,7 +416,7 @@ void ceph_destroy_inode(struct inode *inode) if (ci->i_xattrs.prealloc_blob) ceph_buffer_put(ci->i_xattrs.prealloc_blob); - kmem_cache_free(ceph_inode_cachep, ci); + call_rcu(&inode->i_rcu, ceph_i_callback); } diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 3936aa7f2c22..223717dcc401 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -334,10 +334,17 @@ cifs_alloc_inode(struct super_block *sb) return &cifs_inode->vfs_inode; } +static void cifs_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(cifs_inode_cachep, CIFS_I(inode)); +} + static void cifs_destroy_inode(struct inode *inode) { - kmem_cache_free(cifs_inode_cachep, CIFS_I(inode)); + call_rcu(&inode->i_rcu, cifs_i_callback); } static void diff --git a/fs/coda/inode.c b/fs/coda/inode.c index 5ea57c8c7f97..50dc7d189f56 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -56,11 +56,18 @@ static struct inode *coda_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } -static void coda_destroy_inode(struct inode *inode) +static void coda_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(coda_inode_cachep, ITOC(inode)); } +static void coda_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, coda_i_callback); +} + static void init_once(void *foo) { struct coda_inode_info *ei = (struct coda_inode_info *) foo; diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c index 2720178b7718..3042fe123a34 100644 --- a/fs/ecryptfs/super.c +++ b/fs/ecryptfs/super.c @@ -62,6 +62,16 @@ out: return inode; } +static void ecryptfs_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + struct ecryptfs_inode_info *inode_info; + inode_info = ecryptfs_inode_to_private(inode); + + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(ecryptfs_inode_info_cache, inode_info); +} + /** * ecryptfs_destroy_inode * @inode: The ecryptfs inode @@ -88,7 +98,7 @@ static void ecryptfs_destroy_inode(struct inode *inode) } } ecryptfs_destroy_crypt_stat(&inode_info->crypt_stat); - kmem_cache_free(ecryptfs_inode_info_cache, inode_info); + call_rcu(&inode->i_rcu, ecryptfs_i_callback); } /** diff --git a/fs/efs/super.c b/fs/efs/super.c index 5073a07652cc..0f31acb0131c 100644 --- a/fs/efs/super.c +++ b/fs/efs/super.c @@ -65,11 +65,18 @@ static struct inode *efs_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } -static void efs_destroy_inode(struct inode *inode) +static void efs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(efs_inode_cachep, INODE_INFO(inode)); } +static void efs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, efs_i_callback); +} + static void init_once(void *foo) { struct efs_inode_info *ei = (struct efs_inode_info *) foo; diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 79c3ae6e0456..8c6c4669b381 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -150,12 +150,19 @@ static struct inode *exofs_alloc_inode(struct super_block *sb) return &oi->vfs_inode; } +static void exofs_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(exofs_inode_cachep, exofs_i(inode)); +} + /* * Remove an inode from the cache */ static void exofs_destroy_inode(struct inode *inode) { - kmem_cache_free(exofs_inode_cachep, exofs_i(inode)); + call_rcu(&inode->i_rcu, exofs_i_callback); } /* diff --git a/fs/ext2/super.c b/fs/ext2/super.c index d89e0b6a2d78..e0c6380ff992 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -161,11 +161,18 @@ static struct inode *ext2_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } -static void ext2_destroy_inode(struct inode *inode) +static void ext2_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(ext2_inode_cachep, EXT2_I(inode)); } +static void ext2_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, ext2_i_callback); +} + static void init_once(void *foo) { struct ext2_inode_info *ei = (struct ext2_inode_info *) foo; diff --git a/fs/ext3/super.c b/fs/ext3/super.c index acf8695fa8f0..77ce1616f725 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -479,6 +479,13 @@ static struct inode *ext3_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } +static void ext3_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(ext3_inode_cachep, EXT3_I(inode)); +} + static void ext3_destroy_inode(struct inode *inode) { if (!list_empty(&(EXT3_I(inode)->i_orphan))) { @@ -489,7 +496,7 @@ static void ext3_destroy_inode(struct inode *inode) false); dump_stack(); } - kmem_cache_free(ext3_inode_cachep, EXT3_I(inode)); + call_rcu(&inode->i_rcu, ext3_i_callback); } static void init_once(void *foo) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index fb15c9c0be74..cd37f9d5e447 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -841,6 +841,13 @@ static int ext4_drop_inode(struct inode *inode) return drop; } +static void ext4_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(ext4_inode_cachep, EXT4_I(inode)); +} + static void ext4_destroy_inode(struct inode *inode) { ext4_ioend_wait(inode); @@ -853,7 +860,7 @@ static void ext4_destroy_inode(struct inode *inode) true); dump_stack(); } - kmem_cache_free(ext4_inode_cachep, EXT4_I(inode)); + call_rcu(&inode->i_rcu, ext4_i_callback); } static void init_once(void *foo) diff --git a/fs/fat/inode.c b/fs/fat/inode.c index ad6998a92c30..8cccfebee180 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -514,11 +514,18 @@ static struct inode *fat_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } -static void fat_destroy_inode(struct inode *inode) +static void fat_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(fat_inode_cachep, MSDOS_I(inode)); } +static void fat_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, fat_i_callback); +} + static void init_once(void *foo) { struct msdos_inode_info *ei = (struct msdos_inode_info *)foo; diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c index 8c04eac5079d..2ba6719ac612 100644 --- a/fs/freevxfs/vxfs_inode.c +++ b/fs/freevxfs/vxfs_inode.c @@ -337,6 +337,13 @@ vxfs_iget(struct super_block *sbp, ino_t ino) return ip; } +static void vxfs_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(vxfs_inode_cachep, inode->i_private); +} + /** * vxfs_evict_inode - remove inode from main memory * @ip: inode to discard. @@ -350,5 +357,5 @@ vxfs_evict_inode(struct inode *ip) { truncate_inode_pages(&ip->i_data, 0); end_writeback(ip); - kmem_cache_free(vxfs_inode_cachep, ip->i_private); + call_rcu(&ip->i_rcu, vxfs_i_callback); } diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index cfce3ad86a92..44e0a6c57e87 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -99,6 +99,13 @@ static struct inode *fuse_alloc_inode(struct super_block *sb) return inode; } +static void fuse_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(fuse_inode_cachep, inode); +} + static void fuse_destroy_inode(struct inode *inode) { struct fuse_inode *fi = get_fuse_inode(inode); @@ -106,7 +113,7 @@ static void fuse_destroy_inode(struct inode *inode) BUG_ON(!list_empty(&fi->queued_writes)); if (fi->forget_req) fuse_request_free(fi->forget_req); - kmem_cache_free(fuse_inode_cachep, inode); + call_rcu(&inode->i_rcu, fuse_i_callback); } void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req, diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 2b2c4997430b..16c2ecac7eb7 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1405,11 +1405,18 @@ static struct inode *gfs2_alloc_inode(struct super_block *sb) return &ip->i_inode; } -static void gfs2_destroy_inode(struct inode *inode) +static void gfs2_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(gfs2_inode_cachep, inode); } +static void gfs2_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, gfs2_i_callback); +} + const struct super_operations gfs2_super_ops = { .alloc_inode = gfs2_alloc_inode, .destroy_inode = gfs2_destroy_inode, diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 4824c27cebb8..ef4ee5716abe 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -167,11 +167,18 @@ static struct inode *hfs_alloc_inode(struct super_block *sb) return i ? &i->vfs_inode : NULL; } -static void hfs_destroy_inode(struct inode *inode) +static void hfs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(hfs_inode_cachep, HFS_I(inode)); } +static void hfs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, hfs_i_callback); +} + static const struct super_operations hfs_super_operations = { .alloc_inode = hfs_alloc_inode, .destroy_inode = hfs_destroy_inode, diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 52cc746d3ba3..182e83a9079e 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -488,11 +488,19 @@ static struct inode *hfsplus_alloc_inode(struct super_block *sb) return i ? &i->vfs_inode : NULL; } -static void hfsplus_destroy_inode(struct inode *inode) +static void hfsplus_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(hfsplus_inode_cachep, HFSPLUS_I(inode)); } +static void hfsplus_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, hfsplus_i_callback); +} + #define HFSPLUS_INODE_SIZE sizeof(struct hfsplus_inode_info) static struct dentry *hfsplus_mount(struct file_system_type *fs_type, diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 39dc505ed273..861113fcfc88 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -247,11 +247,18 @@ static void hostfs_evict_inode(struct inode *inode) } } -static void hostfs_destroy_inode(struct inode *inode) +static void hostfs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kfree(HOSTFS_I(inode)); } +static void hostfs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, hostfs_i_callback); +} + static int hostfs_show_options(struct seq_file *seq, struct vfsmount *vfs) { const char *root_path = vfs->mnt_sb->s_fs_info; diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index 6c5f01597c3a..49935ba78db8 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -177,11 +177,18 @@ static struct inode *hpfs_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } -static void hpfs_destroy_inode(struct inode *inode) +static void hpfs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(hpfs_inode_cachep, hpfs_i(inode)); } +static void hpfs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, hpfs_i_callback); +} + static void init_once(void *foo) { struct hpfs_inode_info *ei = (struct hpfs_inode_info *) foo; diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c index f702b5f713fc..87ed48e0343d 100644 --- a/fs/hppfs/hppfs.c +++ b/fs/hppfs/hppfs.c @@ -632,11 +632,18 @@ void hppfs_evict_inode(struct inode *ino) mntput(ino->i_sb->s_fs_info); } -static void hppfs_destroy_inode(struct inode *inode) +static void hppfs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kfree(HPPFS_I(inode)); } +static void hppfs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, hppfs_i_callback); +} + static const struct super_operations hppfs_sbops = { .alloc_inode = hppfs_alloc_inode, .destroy_inode = hppfs_destroy_inode, diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index a5fe68189eed..9885082b470f 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -663,11 +663,18 @@ static struct inode *hugetlbfs_alloc_inode(struct super_block *sb) return &p->vfs_inode; } +static void hugetlbfs_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode)); +} + static void hugetlbfs_destroy_inode(struct inode *inode) { hugetlbfs_inc_free_inodes(HUGETLBFS_SB(inode->i_sb)); mpol_free_shared_policy(&HUGETLBFS_I(inode)->policy); - kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode)); + call_rcu(&inode->i_rcu, hugetlbfs_i_callback); } static const struct address_space_operations hugetlbfs_aops = { diff --git a/fs/inode.c b/fs/inode.c index 5a0a898f55d1..6751dfe8cc06 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -272,6 +272,13 @@ void __destroy_inode(struct inode *inode) } EXPORT_SYMBOL(__destroy_inode); +static void i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(inode_cachep, inode); +} + static void destroy_inode(struct inode *inode) { BUG_ON(!list_empty(&inode->i_lru)); @@ -279,7 +286,7 @@ static void destroy_inode(struct inode *inode) if (inode->i_sb->s_op->destroy_inode) inode->i_sb->s_op->destroy_inode(inode); else - kmem_cache_free(inode_cachep, (inode)); + call_rcu(&inode->i_rcu, i_callback); } /* @@ -432,6 +439,7 @@ void end_writeback(struct inode *inode) BUG_ON(!(inode->i_state & I_FREEING)); BUG_ON(inode->i_state & I_CLEAR); inode_sync_wait(inode); + /* don't need i_lock here, no concurrent mods to i_state */ inode->i_state = I_FREEING | I_CLEAR; } EXPORT_SYMBOL(end_writeback); diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index d204ee4235fd..d8f3a652243d 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -81,11 +81,18 @@ static struct inode *isofs_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } -static void isofs_destroy_inode(struct inode *inode) +static void isofs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(isofs_inode_cachep, ISOFS_I(inode)); } +static void isofs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, isofs_i_callback); +} + static void init_once(void *foo) { struct iso_inode_info *ei = foo; diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index c86041b866a4..853b8e300084 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -40,11 +40,18 @@ static struct inode *jffs2_alloc_inode(struct super_block *sb) return &f->vfs_inode; } -static void jffs2_destroy_inode(struct inode *inode) +static void jffs2_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(jffs2_inode_cachep, JFFS2_INODE_INFO(inode)); } +static void jffs2_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, jffs2_i_callback); +} + static void jffs2_i_init_once(void *foo) { struct jffs2_inode_info *f = foo; diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 0669fc1cc3bf..b715b0f7bdfd 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -115,6 +115,14 @@ static struct inode *jfs_alloc_inode(struct super_block *sb) return &jfs_inode->vfs_inode; } +static void jfs_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + struct jfs_inode_info *ji = JFS_IP(inode); + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(jfs_inode_cachep, ji); +} + static void jfs_destroy_inode(struct inode *inode) { struct jfs_inode_info *ji = JFS_IP(inode); @@ -128,7 +136,7 @@ static void jfs_destroy_inode(struct inode *inode) ji->active_ag = -1; } spin_unlock_irq(&ji->ag_lock); - kmem_cache_free(jfs_inode_cachep, ji); + call_rcu(&inode->i_rcu, jfs_i_callback); } static int jfs_statfs(struct dentry *dentry, struct kstatfs *buf) diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c index d8c71ece098f..03b8c240aeda 100644 --- a/fs/logfs/inode.c +++ b/fs/logfs/inode.c @@ -141,13 +141,20 @@ struct inode *logfs_safe_iget(struct super_block *sb, ino_t ino, int *is_cached) return __logfs_iget(sb, ino); } +static void logfs_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(logfs_inode_cache, logfs_inode(inode)); +} + static void __logfs_destroy_inode(struct inode *inode) { struct logfs_inode *li = logfs_inode(inode); BUG_ON(li->li_block); list_del(&li->li_freeing_list); - kmem_cache_free(logfs_inode_cache, li); + call_rcu(&inode->i_rcu, logfs_i_callback); } static void logfs_destroy_inode(struct inode *inode) diff --git a/fs/minix/inode.c b/fs/minix/inode.c index fb2020858a34..ae0b83f476a6 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -68,11 +68,18 @@ static struct inode *minix_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } -static void minix_destroy_inode(struct inode *inode) +static void minix_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(minix_inode_cachep, minix_i(inode)); } +static void minix_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, minix_i_callback); +} + static void init_once(void *foo) { struct minix_inode_info *ei = (struct minix_inode_info *) foo; diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 8fb93b604e73..60047dbeb38d 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -58,11 +58,18 @@ static struct inode *ncp_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } -static void ncp_destroy_inode(struct inode *inode) +static void ncp_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(ncp_inode_cachep, NCP_FINFO(inode)); } +static void ncp_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, ncp_i_callback); +} + static void init_once(void *foo) { struct ncp_inode_info *ei = (struct ncp_inode_info *) foo; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index e67e31c73416..017daa3bed38 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1438,11 +1438,18 @@ struct inode *nfs_alloc_inode(struct super_block *sb) return &nfsi->vfs_inode; } -void nfs_destroy_inode(struct inode *inode) +static void nfs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(nfs_inode_cachep, NFS_I(inode)); } +void nfs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, nfs_i_callback); +} + static inline void nfs4_init_once(struct nfs_inode *nfsi) { #ifdef CONFIG_NFS_V4 diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index d36fc7ee615f..e2dcc9c733f7 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -162,10 +162,13 @@ struct inode *nilfs_alloc_inode(struct super_block *sb) return &ii->vfs_inode; } -void nilfs_destroy_inode(struct inode *inode) +static void nilfs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); struct nilfs_mdt_info *mdi = NILFS_MDT(inode); + INIT_LIST_HEAD(&inode->i_dentry); + if (mdi) { kfree(mdi->mi_bgl); /* kfree(NULL) is safe */ kfree(mdi); @@ -173,6 +176,11 @@ void nilfs_destroy_inode(struct inode *inode) kmem_cache_free(nilfs_inode_cachep, NILFS_I(inode)); } +void nilfs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, nilfs_i_callback); +} + static int nilfs_sync_super(struct nilfs_sb_info *sbi, int flag) { struct the_nilfs *nilfs = sbi->s_nilfs; diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index 93622b175fc7..a627ed82c0a3 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -332,6 +332,13 @@ struct inode *ntfs_alloc_big_inode(struct super_block *sb) return NULL; } +static void ntfs_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(ntfs_big_inode_cache, NTFS_I(inode)); +} + void ntfs_destroy_big_inode(struct inode *inode) { ntfs_inode *ni = NTFS_I(inode); @@ -340,7 +347,7 @@ void ntfs_destroy_big_inode(struct inode *inode) BUG_ON(ni->page); if (!atomic_dec_and_test(&ni->count)) BUG(); - kmem_cache_free(ntfs_big_inode_cache, NTFS_I(inode)); + call_rcu(&inode->i_rcu, ntfs_i_callback); } static inline ntfs_inode *ntfs_alloc_extent_inode(void) diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index b2df490a19ed..8c5c0eddc365 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c @@ -351,11 +351,18 @@ static struct inode *dlmfs_alloc_inode(struct super_block *sb) return &ip->ip_vfs_inode; } -static void dlmfs_destroy_inode(struct inode *inode) +static void dlmfs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(dlmfs_inode_cache, DLMFS_I(inode)); } +static void dlmfs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, dlmfs_i_callback); +} + static void dlmfs_evict_inode(struct inode *inode) { int status; diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index cfeab7ce3697..17ff46fa8a10 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -569,11 +569,18 @@ static struct inode *ocfs2_alloc_inode(struct super_block *sb) return &oi->vfs_inode; } -static void ocfs2_destroy_inode(struct inode *inode) +static void ocfs2_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(ocfs2_inode_cachep, OCFS2_I(inode)); } +static void ocfs2_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, ocfs2_i_callback); +} + static unsigned long long ocfs2_max_file_offset(unsigned int bbits, unsigned int cbits) { diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index 911e61f348fc..a2a5bff774e3 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c @@ -343,11 +343,18 @@ static struct inode *openprom_alloc_inode(struct super_block *sb) return &oi->vfs_inode; } -static void openprom_destroy_inode(struct inode *inode) +static void openprom_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(op_inode_cachep, OP_I(inode)); } +static void openprom_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, openprom_i_callback); +} + static struct inode *openprom_iget(struct super_block *sb, ino_t ino) { struct inode *inode; diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 3ddb6068177c..6bcb926b101b 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -65,11 +65,18 @@ static struct inode *proc_alloc_inode(struct super_block *sb) return inode; } -static void proc_destroy_inode(struct inode *inode) +static void proc_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(proc_inode_cachep, PROC_I(inode)); } +static void proc_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, proc_i_callback); +} + static void init_once(void *foo) { struct proc_inode *ei = (struct proc_inode *) foo; diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index fcada42f1aa3..e63b4171d583 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -425,11 +425,18 @@ static struct inode *qnx4_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } -static void qnx4_destroy_inode(struct inode *inode) +static void qnx4_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(qnx4_inode_cachep, qnx4_i(inode)); } +static void qnx4_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, qnx4_i_callback); +} + static void init_once(void *foo) { struct qnx4_inode_info *ei = (struct qnx4_inode_info *) foo; diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index b243117b8752..2575682a9ead 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -529,11 +529,18 @@ static struct inode *reiserfs_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } -static void reiserfs_destroy_inode(struct inode *inode) +static void reiserfs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(reiserfs_inode_cachep, REISERFS_I(inode)); } +static void reiserfs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, reiserfs_i_callback); +} + static void init_once(void *foo) { struct reiserfs_inode_info *ei = (struct reiserfs_inode_info *)foo; diff --git a/fs/romfs/super.c b/fs/romfs/super.c index 6647f90e55cd..2305e3121cb1 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c @@ -400,11 +400,18 @@ static struct inode *romfs_alloc_inode(struct super_block *sb) /* * return a spent inode to the slab cache */ -static void romfs_destroy_inode(struct inode *inode) +static void romfs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(romfs_inode_cachep, ROMFS_I(inode)); } +static void romfs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, romfs_i_callback); +} + /* * get filesystem statistics */ diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 24de30ba34c1..20700b9f2b4c 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -440,11 +440,18 @@ static struct inode *squashfs_alloc_inode(struct super_block *sb) } -static void squashfs_destroy_inode(struct inode *inode) +static void squashfs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(squashfs_inode_cachep, squashfs_i(inode)); } +static void squashfs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, squashfs_i_callback); +} + static struct file_system_type squashfs_fs_type = { .owner = THIS_MODULE, diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index de44d067b9e6..0630eb969a28 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -333,11 +333,18 @@ static struct inode *sysv_alloc_inode(struct super_block *sb) return &si->vfs_inode; } -static void sysv_destroy_inode(struct inode *inode) +static void sysv_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(sysv_inode_cachep, SYSV_I(inode)); } +static void sysv_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, sysv_i_callback); +} + static void init_once(void *p) { struct sysv_inode_info *si = (struct sysv_inode_info *)p; diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 91fac54c70e3..6e11c2975dcf 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -272,12 +272,20 @@ static struct inode *ubifs_alloc_inode(struct super_block *sb) return &ui->vfs_inode; }; +static void ubifs_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + struct ubifs_inode *ui = ubifs_inode(inode); + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(ubifs_inode_slab, ui); +} + static void ubifs_destroy_inode(struct inode *inode) { struct ubifs_inode *ui = ubifs_inode(inode); kfree(ui->data); - kmem_cache_free(ubifs_inode_slab, inode); + call_rcu(&inode->i_rcu, ubifs_i_callback); } /* diff --git a/fs/udf/super.c b/fs/udf/super.c index 4a5c7c61836a..b539d53320fb 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -139,11 +139,18 @@ static struct inode *udf_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } -static void udf_destroy_inode(struct inode *inode) +static void udf_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(udf_inode_cachep, UDF_I(inode)); } +static void udf_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, udf_i_callback); +} + static void init_once(void *foo) { struct udf_inode_info *ei = (struct udf_inode_info *)foo; diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 2c47daed56da..2c61ac5d4e48 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -1412,11 +1412,18 @@ static struct inode *ufs_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } -static void ufs_destroy_inode(struct inode *inode) +static void ufs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(ufs_inode_cachep, UFS_I(inode)); } +static void ufs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, ufs_i_callback); +} + static void init_once(void *foo) { struct ufs_inode_info *ei = (struct ufs_inode_info *) foo; diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 0cdd26932d8e..d7de5a3f7867 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -91,6 +91,17 @@ xfs_inode_alloc( return ip; } +STATIC void +xfs_inode_free_callback( + struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + struct xfs_inode *ip = XFS_I(inode); + + INIT_LIST_HEAD(&inode->i_dentry); + kmem_zone_free(xfs_inode_zone, ip); +} + void xfs_inode_free( struct xfs_inode *ip) @@ -134,7 +145,7 @@ xfs_inode_free( ASSERT(!spin_is_locked(&ip->i_flags_lock)); ASSERT(completion_done(&ip->i_flush)); - kmem_zone_free(xfs_inode_zone, ip); + call_rcu(&ip->i_vnode.i_rcu, xfs_inode_free_callback); } /* diff --git a/include/linux/fs.h b/include/linux/fs.h index 296cf2fde945..1ff4d0a33b25 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -737,7 +737,10 @@ struct inode { struct list_head i_wb_list; /* backing dev IO list */ struct list_head i_lru; /* inode LRU list */ struct list_head i_sb_list; - struct list_head i_dentry; + union { + struct list_head i_dentry; + struct rcu_head i_rcu; + }; unsigned long i_ino; atomic_t i_count; unsigned int i_nlink; diff --git a/include/linux/net.h b/include/linux/net.h index 16faa130088c..06bde4908473 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -120,7 +120,6 @@ enum sock_shutdown_cmd { struct socket_wq { wait_queue_head_t wait; struct fasync_struct *fasync_list; - struct rcu_head rcu; } ____cacheline_aligned_in_smp; /** diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 035f4399edbc..14fb6d67e6a3 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -237,11 +237,18 @@ static struct inode *mqueue_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } -static void mqueue_destroy_inode(struct inode *inode) +static void mqueue_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(mqueue_inode_cachep, MQUEUE_I(inode)); } +static void mqueue_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, mqueue_i_callback); +} + static void mqueue_evict_inode(struct inode *inode) { struct mqueue_inode_info *info; diff --git a/mm/shmem.c b/mm/shmem.c index 47fdeeb9d636..5ee67c990602 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2415,13 +2415,20 @@ static struct inode *shmem_alloc_inode(struct super_block *sb) return &p->vfs_inode; } +static void shmem_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode)); +} + static void shmem_destroy_inode(struct inode *inode) { if ((inode->i_mode & S_IFMT) == S_IFREG) { /* only struct inode is valid if it's an inline symlink */ mpol_free_shared_policy(&SHMEM_I(inode)->policy); } - kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode)); + call_rcu(&inode->i_rcu, shmem_i_callback); } static void init_once(void *foo) diff --git a/net/socket.c b/net/socket.c index 088fb3fd45e0..97fff3a4e72f 100644 --- a/net/socket.c +++ b/net/socket.c @@ -262,20 +262,20 @@ static struct inode *sock_alloc_inode(struct super_block *sb) } -static void wq_free_rcu(struct rcu_head *head) +static void sock_free_rcu(struct rcu_head *head) { - struct socket_wq *wq = container_of(head, struct socket_wq, rcu); + struct inode *inode = container_of(head, struct inode, i_rcu); + struct socket_alloc *ei = container_of(inode, struct socket_alloc, + vfs_inode); - kfree(wq); + kfree(ei->socket.wq); + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(sock_inode_cachep, ei); } static void sock_destroy_inode(struct inode *inode) { - struct socket_alloc *ei; - - ei = container_of(inode, struct socket_alloc, vfs_inode); - call_rcu(&ei->socket.wq->rcu, wq_free_rcu); - kmem_cache_free(sock_inode_cachep, ei); + call_rcu(&inode->i_rcu, sock_free_rcu); } static void init_once(void *foo) diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index a0dc1a86fcea..2899fe27f880 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -162,11 +162,19 @@ rpc_alloc_inode(struct super_block *sb) } static void -rpc_destroy_inode(struct inode *inode) +rpc_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(rpc_inode_cachep, RPC_I(inode)); } +static void +rpc_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, rpc_i_callback); +} + static int rpc_pipe_open(struct inode *inode, struct file *filp) { -- cgit v1.2.3 From fb045adb99d9b7c562dc7fef834857f78249daa1 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:55 +1100 Subject: fs: dcache reduce branches in lookup path Reduce some branches and memory accesses in dcache lookup by adding dentry flags to indicate common d_ops are set, rather than having to check them. This saves a pointer memory access (dentry->d_op) in common path lookup situations, and saves another pointer load and branch in cases where we have d_op but not the particular operation. Patched with: git grep -E '[.>]([[:space:]])*d_op([[:space:]])*=' | xargs sed -e 's/\([^\t ]*\)->d_op = \(.*\);/d_set_d_op(\1, \2);/' -e 's/\([^\t ]*\)\.d_op = \(.*\);/d_set_d_op(\&\1, \2);/' -i Signed-off-by: Nick Piggin --- arch/ia64/kernel/perfmon.c | 2 +- drivers/staging/autofs/root.c | 2 +- drivers/staging/smbfs/dir.c | 8 ++++---- fs/9p/vfs_inode.c | 26 +++++++++++++------------- fs/adfs/dir.c | 2 +- fs/adfs/super.c | 2 +- fs/affs/namei.c | 2 +- fs/affs/super.c | 2 +- fs/afs/dir.c | 2 +- fs/anon_inodes.c | 2 +- fs/autofs4/inode.c | 2 +- fs/autofs4/root.c | 10 +++++----- fs/btrfs/export.c | 4 ++-- fs/btrfs/inode.c | 2 +- fs/ceph/dir.c | 6 +++--- fs/cifs/dir.c | 16 ++++++++-------- fs/cifs/inode.c | 8 ++++---- fs/cifs/link.c | 4 ++-- fs/cifs/readdir.c | 4 ++-- fs/coda/dir.c | 2 +- fs/configfs/dir.c | 8 ++++---- fs/dcache.c | 30 ++++++++++++++++++++++++++---- fs/ecryptfs/inode.c | 2 +- fs/ecryptfs/main.c | 4 ++-- fs/fat/inode.c | 4 ++-- fs/fat/namei_msdos.c | 6 +++--- fs/fat/namei_vfat.c | 8 ++++---- fs/fuse/dir.c | 2 +- fs/fuse/inode.c | 4 ++-- fs/gfs2/export.c | 4 ++-- fs/gfs2/ops_fstype.c | 2 +- fs/gfs2/ops_inode.c | 2 +- fs/hfs/dir.c | 2 +- fs/hfs/super.c | 2 +- fs/hfsplus/dir.c | 2 +- fs/hfsplus/super.c | 2 +- fs/hostfs/hostfs_kern.c | 2 +- fs/hpfs/dentry.c | 2 +- fs/isofs/inode.c | 2 +- fs/isofs/namei.c | 2 +- fs/jfs/namei.c | 4 ++-- fs/jfs/super.c | 2 +- fs/libfs.c | 2 +- fs/minix/namei.c | 2 +- fs/namei.c | 31 ++++++++++++++++++++----------- fs/ncpfs/dir.c | 4 ++-- fs/ncpfs/inode.c | 2 +- fs/nfs/dir.c | 6 +++--- fs/nfs/getroot.c | 4 ++-- fs/ocfs2/export.c | 4 ++-- fs/ocfs2/namei.c | 10 +++++----- fs/pipe.c | 2 +- fs/proc/base.c | 12 ++++++------ fs/proc/generic.c | 2 +- fs/proc/proc_sysctl.c | 4 ++-- fs/reiserfs/xattr.c | 2 +- fs/sysfs/dir.c | 2 +- fs/sysv/namei.c | 2 +- fs/sysv/super.c | 2 +- include/linux/dcache.h | 6 ++++++ kernel/cgroup.c | 2 +- net/socket.c | 2 +- net/sunrpc/rpc_pipe.c | 2 +- 63 files changed, 174 insertions(+), 137 deletions(-) (limited to 'drivers') diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index d39d8a53b579..5a24f40bb48e 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -2233,7 +2233,7 @@ pfm_alloc_file(pfm_context_t *ctx) } path.mnt = mntget(pfmfs_mnt); - path.dentry->d_op = &pfmfs_dentry_operations; + d_set_d_op(path.dentry, &pfmfs_dentry_operations); d_add(path.dentry, inode); file = alloc_file(&path, FMODE_READ, &pfm_file_ops); diff --git a/drivers/staging/autofs/root.c b/drivers/staging/autofs/root.c index 0fdec4befd84..b09adb57971f 100644 --- a/drivers/staging/autofs/root.c +++ b/drivers/staging/autofs/root.c @@ -237,7 +237,7 @@ static struct dentry *autofs_root_lookup(struct inode *dir, struct dentry *dentr * * We need to do this before we release the directory semaphore. */ - dentry->d_op = &autofs_dentry_operations; + d_set_d_op(dentry, &autofs_dentry_operations); dentry->d_flags |= DCACHE_AUTOFS_PENDING; d_add(dentry, NULL); diff --git a/drivers/staging/smbfs/dir.c b/drivers/staging/smbfs/dir.c index 5f79799d5d4a..78f09412740c 100644 --- a/drivers/staging/smbfs/dir.c +++ b/drivers/staging/smbfs/dir.c @@ -398,9 +398,9 @@ smb_new_dentry(struct dentry *dentry) struct smb_sb_info *server = server_from_dentry(dentry); if (server->mnt->flags & SMB_MOUNT_CASE) - dentry->d_op = &smbfs_dentry_operations_case; + d_set_d_op(dentry, &smbfs_dentry_operations_case); else - dentry->d_op = &smbfs_dentry_operations; + d_set_d_op(dentry, &smbfs_dentry_operations); dentry->d_time = jiffies; } @@ -462,9 +462,9 @@ smb_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) add_entry: server = server_from_dentry(dentry); if (server->mnt->flags & SMB_MOUNT_CASE) - dentry->d_op = &smbfs_dentry_operations_case; + d_set_d_op(dentry, &smbfs_dentry_operations_case); else - dentry->d_op = &smbfs_dentry_operations; + d_set_d_op(dentry, &smbfs_dentry_operations); d_add(dentry, inode); smb_renew_times(dentry); diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index f6f9081e6d2c..df8bbb358d54 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -635,9 +635,9 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir, } if (v9ses->cache) - dentry->d_op = &v9fs_cached_dentry_operations; + d_set_d_op(dentry, &v9fs_cached_dentry_operations); else - dentry->d_op = &v9fs_dentry_operations; + d_set_d_op(dentry, &v9fs_dentry_operations); d_instantiate(dentry, inode); err = v9fs_fid_add(dentry, fid); @@ -749,7 +749,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode, err); goto error; } - dentry->d_op = &v9fs_cached_dentry_operations; + d_set_d_op(dentry, &v9fs_cached_dentry_operations); d_instantiate(dentry, inode); err = v9fs_fid_add(dentry, fid); if (err < 0) @@ -767,7 +767,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode, err = PTR_ERR(inode); goto error; } - dentry->d_op = &v9fs_dentry_operations; + d_set_d_op(dentry, &v9fs_dentry_operations); d_instantiate(dentry, inode); } /* Now set the ACL based on the default value */ @@ -956,7 +956,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir, err); goto error; } - dentry->d_op = &v9fs_cached_dentry_operations; + d_set_d_op(dentry, &v9fs_cached_dentry_operations); d_instantiate(dentry, inode); err = v9fs_fid_add(dentry, fid); if (err < 0) @@ -973,7 +973,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir, err = PTR_ERR(inode); goto error; } - dentry->d_op = &v9fs_dentry_operations; + d_set_d_op(dentry, &v9fs_dentry_operations); d_instantiate(dentry, inode); } /* Now set the ACL based on the default value */ @@ -1041,9 +1041,9 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, inst_out: if (v9ses->cache) - dentry->d_op = &v9fs_cached_dentry_operations; + d_set_d_op(dentry, &v9fs_cached_dentry_operations); else - dentry->d_op = &v9fs_dentry_operations; + d_set_d_op(dentry, &v9fs_dentry_operations); d_add(dentry, inode); return NULL; @@ -1709,7 +1709,7 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry, err); goto error; } - dentry->d_op = &v9fs_cached_dentry_operations; + d_set_d_op(dentry, &v9fs_cached_dentry_operations); d_instantiate(dentry, inode); err = v9fs_fid_add(dentry, fid); if (err < 0) @@ -1722,7 +1722,7 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry, err = PTR_ERR(inode); goto error; } - dentry->d_op = &v9fs_dentry_operations; + d_set_d_op(dentry, &v9fs_dentry_operations); d_instantiate(dentry, inode); } @@ -1856,7 +1856,7 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir, ihold(old_dentry->d_inode); } - dentry->d_op = old_dentry->d_op; + d_set_d_op(dentry, old_dentry->d_op); d_instantiate(dentry, old_dentry->d_inode); return err; @@ -1980,7 +1980,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode, err); goto error; } - dentry->d_op = &v9fs_cached_dentry_operations; + d_set_d_op(dentry, &v9fs_cached_dentry_operations); d_instantiate(dentry, inode); err = v9fs_fid_add(dentry, fid); if (err < 0) @@ -1996,7 +1996,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode, err = PTR_ERR(inode); goto error; } - dentry->d_op = &v9fs_dentry_operations; + d_set_d_op(dentry, &v9fs_dentry_operations); d_instantiate(dentry, inode); } /* Now set the ACL based on the default value */ diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c index a11e5e102716..bf7693c384f9 100644 --- a/fs/adfs/dir.c +++ b/fs/adfs/dir.c @@ -276,7 +276,7 @@ adfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) struct object_info obj; int error; - dentry->d_op = &adfs_dentry_operations; + d_set_d_op(dentry, &adfs_dentry_operations); lock_kernel(); error = adfs_dir_lookup_byname(dir, &dentry->d_name, &obj); if (error == 0) { diff --git a/fs/adfs/super.c b/fs/adfs/super.c index 47dffc513a26..a4041b52fbca 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -484,7 +484,7 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent) adfs_error(sb, "get root inode failed\n"); goto error; } else - sb->s_root->d_op = &adfs_dentry_operations; + d_set_d_op(sb->s_root, &adfs_dentry_operations); unlock_kernel(); return 0; diff --git a/fs/affs/namei.c b/fs/affs/namei.c index 5aca08c21100..944a4042fb65 100644 --- a/fs/affs/namei.c +++ b/fs/affs/namei.c @@ -240,7 +240,7 @@ affs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) if (IS_ERR(inode)) return ERR_CAST(inode); } - dentry->d_op = AFFS_SB(sb)->s_flags & SF_INTL ? &affs_intl_dentry_operations : &affs_dentry_operations; + d_set_d_op(dentry, AFFS_SB(sb)->s_flags & SF_INTL ? &affs_intl_dentry_operations : &affs_dentry_operations); d_add(dentry, inode); return NULL; } diff --git a/fs/affs/super.c b/fs/affs/super.c index 4c18fcfb0a19..d39081bbe7ce 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -482,7 +482,7 @@ got_root: printk(KERN_ERR "AFFS: Get root inode failed\n"); goto out_error; } - sb->s_root->d_op = &affs_dentry_operations; + d_set_d_op(sb->s_root, &affs_dentry_operations); pr_debug("AFFS: s_flags=%lX\n",sb->s_flags); return 0; diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 2c18cde27000..b8bb7e7148d0 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -581,7 +581,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, } success: - dentry->d_op = &afs_fs_dentry_operations; + d_set_d_op(dentry, &afs_fs_dentry_operations); d_add(dentry, inode); _leave(" = 0 { vn=%u u=%u } -> { ino=%lu v=%llu }", diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 57ce55b2564c..aca8806fa206 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -113,7 +113,7 @@ struct file *anon_inode_getfile(const char *name, */ ihold(anon_inode_inode); - path.dentry->d_op = &anon_inodefs_dentry_operations; + d_set_d_op(path.dentry, &anon_inodefs_dentry_operations); d_instantiate(path.dentry, anon_inode_inode); error = -ENFILE; diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index ac87e49fa706..a7bdb9dcac84 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -309,7 +309,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) goto fail_iput; pipe = NULL; - root->d_op = &autofs4_sb_dentry_operations; + d_set_d_op(root, &autofs4_sb_dentry_operations); root->d_fsdata = ino; /* Can this call block? */ diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 10ca68a96dc7..bfe3f2eb684d 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -571,7 +571,7 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s * we check for the hashed dentry and return the newly * hashed dentry. */ - dentry->d_op = &autofs4_root_dentry_operations; + d_set_d_op(dentry, &autofs4_root_dentry_operations); /* * And we need to ensure that the same dentry is used for @@ -710,9 +710,9 @@ static int autofs4_dir_symlink(struct inode *dir, d_add(dentry, inode); if (dir == dir->i_sb->s_root->d_inode) - dentry->d_op = &autofs4_root_dentry_operations; + d_set_d_op(dentry, &autofs4_root_dentry_operations); else - dentry->d_op = &autofs4_dentry_operations; + d_set_d_op(dentry, &autofs4_dentry_operations); dentry->d_fsdata = ino; ino->dentry = dget(dentry); @@ -845,9 +845,9 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode) d_add(dentry, inode); if (dir == dir->i_sb->s_root->d_inode) - dentry->d_op = &autofs4_root_dentry_operations; + d_set_d_op(dentry, &autofs4_root_dentry_operations); else - dentry->d_op = &autofs4_dentry_operations; + d_set_d_op(dentry, &autofs4_dentry_operations); dentry->d_fsdata = ino; ino->dentry = dget(dentry); diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index 659f532d26a0..0ccf9a8afcdf 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c @@ -110,7 +110,7 @@ static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid, dentry = d_obtain_alias(inode); if (!IS_ERR(dentry)) - dentry->d_op = &btrfs_dentry_operations; + d_set_d_op(dentry, &btrfs_dentry_operations); return dentry; fail: srcu_read_unlock(&fs_info->subvol_srcu, index); @@ -225,7 +225,7 @@ static struct dentry *btrfs_get_parent(struct dentry *child) key.offset = 0; dentry = d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root, NULL)); if (!IS_ERR(dentry)) - dentry->d_op = &btrfs_dentry_operations; + d_set_d_op(dentry, &btrfs_dentry_operations); return dentry; fail: btrfs_free_path(path); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f9d2994a42a2..63e4546b478a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4084,7 +4084,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) int index; int ret; - dentry->d_op = &btrfs_dentry_operations; + d_set_d_op(dentry, &btrfs_dentry_operations); if (dentry->d_name.len > BTRFS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 58abc3da6111..cc01cf826769 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -42,11 +42,11 @@ int ceph_init_dentry(struct dentry *dentry) if (dentry->d_parent == NULL || /* nfs fh_to_dentry */ ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP) - dentry->d_op = &ceph_dentry_ops; + d_set_d_op(dentry, &ceph_dentry_ops); else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR) - dentry->d_op = &ceph_snapdir_dentry_ops; + d_set_d_op(dentry, &ceph_snapdir_dentry_ops); else - dentry->d_op = &ceph_snap_dentry_ops; + d_set_d_op(dentry, &ceph_snap_dentry_ops); di = kmem_cache_alloc(ceph_dentry_cachep, GFP_NOFS | __GFP_ZERO); if (!di) diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 88bfe686ac00..e3b10ca6d453 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -135,9 +135,9 @@ static void setup_cifs_dentry(struct cifsTconInfo *tcon, struct inode *newinode) { if (tcon->nocase) - direntry->d_op = &cifs_ci_dentry_ops; + d_set_d_op(direntry, &cifs_ci_dentry_ops); else - direntry->d_op = &cifs_dentry_ops; + d_set_d_op(direntry, &cifs_dentry_ops); d_instantiate(direntry, newinode); } @@ -421,9 +421,9 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode, rc = cifs_get_inode_info_unix(&newinode, full_path, inode->i_sb, xid); if (pTcon->nocase) - direntry->d_op = &cifs_ci_dentry_ops; + d_set_d_op(direntry, &cifs_ci_dentry_ops); else - direntry->d_op = &cifs_dentry_ops; + d_set_d_op(direntry, &cifs_dentry_ops); if (rc == 0) d_instantiate(direntry, newinode); @@ -604,9 +604,9 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, if ((rc == 0) && (newInode != NULL)) { if (pTcon->nocase) - direntry->d_op = &cifs_ci_dentry_ops; + d_set_d_op(direntry, &cifs_ci_dentry_ops); else - direntry->d_op = &cifs_dentry_ops; + d_set_d_op(direntry, &cifs_dentry_ops); d_add(direntry, newInode); if (posix_open) { filp = lookup_instantiate_filp(nd, direntry, @@ -634,9 +634,9 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, rc = 0; direntry->d_time = jiffies; if (pTcon->nocase) - direntry->d_op = &cifs_ci_dentry_ops; + d_set_d_op(direntry, &cifs_ci_dentry_ops); else - direntry->d_op = &cifs_dentry_ops; + d_set_d_op(direntry, &cifs_dentry_ops); d_add(direntry, NULL); /* if it was once a directory (but how can we tell?) we could do shrink_dcache_parent(direntry); */ diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 99b9a2cc14b7..2a239d878e85 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1319,9 +1319,9 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode) to set uid/gid */ inc_nlink(inode); if (pTcon->nocase) - direntry->d_op = &cifs_ci_dentry_ops; + d_set_d_op(direntry, &cifs_ci_dentry_ops); else - direntry->d_op = &cifs_dentry_ops; + d_set_d_op(direntry, &cifs_dentry_ops); cifs_unix_basic_to_fattr(&fattr, pInfo, cifs_sb); cifs_fill_uniqueid(inode->i_sb, &fattr); @@ -1363,9 +1363,9 @@ mkdir_get_info: inode->i_sb, xid, NULL); if (pTcon->nocase) - direntry->d_op = &cifs_ci_dentry_ops; + d_set_d_op(direntry, &cifs_ci_dentry_ops); else - direntry->d_op = &cifs_dentry_ops; + d_set_d_op(direntry, &cifs_dentry_ops); d_instantiate(direntry, newinode); /* setting nlink not necessary except in cases where we * failed to get it from the server or was set bogus */ diff --git a/fs/cifs/link.c b/fs/cifs/link.c index 85cdbf831e7b..fe2f6a93c49e 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -525,9 +525,9 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname) rc); } else { if (pTcon->nocase) - direntry->d_op = &cifs_ci_dentry_ops; + d_set_d_op(direntry, &cifs_ci_dentry_ops); else - direntry->d_op = &cifs_dentry_ops; + d_set_d_op(direntry, &cifs_dentry_ops); d_instantiate(direntry, newinode); } } diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index ee463aeca0b0..ec5b68e3b928 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -103,9 +103,9 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name, } if (cifs_sb_master_tcon(CIFS_SB(sb))->nocase) - dentry->d_op = &cifs_ci_dentry_ops; + d_set_d_op(dentry, &cifs_ci_dentry_ops); else - dentry->d_op = &cifs_dentry_ops; + d_set_d_op(dentry, &cifs_dentry_ops); alias = d_materialise_unique(dentry, inode); if (alias != NULL) { diff --git a/fs/coda/dir.c b/fs/coda/dir.c index 9e37e8bc9b89..aa40c811f8d2 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -125,7 +125,7 @@ static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry, struc return ERR_PTR(error); exit: - entry->d_op = &coda_dentry_operations; + d_set_d_op(entry, &coda_dentry_operations); if (inode && (type & CODA_NOCACHE)) coda_flag_inode(inode, C_VATTR | C_PURGE); diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index e9acea440ffc..36637a8c1ed3 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -442,7 +442,7 @@ static int configfs_attach_attr(struct configfs_dirent * sd, struct dentry * den return error; } - dentry->d_op = &configfs_dentry_ops; + d_set_d_op(dentry, &configfs_dentry_ops); d_rehash(dentry); return 0; @@ -489,7 +489,7 @@ static struct dentry * configfs_lookup(struct inode *dir, */ if (dentry->d_name.len > NAME_MAX) return ERR_PTR(-ENAMETOOLONG); - dentry->d_op = &configfs_dentry_ops; + d_set_d_op(dentry, &configfs_dentry_ops); d_add(dentry, NULL); return NULL; } @@ -683,7 +683,7 @@ static int create_default_group(struct config_group *parent_group, ret = -ENOMEM; child = d_alloc(parent, &name); if (child) { - child->d_op = &configfs_dentry_ops; + d_set_d_op(child, &configfs_dentry_ops); d_add(child, NULL); ret = configfs_attach_group(&parent_group->cg_item, @@ -1681,7 +1681,7 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys) err = -ENOMEM; dentry = d_alloc(configfs_sb->s_root, &name); if (dentry) { - dentry->d_op = &configfs_dentry_ops; + d_set_d_op(dentry, &configfs_dentry_ops); d_add(dentry, NULL); err = configfs_attach_group(sd->s_element, &group->cg_item, diff --git a/fs/dcache.c b/fs/dcache.c index 1d5cf511e1c7..f9693da3efbd 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -398,7 +398,7 @@ repeat: return; } - if (dentry->d_op && dentry->d_op->d_delete) { + if (dentry->d_flags & DCACHE_OP_DELETE) { if (dentry->d_op->d_delete(dentry)) goto kill_it; } @@ -1301,6 +1301,28 @@ struct dentry *d_alloc_name(struct dentry *parent, const char *name) } EXPORT_SYMBOL(d_alloc_name); +void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op) +{ + BUG_ON(dentry->d_op); + BUG_ON(dentry->d_flags & (DCACHE_OP_HASH | + DCACHE_OP_COMPARE | + DCACHE_OP_REVALIDATE | + DCACHE_OP_DELETE )); + dentry->d_op = op; + if (!op) + return; + if (op->d_hash) + dentry->d_flags |= DCACHE_OP_HASH; + if (op->d_compare) + dentry->d_flags |= DCACHE_OP_COMPARE; + if (op->d_revalidate) + dentry->d_flags |= DCACHE_OP_REVALIDATE; + if (op->d_delete) + dentry->d_flags |= DCACHE_OP_DELETE; + +} +EXPORT_SYMBOL(d_set_d_op); + static void __d_instantiate(struct dentry *dentry, struct inode *inode) { spin_lock(&dentry->d_lock); @@ -1731,7 +1753,7 @@ seqretry: */ if (read_seqcount_retry(&dentry->d_seq, *seq)) goto seqretry; - if (parent->d_op && parent->d_op->d_compare) { + if (parent->d_flags & DCACHE_OP_COMPARE) { if (parent->d_op->d_compare(parent, *inode, dentry, i, tlen, tname, name)) @@ -1846,7 +1868,7 @@ struct dentry *__d_lookup(struct dentry *parent, struct qstr *name) */ tlen = dentry->d_name.len; tname = dentry->d_name.name; - if (parent->d_op && parent->d_op->d_compare) { + if (parent->d_flags & DCACHE_OP_COMPARE) { if (parent->d_op->d_compare(parent, parent->d_inode, dentry, dentry->d_inode, tlen, tname, name)) @@ -1887,7 +1909,7 @@ struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name) * routine may choose to leave the hash value unchanged. */ name->hash = full_name_hash(name->name, name->len); - if (dir->d_op && dir->d_op->d_hash) { + if (dir->d_flags & DCACHE_OP_HASH) { if (dir->d_op->d_hash(dir, dir->d_inode, name) < 0) goto out; } diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 5e5c7ec1fc98..f91b35db4c6e 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -441,7 +441,7 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, struct qstr lower_name; int rc = 0; - ecryptfs_dentry->d_op = &ecryptfs_dops; + d_set_d_op(ecryptfs_dentry, &ecryptfs_dops); if ((ecryptfs_dentry->d_name.len == 1 && !strcmp(ecryptfs_dentry->d_name.name, ".")) || (ecryptfs_dentry->d_name.len == 2 diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index a9dbd62518e6..351038675376 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -189,7 +189,7 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry, if (special_file(lower_inode->i_mode)) init_special_inode(inode, lower_inode->i_mode, lower_inode->i_rdev); - dentry->d_op = &ecryptfs_dops; + d_set_d_op(dentry, &ecryptfs_dops); fsstack_copy_attr_all(inode, lower_inode); /* This size will be overwritten for real files w/ headers and * other metadata */ @@ -594,7 +594,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags deactivate_locked_super(s); goto out; } - s->s_root->d_op = &ecryptfs_dops; + d_set_d_op(s->s_root, &ecryptfs_dops); s->s_root->d_sb = s; s->s_root->d_parent = s->s_root; diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 8cccfebee180..206351af7c58 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -750,7 +750,7 @@ static struct dentry *fat_fh_to_dentry(struct super_block *sb, */ result = d_obtain_alias(inode); if (!IS_ERR(result)) - result->d_op = sb->s_root->d_op; + d_set_d_op(result, sb->s_root->d_op); return result; } @@ -800,7 +800,7 @@ static struct dentry *fat_get_parent(struct dentry *child) parent = d_obtain_alias(inode); if (!IS_ERR(parent)) - parent->d_op = sb->s_root->d_op; + d_set_d_op(parent, sb->s_root->d_op); out: unlock_super(sb); diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index 3b3e072d8982..35ffe43afa4b 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -227,10 +227,10 @@ static struct dentry *msdos_lookup(struct inode *dir, struct dentry *dentry, } out: unlock_super(sb); - dentry->d_op = &msdos_dentry_operations; + d_set_d_op(dentry, &msdos_dentry_operations); dentry = d_splice_alias(inode, dentry); if (dentry) - dentry->d_op = &msdos_dentry_operations; + d_set_d_op(dentry, &msdos_dentry_operations); return dentry; error: @@ -673,7 +673,7 @@ static int msdos_fill_super(struct super_block *sb, void *data, int silent) } sb->s_flags |= MS_NOATIME; - sb->s_root->d_op = &msdos_dentry_operations; + d_set_d_op(sb->s_root, &msdos_dentry_operations); unlock_super(sb); return 0; } diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 4fc06278db48..3be5ed7d859f 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -766,11 +766,11 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry, out: unlock_super(sb); - dentry->d_op = sb->s_root->d_op; + d_set_d_op(dentry, sb->s_root->d_op); dentry->d_time = dentry->d_parent->d_inode->i_version; dentry = d_splice_alias(inode, dentry); if (dentry) { - dentry->d_op = sb->s_root->d_op; + d_set_d_op(dentry, sb->s_root->d_op); dentry->d_time = dentry->d_parent->d_inode->i_version; } return dentry; @@ -1072,9 +1072,9 @@ static int vfat_fill_super(struct super_block *sb, void *data, int silent) } if (MSDOS_SB(sb)->options.name_check != 's') - sb->s_root->d_op = &vfat_ci_dentry_ops; + d_set_d_op(sb->s_root, &vfat_ci_dentry_ops); else - sb->s_root->d_op = &vfat_dentry_ops; + d_set_d_op(sb->s_root, &vfat_dentry_ops); unlock_super(sb); return 0; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index c9627c95482d..c9a8a426a395 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -347,7 +347,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, } entry = newent ? newent : entry; - entry->d_op = &fuse_dentry_operations; + d_set_d_op(entry, &fuse_dentry_operations); if (outarg_valid) fuse_change_entry_timeout(entry, &outarg); else diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 44e0a6c57e87..a8b31da19b93 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -626,7 +626,7 @@ static struct dentry *fuse_get_dentry(struct super_block *sb, entry = d_obtain_alias(inode); if (!IS_ERR(entry) && get_node_id(inode) != FUSE_ROOT_ID) { - entry->d_op = &fuse_dentry_operations; + d_set_d_op(entry, &fuse_dentry_operations); fuse_invalidate_entry_cache(entry); } @@ -728,7 +728,7 @@ static struct dentry *fuse_get_parent(struct dentry *child) parent = d_obtain_alias(inode); if (!IS_ERR(parent) && get_node_id(inode) != FUSE_ROOT_ID) { - parent->d_op = &fuse_dentry_operations; + d_set_d_op(parent, &fuse_dentry_operations); fuse_invalidate_entry_cache(parent); } diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c index 5ab3839dfcb9..97012ecff560 100644 --- a/fs/gfs2/export.c +++ b/fs/gfs2/export.c @@ -130,7 +130,7 @@ static struct dentry *gfs2_get_parent(struct dentry *child) dentry = d_obtain_alias(gfs2_lookupi(child->d_inode, &gfs2_qdotdot, 1)); if (!IS_ERR(dentry)) - dentry->d_op = &gfs2_dops; + d_set_d_op(dentry, &gfs2_dops); return dentry; } @@ -158,7 +158,7 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, out_inode: dentry = d_obtain_alias(inode); if (!IS_ERR(dentry)) - dentry->d_op = &gfs2_dops; + d_set_d_op(dentry, &gfs2_dops); return dentry; } diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 3eb1393f7b81..2aeabd4218cc 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -440,7 +440,7 @@ static int gfs2_lookup_root(struct super_block *sb, struct dentry **dptr, iput(inode); return -ENOMEM; } - dentry->d_op = &gfs2_dops; + d_set_d_op(dentry, &gfs2_dops); *dptr = dentry; return 0; } diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index 12cbea7502c2..f28f89796f4d 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -106,7 +106,7 @@ static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry, { struct inode *inode = NULL; - dentry->d_op = &gfs2_dops; + d_set_d_op(dentry, &gfs2_dops); inode = gfs2_lookupi(dir, &dentry->d_name, 0); if (inode && IS_ERR(inode)) diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c index 2b3b8611b41b..ea4aefe7c652 100644 --- a/fs/hfs/dir.c +++ b/fs/hfs/dir.c @@ -25,7 +25,7 @@ static struct dentry *hfs_lookup(struct inode *dir, struct dentry *dentry, struct inode *inode = NULL; int res; - dentry->d_op = &hfs_dentry_operations; + d_set_d_op(dentry, &hfs_dentry_operations); hfs_find_init(HFS_SB(dir->i_sb)->cat_tree, &fd); hfs_cat_build_key(dir->i_sb, fd.search_key, dir->i_ino, &dentry->d_name); diff --git a/fs/hfs/super.c b/fs/hfs/super.c index ef4ee5716abe..0bef62aa4f42 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -434,7 +434,7 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent) if (!sb->s_root) goto bail_iput; - sb->s_root->d_op = &hfs_dentry_operations; + d_set_d_op(sb->s_root, &hfs_dentry_operations); /* everything's okay */ return 0; diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index 9d59c0571f59..ccab87145f7a 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c @@ -37,7 +37,7 @@ static struct dentry *hfsplus_lookup(struct inode *dir, struct dentry *dentry, sb = dir->i_sb; - dentry->d_op = &hfsplus_dentry_operations; + d_set_d_op(dentry, &hfsplus_dentry_operations); dentry->d_fsdata = NULL; hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, &dentry->d_name); diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 182e83a9079e..ddf712e4700e 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -419,7 +419,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) err = -ENOMEM; goto cleanup; } - sb->s_root->d_op = &hfsplus_dentry_operations; + d_set_d_op(sb->s_root, &hfsplus_dentry_operations); str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1; str.name = HFSP_HIDDENDIR_NAME; diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 861113fcfc88..0bc81cf256b8 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -612,7 +612,7 @@ struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry, goto out_put; d_add(dentry, inode); - dentry->d_op = &hostfs_dentry_ops; + d_set_d_op(dentry, &hostfs_dentry_ops); return NULL; out_put: diff --git a/fs/hpfs/dentry.c b/fs/hpfs/dentry.c index 35526df1fd32..32c13a94e1e9 100644 --- a/fs/hpfs/dentry.c +++ b/fs/hpfs/dentry.c @@ -65,5 +65,5 @@ static const struct dentry_operations hpfs_dentry_operations = { void hpfs_set_dentry_operations(struct dentry *dentry) { - dentry->d_op = &hpfs_dentry_operations; + d_set_d_op(dentry, &hpfs_dentry_operations); } diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index d8f3a652243d..844a7903c72f 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -949,7 +949,7 @@ root_found: table += 2; if (opt.check == 'r') table++; - s->s_root->d_op = &isofs_dentry_ops[table]; + d_set_d_op(s->s_root, &isofs_dentry_ops[table]); kfree(opt.iocharset); diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c index 715f7d318046..679a849c3b27 100644 --- a/fs/isofs/namei.c +++ b/fs/isofs/namei.c @@ -172,7 +172,7 @@ struct dentry *isofs_lookup(struct inode *dir, struct dentry *dentry, struct nam struct inode *inode; struct page *page; - dentry->d_op = dir->i_sb->s_root->d_op; + d_set_d_op(dentry, dir->i_sb->s_root->d_op); page = alloc_page(GFP_USER); if (!page) diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 57f90dad8919..a151cbdec626 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -1466,7 +1466,7 @@ static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, struc jfs_info("jfs_lookup: name = %s", name); if (JFS_SBI(dip->i_sb)->mntflag & JFS_OS2) - dentry->d_op = &jfs_ci_dentry_operations; + d_set_d_op(dentry, &jfs_ci_dentry_operations); if ((name[0] == '.') && (len == 1)) inum = dip->i_ino; @@ -1495,7 +1495,7 @@ static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, struc dentry = d_splice_alias(ip, dentry); if (dentry && (JFS_SBI(dip->i_sb)->mntflag & JFS_OS2)) - dentry->d_op = &jfs_ci_dentry_operations; + d_set_d_op(dentry, &jfs_ci_dentry_operations); return dentry; } diff --git a/fs/jfs/super.c b/fs/jfs/super.c index b715b0f7bdfd..3150d766e0d4 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -525,7 +525,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) goto out_no_root; if (sbi->mntflag & JFS_OS2) - sb->s_root->d_op = &jfs_ci_dentry_operations; + d_set_d_op(sb->s_root, &jfs_ci_dentry_operations); /* logical blocks are represented by 40 bits in pxd_t, etc. */ sb->s_maxbytes = ((u64) sb->s_blocksize) << 40; diff --git a/fs/libfs.c b/fs/libfs.c index 28b36663c44e..889311e3d06b 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -59,7 +59,7 @@ struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, struct na if (dentry->d_name.len > NAME_MAX) return ERR_PTR(-ENAMETOOLONG); - dentry->d_op = &simple_dentry_operations; + d_set_d_op(dentry, &simple_dentry_operations); d_add(dentry, NULL); return NULL; } diff --git a/fs/minix/namei.c b/fs/minix/namei.c index c0d35a3accef..1b9e07728a9f 100644 --- a/fs/minix/namei.c +++ b/fs/minix/namei.c @@ -23,7 +23,7 @@ static struct dentry *minix_lookup(struct inode * dir, struct dentry *dentry, st struct inode * inode = NULL; ino_t ino; - dentry->d_op = dir->i_sb->s_root->d_op; + d_set_d_op(dentry, dir->i_sb->s_root->d_op); if (dentry->d_name.len > minix_sb(dir->i_sb)->s_namelen) return ERR_PTR(-ENAMETOOLONG); diff --git a/fs/namei.c b/fs/namei.c index c731b50a6184..90bd2873e117 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -587,6 +587,17 @@ do_revalidate(struct dentry *dentry, struct nameidata *nd) return dentry; } +static inline int need_reval_dot(struct dentry *dentry) +{ + if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE))) + return 0; + + if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT))) + return 0; + + return 1; +} + /* * force_reval_path - force revalidation of a dentry * @@ -610,10 +621,9 @@ force_reval_path(struct path *path, struct nameidata *nd) /* * only check on filesystems where it's possible for the dentry to - * become stale. It's assumed that if this flag is set then the - * d_revalidate op will also be defined. + * become stale. */ - if (!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) + if (!need_reval_dot(dentry)) return 0; status = dentry->d_op->d_revalidate(dentry, nd); @@ -1003,7 +1013,7 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, * See if the low-level filesystem might want * to use its own hash.. */ - if (parent->d_op && parent->d_op->d_hash) { + if (unlikely(parent->d_flags & DCACHE_OP_HASH)) { int err = parent->d_op->d_hash(parent, nd->inode, name); if (err < 0) return err; @@ -1029,7 +1039,7 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, return -ECHILD; nd->seq = seq; - if (dentry->d_op && dentry->d_op->d_revalidate) { + if (dentry->d_flags & DCACHE_OP_REVALIDATE) { /* We commonly drop rcu-walk here */ if (nameidata_dentry_drop_rcu(nd, dentry)) return -ECHILD; @@ -1043,7 +1053,7 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, if (!dentry) goto need_lookup; found: - if (dentry->d_op && dentry->d_op->d_revalidate) + if (dentry->d_flags & DCACHE_OP_REVALIDATE) goto need_revalidate; done: path->mnt = mnt; @@ -1281,8 +1291,7 @@ return_reval: * We bypassed the ordinary revalidation routines. * We may need to check the cached dentry for staleness. */ - if (nd->path.dentry && nd->path.dentry->d_sb && - (nd->path.dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) { + if (need_reval_dot(nd->path.dentry)) { if (nameidata_drop_rcu_maybe(nd)) return -ECHILD; err = -ESTALE; @@ -1602,7 +1611,7 @@ static struct dentry *__lookup_hash(struct qstr *name, * See if the low-level filesystem might want * to use its own hash.. */ - if (base->d_op && base->d_op->d_hash) { + if (base->d_flags & DCACHE_OP_HASH) { err = base->d_op->d_hash(base, inode, name); dentry = ERR_PTR(err); if (err < 0) @@ -1616,7 +1625,7 @@ static struct dentry *__lookup_hash(struct qstr *name, */ dentry = d_lookup(base, name); - if (dentry && dentry->d_op && dentry->d_op->d_revalidate) + if (dentry && (dentry->d_flags & DCACHE_OP_REVALIDATE)) dentry = do_revalidate(dentry, nd); if (!dentry) @@ -2070,7 +2079,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path, follow_dotdot(nd); dir = nd->path.dentry; case LAST_DOT: - if (nd->path.mnt->mnt_sb->s_type->fs_flags & FS_REVAL_DOT) { + if (need_reval_dot(dir)) { if (!dir->d_op->d_revalidate(dir, nd)) { error = -ESTALE; goto exit; diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index 0ba3cdc95a44..4b9cbb28d7fa 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -633,7 +633,7 @@ ncp_fill_cache(struct file *filp, void *dirent, filldir_t filldir, entry->ino = iunique(dir->i_sb, 2); inode = ncp_iget(dir->i_sb, entry); if (inode) { - newdent->d_op = &ncp_dentry_operations; + d_set_d_op(newdent, &ncp_dentry_operations); d_instantiate(newdent, inode); if (!hashed) d_rehash(newdent); @@ -889,7 +889,7 @@ static struct dentry *ncp_lookup(struct inode *dir, struct dentry *dentry, struc if (inode) { ncp_new_dentry(dentry); add_entry: - dentry->d_op = &ncp_dentry_operations; + d_set_d_op(dentry, &ncp_dentry_operations); d_add(dentry, inode); error = 0; } diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 60047dbeb38d..0c75a5f3cafd 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -717,7 +717,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent) sb->s_root = d_alloc_root(root_inode); if (!sb->s_root) goto out_no_root; - sb->s_root->d_op = &ncp_root_dentry_operations; + d_set_d_op(sb->s_root, &ncp_root_dentry_operations); return 0; out_no_root: diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index eb77471b8823..37e0a8bb077e 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -438,7 +438,7 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) if (dentry == NULL) return; - dentry->d_op = NFS_PROTO(dir)->dentry_ops; + d_set_d_op(dentry, NFS_PROTO(dir)->dentry_ops); inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr); if (IS_ERR(inode)) goto out; @@ -1188,7 +1188,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru if (dentry->d_name.len > NFS_SERVER(dir)->namelen) goto out; - dentry->d_op = NFS_PROTO(dir)->dentry_ops; + d_set_d_op(dentry, NFS_PROTO(dir)->dentry_ops); /* * If we're doing an exclusive create, optimize away the lookup @@ -1333,7 +1333,7 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry res = ERR_PTR(-ENAMETOOLONG); goto out; } - dentry->d_op = NFS_PROTO(dir)->dentry_ops; + d_set_d_op(dentry, NFS_PROTO(dir)->dentry_ops); /* Let vfs_create() deal with O_EXCL. Instantiate, but don't hash * the dentry. */ diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index b3e36c3430de..c3a5a1126833 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -121,7 +121,7 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh) security_d_instantiate(ret, inode); if (ret->d_op == NULL) - ret->d_op = server->nfs_client->rpc_ops->dentry_ops; + d_set_d_op(ret, server->nfs_client->rpc_ops->dentry_ops); out: nfs_free_fattr(fsinfo.fattr); return ret; @@ -228,7 +228,7 @@ struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh) security_d_instantiate(ret, inode); if (ret->d_op == NULL) - ret->d_op = server->nfs_client->rpc_ops->dentry_ops; + d_set_d_op(ret, server->nfs_client->rpc_ops->dentry_ops); out: nfs_free_fattr(fattr); diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c index 19ad145d2af3..6adafa576065 100644 --- a/fs/ocfs2/export.c +++ b/fs/ocfs2/export.c @@ -138,7 +138,7 @@ check_gen: result = d_obtain_alias(inode); if (!IS_ERR(result)) - result->d_op = &ocfs2_dentry_ops; + d_set_d_op(result, &ocfs2_dentry_ops); else mlog_errno(PTR_ERR(result)); @@ -176,7 +176,7 @@ static struct dentry *ocfs2_get_parent(struct dentry *child) parent = d_obtain_alias(ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0, 0)); if (!IS_ERR(parent)) - parent->d_op = &ocfs2_dentry_ops; + d_set_d_op(parent, &ocfs2_dentry_ops); bail_unlock: ocfs2_inode_unlock(dir, 0); diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index ff5744e1e36f..d14cad6e2e41 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -147,7 +147,7 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry, spin_unlock(&oi->ip_lock); bail_add: - dentry->d_op = &ocfs2_dentry_ops; + d_set_d_op(dentry, &ocfs2_dentry_ops); ret = d_splice_alias(inode, dentry); if (inode) { @@ -415,7 +415,7 @@ static int ocfs2_mknod(struct inode *dir, mlog_errno(status); goto leave; } - dentry->d_op = &ocfs2_dentry_ops; + d_set_d_op(dentry, &ocfs2_dentry_ops); status = ocfs2_add_entry(handle, dentry, inode, OCFS2_I(inode)->ip_blkno, parent_fe_bh, @@ -743,7 +743,7 @@ static int ocfs2_link(struct dentry *old_dentry, } ihold(inode); - dentry->d_op = &ocfs2_dentry_ops; + d_set_d_op(dentry, &ocfs2_dentry_ops); d_instantiate(dentry, inode); out_commit: @@ -1794,7 +1794,7 @@ static int ocfs2_symlink(struct inode *dir, mlog_errno(status); goto bail; } - dentry->d_op = &ocfs2_dentry_ops; + d_set_d_op(dentry, &ocfs2_dentry_ops); status = ocfs2_add_entry(handle, dentry, inode, le64_to_cpu(fe->i_blkno), parent_fe_bh, @@ -2459,7 +2459,7 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir, goto out_commit; } - dentry->d_op = &ocfs2_dentry_ops; + d_set_d_op(dentry, &ocfs2_dentry_ops); d_instantiate(dentry, inode); status = 0; out_commit: diff --git a/fs/pipe.c b/fs/pipe.c index ae3592dcc88c..01a786567810 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1004,7 +1004,7 @@ struct file *create_write_pipe(int flags) goto err_inode; path.mnt = mntget(pipe_mnt); - path.dentry->d_op = &pipefs_dentry_operations; + d_set_d_op(path.dentry, &pipefs_dentry_operations); d_instantiate(path.dentry, inode); err = -ENFILE; diff --git a/fs/proc/base.c b/fs/proc/base.c index d932fdb6a245..85f0a80912aa 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1969,7 +1969,7 @@ static struct dentry *proc_fd_instantiate(struct inode *dir, inode->i_op = &proc_pid_link_inode_operations; inode->i_size = 64; ei->op.proc_get_link = proc_fd_link; - dentry->d_op = &tid_fd_dentry_operations; + d_set_d_op(dentry, &tid_fd_dentry_operations); d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ if (tid_fd_revalidate(dentry, NULL)) @@ -2137,7 +2137,7 @@ static struct dentry *proc_fdinfo_instantiate(struct inode *dir, ei->fd = fd; inode->i_mode = S_IFREG | S_IRUSR; inode->i_fop = &proc_fdinfo_file_operations; - dentry->d_op = &tid_fd_dentry_operations; + d_set_d_op(dentry, &tid_fd_dentry_operations); d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ if (tid_fd_revalidate(dentry, NULL)) @@ -2196,7 +2196,7 @@ static struct dentry *proc_pident_instantiate(struct inode *dir, if (p->fop) inode->i_fop = p->fop; ei->op = p->op; - dentry->d_op = &pid_dentry_operations; + d_set_d_op(dentry, &pid_dentry_operations); d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ if (pid_revalidate(dentry, NULL)) @@ -2615,7 +2615,7 @@ static struct dentry *proc_base_instantiate(struct inode *dir, if (p->fop) inode->i_fop = p->fop; ei->op = p->op; - dentry->d_op = &proc_base_dentry_operations; + d_set_d_op(dentry, &proc_base_dentry_operations); d_add(dentry, inode); error = NULL; out: @@ -2926,7 +2926,7 @@ static struct dentry *proc_pid_instantiate(struct inode *dir, inode->i_nlink = 2 + pid_entry_count_dirs(tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff)); - dentry->d_op = &pid_dentry_operations; + d_set_d_op(dentry, &pid_dentry_operations); d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ @@ -3169,7 +3169,7 @@ static struct dentry *proc_task_instantiate(struct inode *dir, inode->i_nlink = 2 + pid_entry_count_dirs(tid_base_stuff, ARRAY_SIZE(tid_base_stuff)); - dentry->d_op = &pid_dentry_operations; + d_set_d_op(dentry, &pid_dentry_operations); d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 1d607be36d95..f766be29d2c7 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -439,7 +439,7 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir, out_unlock: if (inode) { - dentry->d_op = &proc_dentry_operations; + d_set_d_op(dentry, &proc_dentry_operations); d_add(dentry, inode); return NULL; } diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 998e3a715bcc..35efd85a4d32 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -120,7 +120,7 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, goto out; err = NULL; - dentry->d_op = &proc_sys_dentry_operations; + d_set_d_op(dentry, &proc_sys_dentry_operations); d_add(dentry, inode); out: @@ -201,7 +201,7 @@ static int proc_sys_fill_cache(struct file *filp, void *dirent, dput(child); return -ENOMEM; } else { - child->d_op = &proc_sys_dentry_operations; + d_set_d_op(child, &proc_sys_dentry_operations); d_add(child, inode); } } else { diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 5d04a7828e7a..e0f0d7ea10a1 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -990,7 +990,7 @@ int reiserfs_lookup_privroot(struct super_block *s) strlen(PRIVROOT_NAME)); if (!IS_ERR(dentry)) { REISERFS_SB(s)->priv_root = dentry; - dentry->d_op = &xattr_lookup_poison_ops; + d_set_d_op(dentry, &xattr_lookup_poison_ops); if (dentry->d_inode) dentry->d_inode->i_flags |= S_PRIVATE; } else diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 27e1102e303e..3e076caa8daf 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -701,7 +701,7 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry, /* instantiate and hash dentry */ ret = d_find_alias(inode); if (!ret) { - dentry->d_op = &sysfs_dentry_ops; + d_set_d_op(dentry, &sysfs_dentry_ops); dentry->d_fsdata = sysfs_get(sd); d_add(dentry, inode); } else { diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c index 7507aeb4c90e..b5e68da2db32 100644 --- a/fs/sysv/namei.c +++ b/fs/sysv/namei.c @@ -48,7 +48,7 @@ static struct dentry *sysv_lookup(struct inode * dir, struct dentry * dentry, st struct inode * inode = NULL; ino_t ino; - dentry->d_op = dir->i_sb->s_root->d_op; + d_set_d_op(dentry, dir->i_sb->s_root->d_op); if (dentry->d_name.len > SYSV_NAMELEN) return ERR_PTR(-ENAMETOOLONG); ino = sysv_inode_by_name(dentry); diff --git a/fs/sysv/super.c b/fs/sysv/super.c index 3d9c62be0c10..76712aefc4ab 100644 --- a/fs/sysv/super.c +++ b/fs/sysv/super.c @@ -346,7 +346,7 @@ static int complete_read_super(struct super_block *sb, int silent, int size) if (sbi->s_forced_ro) sb->s_flags |= MS_RDONLY; if (sbi->s_truncate) - sb->s_root->d_op = &sysv_dentry_operations; + d_set_d_op(sb->s_root, &sysv_dentry_operations); return 1; } diff --git a/include/linux/dcache.h b/include/linux/dcache.h index e4414693065e..f4b40a751f09 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -183,6 +183,11 @@ struct dentry_operations { #define DCACHE_GENOCIDE 0x0200 #define DCACHE_MOUNTED 0x0400 /* is a mountpoint */ +#define DCACHE_OP_HASH 0x1000 +#define DCACHE_OP_COMPARE 0x2000 +#define DCACHE_OP_REVALIDATE 0x4000 +#define DCACHE_OP_DELETE 0x8000 + extern spinlock_t dcache_inode_lock; extern seqlock_t rename_lock; @@ -201,6 +206,7 @@ extern struct dentry * d_materialise_unique(struct dentry *, struct inode *); extern void __d_drop(struct dentry *dentry); extern void d_drop(struct dentry *dentry); extern void d_delete(struct dentry *); +extern void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op); /* allocate/de-allocate */ extern struct dentry * d_alloc(struct dentry *, const struct qstr *); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 9f41470c3949..51cddc11cd85 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2222,7 +2222,7 @@ static struct dentry *cgroup_lookup(struct inode *dir, if (dentry->d_name.len > NAME_MAX) return ERR_PTR(-ENAMETOOLONG); - dentry->d_op = &cgroup_dentry_operations; + d_set_d_op(dentry, &cgroup_dentry_operations); d_add(dentry, NULL); return NULL; } diff --git a/net/socket.c b/net/socket.c index 817dc92e9ef8..991e266bc7ae 100644 --- a/net/socket.c +++ b/net/socket.c @@ -368,7 +368,7 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags) } path.mnt = mntget(sock_mnt); - path.dentry->d_op = &sockfs_dentry_operations; + d_set_d_op(path.dentry, &sockfs_dentry_operations); d_instantiate(path.dentry, SOCK_INODE(sock)); SOCK_INODE(sock)->i_fop = &socket_file_ops; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 2899fe27f880..09f01f41e55a 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -591,7 +591,7 @@ static struct dentry *__rpc_lookup_create(struct dentry *parent, } } if (!dentry->d_inode) - dentry->d_op = &rpc_dentry_operations; + d_set_d_op(dentry, &rpc_dentry_operations); out_err: return dentry; } -- cgit v1.2.3 From 34286d6662308d82aed891852d04c7c3a2649b16 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:57 +1100 Subject: fs: rcu-walk aware d_revalidate method Require filesystems be aware of .d_revalidate being called in rcu-walk mode (nd->flags & LOOKUP_RCU). For now do a simple push down, returning -ECHILD from all implementations. Signed-off-by: Nick Piggin --- Documentation/filesystems/Locking | 18 +++++------ Documentation/filesystems/path-lookup.txt | 5 ++- Documentation/filesystems/porting | 20 ++++++++++++ Documentation/filesystems/vfs.txt | 9 ++++++ drivers/staging/autofs/root.c | 5 ++- drivers/staging/smbfs/dir.c | 16 ++++++--- fs/afs/dir.c | 4 +++ fs/autofs4/root.c | 13 ++++++-- fs/ceph/dir.c | 7 +++- fs/cifs/dir.c | 3 ++ fs/coda/dir.c | 7 +++- fs/ecryptfs/dentry.c | 9 ++++-- fs/fat/namei_vfat.c | 6 ++++ fs/fuse/dir.c | 6 +++- fs/gfs2/dentry.c | 17 +++++++--- fs/hfs/sysdep.c | 7 +++- fs/jfs/namei.c | 2 ++ fs/namei.c | 54 ++++++++++++++++++++----------- fs/ncpfs/dir.c | 4 +++ fs/ncpfs/inode.c | 1 + fs/nfs/dir.c | 8 +++-- fs/ocfs2/dcache.c | 10 ++++-- fs/proc/base.c | 33 +++++++++++++++---- fs/proc/proc_sysctl.c | 3 ++ fs/reiserfs/xattr.c | 2 ++ fs/sysfs/dir.c | 6 +++- include/linux/dcache.h | 1 - 27 files changed, 215 insertions(+), 61 deletions(-) (limited to 'drivers') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index bdad6414dfa0..e90ffe61eb65 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -9,7 +9,7 @@ be able to use diff(1). --------------------------- dentry_operations -------------------------- prototypes: - int (*d_revalidate)(struct dentry *, int); + int (*d_revalidate)(struct dentry *, struct nameidata *); int (*d_hash)(const struct dentry *, const struct inode *, struct qstr *); int (*d_compare)(const struct dentry *, const struct inode *, @@ -21,14 +21,14 @@ prototypes: char *(*d_dname)((struct dentry *dentry, char *buffer, int buflen); locking rules: - rename_lock ->d_lock may block -d_revalidate: no no yes -d_hash no no no -d_compare: yes no no -d_delete: no yes no -d_release: no no yes -d_iput: no no yes -d_dname: no no no + rename_lock ->d_lock may block rcu-walk +d_revalidate: no no yes (ref-walk) maybe +d_hash no no no maybe +d_compare: yes no no maybe +d_delete: no yes no no +d_release: no no yes no +d_iput: no no yes no +d_dname: no no no no --------------------------- inode_operations --------------------------- prototypes: diff --git a/Documentation/filesystems/path-lookup.txt b/Documentation/filesystems/path-lookup.txt index 09b2878724a1..8789d1810bed 100644 --- a/Documentation/filesystems/path-lookup.txt +++ b/Documentation/filesystems/path-lookup.txt @@ -317,11 +317,10 @@ The detailed design for rcu-walk is like this: The cases where rcu-walk cannot continue are: * NULL dentry (ie. any uncached path element) * parent with d_inode->i_op->permission or ACLs -* dentries with d_revalidate * Following links -In future patches, permission checks and d_revalidate become rcu-walk aware. It -may be possible eventually to make following links rcu-walk aware. +In future patches, permission checks become rcu-walk aware. It may be possible +eventually to make following links rcu-walk aware. Uncached path elements will always require dropping to ref-walk mode, at the very least because i_mutex needs to be grabbed, and objects allocated. diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index ccf0ce7866b9..cd9756a2709d 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -360,3 +360,23 @@ i_dentry to be reinitialized before it is freed, so an: INIT_LIST_HEAD(&inode->i_dentry); must be done in the RCU callback. + +-- +[recommended] + vfs now tries to do path walking in "rcu-walk mode", which avoids +atomic operations and scalability hazards on dentries and inodes (see +Documentation/filesystems/path-walk.txt). d_hash and d_compare changes (above) +are examples of the changes required to support this. For more complex +filesystem callbacks, the vfs drops out of rcu-walk mode before the fs call, so +no changes are required to the filesystem. However, this is costly and loses +the benefits of rcu-walk mode. We will begin to add filesystem callbacks that +are rcu-walk aware, shown below. Filesystems should take advantage of this +where possible. + +-- +[mandatory] + d_revalidate is a callback that is made on every path element (if +the filesystem provides it), which requires dropping out of rcu-walk mode. This +may now be called in rcu-walk mode (nd->flags & LOOKUP_RCU). -ECHILD should be +returned if the filesystem cannot handle rcu-walk. See +Documentation/filesystems/vfs.txt for more details. diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 69b10ff5ec81..c936b4912383 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -863,6 +863,15 @@ struct dentry_operations { dcache. Most filesystems leave this as NULL, because all their dentries in the dcache are valid + d_revalidate may be called in rcu-walk mode (nd->flags & LOOKUP_RCU). + If in rcu-walk mode, the filesystem must revalidate the dentry without + blocking or storing to the dentry, d_parent and d_inode should not be + used without care (because they can go NULL), instead nd->inode should + be used. + + If a situation is encountered that rcu-walk cannot handle, return + -ECHILD and it will be called again in ref-walk mode. + d_hash: called when the VFS adds a dentry to the hash table. The first dentry passed to d_hash is the parent directory that the name is to be hashed into. The inode is the dentry's inode. diff --git a/drivers/staging/autofs/root.c b/drivers/staging/autofs/root.c index b09adb57971f..bf0e9755da67 100644 --- a/drivers/staging/autofs/root.c +++ b/drivers/staging/autofs/root.c @@ -154,13 +154,16 @@ static int try_to_fill_dentry(struct dentry *dentry, struct super_block *sb, str * yet completely filled in, and revalidate has to delay such * lookups.. */ -static int autofs_revalidate(struct dentry * dentry, struct nameidata *nd) +static int autofs_revalidate(struct dentry *dentry, struct nameidata *nd) { struct inode * dir; struct autofs_sb_info *sbi; struct autofs_dir_ent *ent; int res; + if (nd->flags & LOOKUP_RCU) + return -ECHILD; + lock_kernel(); dir = dentry->d_parent->d_inode; sbi = autofs_sbi(dir->i_sb); diff --git a/drivers/staging/smbfs/dir.c b/drivers/staging/smbfs/dir.c index 78f09412740c..dd612f50749f 100644 --- a/drivers/staging/smbfs/dir.c +++ b/drivers/staging/smbfs/dir.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "smb_fs.h" #include "smb_mount.h" @@ -301,13 +302,20 @@ static const struct dentry_operations smbfs_dentry_operations_case = * This is the callback when the dcache has a lookup hit. */ static int -smb_lookup_validate(struct dentry * dentry, struct nameidata *nd) +smb_lookup_validate(struct dentry *dentry, struct nameidata *nd) { - struct smb_sb_info *server = server_from_dentry(dentry); - struct inode * inode = dentry->d_inode; - unsigned long age = jiffies - dentry->d_time; + struct smb_sb_info *server; + struct inode *inode; + unsigned long age; int valid; + if (nd->flags & LOOKUP_RCU) + return -ECHILD; + + server = server_from_dentry(dentry); + inode = dentry->d_inode; + age = jiffies - dentry->d_time; + /* * The default validation is based on dentry age: * we believe in dentries for a few seconds. (But each diff --git a/fs/afs/dir.c b/fs/afs/dir.c index b8bb7e7148d0..34a3263d60a4 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -607,6 +608,9 @@ static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd) void *dir_version; int ret; + if (nd->flags & LOOKUP_RCU) + return -ECHILD; + vnode = AFS_FS_I(dentry->d_inode); if (dentry->d_inode) diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index bfe3f2eb684d..651e4ef563b1 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -315,12 +315,19 @@ out_error: */ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd) { - struct inode *dir = dentry->d_parent->d_inode; - struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb); - int oz_mode = autofs4_oz_mode(sbi); + struct inode *dir; + struct autofs_sb_info *sbi; + int oz_mode; int flags = nd ? nd->flags : 0; int status = 1; + if (flags & LOOKUP_RCU) + return -ECHILD; + + dir = dentry->d_parent->d_inode; + sbi = autofs4_sbi(dir->i_sb); + oz_mode = autofs4_oz_mode(sbi); + /* Pending dentry */ spin_lock(&sbi->fs_lock); if (autofs4_ispending(dentry)) { diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index cc01cf826769..fa7ca04ee816 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -990,7 +990,12 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry) */ static int ceph_d_revalidate(struct dentry *dentry, struct nameidata *nd) { - struct inode *dir = dentry->d_parent->d_inode; + struct inode *dir; + + if (nd->flags & LOOKUP_RCU) + return -ECHILD; + + dir = dentry->d_parent->d_inode; dout("d_revalidate %p '%.*s' inode %p offset %lld\n", dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode, diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index e3b10ca6d453..db2a58c00f7b 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -656,6 +656,9 @@ lookup_out: static int cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd) { + if (nd->flags & LOOKUP_RCU) + return -ECHILD; + if (direntry->d_inode) { if (cifs_revalidate_dentry(direntry)) return 0; diff --git a/fs/coda/dir.c b/fs/coda/dir.c index aa40c811f8d2..619a8303766e 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -18,6 +18,7 @@ #include #include #include +#include #include @@ -541,9 +542,13 @@ out: /* called when a cache lookup succeeds */ static int coda_dentry_revalidate(struct dentry *de, struct nameidata *nd) { - struct inode *inode = de->d_inode; + struct inode *inode; struct coda_inode_info *cii; + if (nd->flags & LOOKUP_RCU) + return -ECHILD; + + inode = de->d_inode; if (!inode || coda_isroot(inode)) goto out; if (is_bad_inode(inode)) diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c index 906e803f7f79..6fc4f319b550 100644 --- a/fs/ecryptfs/dentry.c +++ b/fs/ecryptfs/dentry.c @@ -44,12 +44,17 @@ */ static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd) { - struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); - struct vfsmount *lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry); + struct dentry *lower_dentry; + struct vfsmount *lower_mnt; struct dentry *dentry_save; struct vfsmount *vfsmount_save; int rc = 1; + if (nd->flags & LOOKUP_RCU) + return -ECHILD; + + lower_dentry = ecryptfs_dentry_to_lower(dentry); + lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry); if (!lower_dentry->d_op || !lower_dentry->d_op->d_revalidate) goto out; dentry_save = nd->path.dentry; diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 3be5ed7d859f..e3ffc5e12332 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -43,6 +43,9 @@ static int vfat_revalidate_shortname(struct dentry *dentry) static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd) { + if (nd->flags & LOOKUP_RCU) + return -ECHILD; + /* This is not negative dentry. Always valid. */ if (dentry->d_inode) return 1; @@ -51,6 +54,9 @@ static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd) static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd) { + if (nd->flags & LOOKUP_RCU) + return -ECHILD; + /* * This is not negative dentry. Always valid. * diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index c9a8a426a395..07f4b5e675fc 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -156,8 +156,12 @@ u64 fuse_get_attr_version(struct fuse_conn *fc) */ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd) { - struct inode *inode = entry->d_inode; + struct inode *inode; + + if (nd->flags & LOOKUP_RCU) + return -ECHILD; + inode = entry->d_inode; if (inode && is_bad_inode(inode)) return 0; else if (fuse_dentry_time(entry) < get_jiffies_64()) { diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c index 50497f65763b..4a456338b873 100644 --- a/fs/gfs2/dentry.c +++ b/fs/gfs2/dentry.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include "gfs2.h" @@ -34,15 +35,23 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd) { - struct dentry *parent = dget_parent(dentry); - struct gfs2_sbd *sdp = GFS2_SB(parent->d_inode); - struct gfs2_inode *dip = GFS2_I(parent->d_inode); - struct inode *inode = dentry->d_inode; + struct dentry *parent; + struct gfs2_sbd *sdp; + struct gfs2_inode *dip; + struct inode *inode; struct gfs2_holder d_gh; struct gfs2_inode *ip = NULL; int error; int had_lock = 0; + if (nd->flags & LOOKUP_RCU) + return -ECHILD; + + parent = dget_parent(dentry); + sdp = GFS2_SB(parent->d_inode); + dip = GFS2_I(parent->d_inode); + inode = dentry->d_inode; + if (inode) { if (is_bad_inode(inode)) goto invalid; diff --git a/fs/hfs/sysdep.c b/fs/hfs/sysdep.c index 7478f5c219aa..19cf291eb91f 100644 --- a/fs/hfs/sysdep.c +++ b/fs/hfs/sysdep.c @@ -8,15 +8,20 @@ * This file contains the code to do various system dependent things. */ +#include #include "hfs_fs.h" /* dentry case-handling: just lowercase everything */ static int hfs_revalidate_dentry(struct dentry *dentry, struct nameidata *nd) { - struct inode *inode = dentry->d_inode; + struct inode *inode; int diff; + if (nd->flags & LOOKUP_RCU) + return -ECHILD; + + inode = dentry->d_inode; if(!inode) return 1; diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index a151cbdec626..4414e3a42264 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -1608,6 +1608,8 @@ out: static int jfs_ci_revalidate(struct dentry *dentry, struct nameidata *nd) { + if (nd->flags & LOOKUP_RCU) + return -ECHILD; /* * This is not negative dentry. Always valid. * diff --git a/fs/namei.c b/fs/namei.c index 90bd2873e117..6e275363e89d 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -563,10 +563,26 @@ void release_open_intent(struct nameidata *nd) fput(nd->intent.open.file); } +static int d_revalidate(struct dentry *dentry, struct nameidata *nd) +{ + int status; + + status = dentry->d_op->d_revalidate(dentry, nd); + if (status == -ECHILD) { + if (nameidata_dentry_drop_rcu(nd, dentry)) + return status; + status = dentry->d_op->d_revalidate(dentry, nd); + } + + return status; +} + static inline struct dentry * do_revalidate(struct dentry *dentry, struct nameidata *nd) { - int status = dentry->d_op->d_revalidate(dentry, nd); + int status; + + status = d_revalidate(dentry, nd); if (unlikely(status <= 0)) { /* * The dentry failed validation. @@ -574,14 +590,20 @@ do_revalidate(struct dentry *dentry, struct nameidata *nd) * the dentry otherwise d_revalidate is asking us * to return a fail status. */ - if (!status) { + if (status < 0) { + /* If we're in rcu-walk, we don't have a ref */ + if (!(nd->flags & LOOKUP_RCU)) + dput(dentry); + dentry = ERR_PTR(status); + + } else { + /* Don't d_invalidate in rcu-walk mode */ + if (nameidata_dentry_drop_rcu_maybe(nd, dentry)) + return ERR_PTR(-ECHILD); if (!d_invalidate(dentry)) { dput(dentry); dentry = NULL; } - } else { - dput(dentry); - dentry = ERR_PTR(status); } } return dentry; @@ -626,7 +648,7 @@ force_reval_path(struct path *path, struct nameidata *nd) if (!need_reval_dot(dentry)) return 0; - status = dentry->d_op->d_revalidate(dentry, nd); + status = d_revalidate(dentry, nd); if (status > 0) return 0; @@ -1039,12 +1061,8 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, return -ECHILD; nd->seq = seq; - if (dentry->d_flags & DCACHE_OP_REVALIDATE) { - /* We commonly drop rcu-walk here */ - if (nameidata_dentry_drop_rcu(nd, dentry)) - return -ECHILD; + if (dentry->d_flags & DCACHE_OP_REVALIDATE) goto need_revalidate; - } path->mnt = mnt; path->dentry = dentry; __follow_mount_rcu(nd, path, inode); @@ -1292,12 +1310,11 @@ return_reval: * We may need to check the cached dentry for staleness. */ if (need_reval_dot(nd->path.dentry)) { - if (nameidata_drop_rcu_maybe(nd)) - return -ECHILD; - err = -ESTALE; /* Note: we do not d_invalidate() */ - if (!nd->path.dentry->d_op->d_revalidate( - nd->path.dentry, nd)) + err = d_revalidate(nd->path.dentry, nd); + if (!err) + err = -ESTALE; + if (err < 0) break; } return_base: @@ -2080,10 +2097,11 @@ static struct file *do_last(struct nameidata *nd, struct path *path, dir = nd->path.dentry; case LAST_DOT: if (need_reval_dot(dir)) { - if (!dir->d_op->d_revalidate(dir, nd)) { + error = d_revalidate(nd->path.dentry, nd); + if (!error) error = -ESTALE; + if (error < 0) goto exit; - } } /* fallthrough */ case LAST_ROOT: diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index 4b9cbb28d7fa..28f136d4aaec 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -308,6 +309,9 @@ ncp_lookup_validate(struct dentry *dentry, struct nameidata *nd) int res, val = 0, len; __u8 __name[NCP_MAXPATHLEN + 1]; + if (nd->flags & LOOKUP_RCU) + return -ECHILD; + parent = dget_parent(dentry); dir = parent->d_inode; diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 0c75a5f3cafd..9531c052d7a4 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -29,6 +29,7 @@ #include #include #include +#include #include diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 37e0a8bb077e..58beace14b19 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -938,7 +938,8 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry) * component of the path. * We check for this using LOOKUP_CONTINUE and LOOKUP_PARENT. */ -static inline unsigned int nfs_lookup_check_intent(struct nameidata *nd, unsigned int mask) +static inline unsigned int nfs_lookup_check_intent(struct nameidata *nd, + unsigned int mask) { if (nd->flags & (LOOKUP_CONTINUE|LOOKUP_PARENT)) return 0; @@ -1018,7 +1019,7 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry, * If the parent directory is seen to have changed, we throw out the * cached dentry and do a new lookup. */ -static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) +static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) { struct inode *dir; struct inode *inode; @@ -1027,6 +1028,9 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) struct nfs_fattr *fattr = NULL; int error; + if (nd->flags & LOOKUP_RCU) + return -ECHILD; + parent = dget_parent(dentry); dir = parent->d_inode; nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE); diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c index 4d54c60ceee4..0310b16a7238 100644 --- a/fs/ocfs2/dcache.c +++ b/fs/ocfs2/dcache.c @@ -52,9 +52,15 @@ void ocfs2_dentry_attach_gen(struct dentry *dentry) static int ocfs2_dentry_revalidate(struct dentry *dentry, struct nameidata *nd) { - struct inode *inode = dentry->d_inode; + struct inode *inode; int ret = 0; /* if all else fails, just return false */ - struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); + struct ocfs2_super *osb; + + if (nd->flags & LOOKUP_RCU) + return -ECHILD; + + inode = dentry->d_inode; + osb = OCFS2_SB(dentry->d_sb); mlog_entry("(0x%p, '%.*s')\n", dentry, dentry->d_name.len, dentry->d_name.name); diff --git a/fs/proc/base.c b/fs/proc/base.c index 85f0a80912aa..dc5b2fcadc3b 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1719,10 +1719,16 @@ static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat */ static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) { - struct inode *inode = dentry->d_inode; - struct task_struct *task = get_proc_task(inode); + struct inode *inode; + struct task_struct *task; const struct cred *cred; + if (nd && nd->flags & LOOKUP_RCU) + return -ECHILD; + + inode = dentry->d_inode; + task = get_proc_task(inode); + if (task) { if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || task_dumpable(task)) { @@ -1888,12 +1894,19 @@ static int proc_fd_link(struct inode *inode, struct path *path) static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) { - struct inode *inode = dentry->d_inode; - struct task_struct *task = get_proc_task(inode); - int fd = proc_fd(inode); + struct inode *inode; + struct task_struct *task; + int fd; struct files_struct *files; const struct cred *cred; + if (nd && nd->flags & LOOKUP_RCU) + return -ECHILD; + + inode = dentry->d_inode; + task = get_proc_task(inode); + fd = proc_fd(inode); + if (task) { files = get_files_struct(task); if (files) { @@ -2563,8 +2576,14 @@ static const struct pid_entry proc_base_stuff[] = { */ static int proc_base_revalidate(struct dentry *dentry, struct nameidata *nd) { - struct inode *inode = dentry->d_inode; - struct task_struct *task = get_proc_task(inode); + struct inode *inode; + struct task_struct *task; + + if (nd->flags & LOOKUP_RCU) + return -ECHILD; + + inode = dentry->d_inode; + task = get_proc_task(inode); if (task) { put_task_struct(task); return 1; diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 35efd85a4d32..c9097f43b425 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -5,6 +5,7 @@ #include #include #include +#include #include "internal.h" static const struct dentry_operations proc_sys_dentry_operations; @@ -389,6 +390,8 @@ static const struct inode_operations proc_sys_dir_operations = { static int proc_sys_revalidate(struct dentry *dentry, struct nameidata *nd) { + if (nd->flags & LOOKUP_RCU) + return -ECHILD; return !PROC_I(dentry->d_inode)->sysctl->unregistering; } diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index e0f0d7ea10a1..9ea22a56cdf1 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -972,6 +972,8 @@ int reiserfs_permission(struct inode *inode, int mask) static int xattr_hide_revalidate(struct dentry *dentry, struct nameidata *nd) { + if (nd->flags & LOOKUP_RCU) + return -ECHILD; return -EPERM; } diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 3e076caa8daf..ea9120a830d8 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -239,9 +239,13 @@ static int sysfs_dentry_delete(const struct dentry *dentry) static int sysfs_dentry_revalidate(struct dentry *dentry, struct nameidata *nd) { - struct sysfs_dirent *sd = dentry->d_fsdata; + struct sysfs_dirent *sd; int is_dir; + if (nd->flags & LOOKUP_RCU) + return -ECHILD; + + sd = dentry->d_fsdata; mutex_lock(&sysfs_mutex); /* The sysfs dirent has been deleted */ diff --git a/include/linux/dcache.h b/include/linux/dcache.h index b1aeda077258..8b2064d02928 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -190,7 +190,6 @@ struct dentry_operations { #define DCACHE_OP_REVALIDATE 0x4000 #define DCACHE_OP_DELETE 0x8000 - extern spinlock_t dcache_inode_lock; extern seqlock_t rename_lock; -- cgit v1.2.3 From b74c79e99389cd79b31fcc08f82c24e492e63c7e Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:58 +1100 Subject: fs: provide rcu-walk aware permission i_ops Signed-off-by: Nick Piggin --- Documentation/filesystems/Locking | 6 +-- Documentation/filesystems/path-lookup.txt | 44 ++++++++++++++++-- Documentation/filesystems/porting | 5 +++ Documentation/filesystems/vfs.txt | 10 ++++- drivers/staging/smbfs/file.c | 5 ++- fs/9p/acl.c | 5 ++- fs/9p/acl.h | 2 +- fs/afs/internal.h | 2 +- fs/afs/security.c | 7 ++- fs/bad_inode.c | 5 ++- fs/btrfs/acl.c | 6 ++- fs/btrfs/ctree.h | 2 +- fs/btrfs/inode.c | 7 ++- fs/ceph/inode.c | 11 +++-- fs/ceph/super.h | 2 +- fs/cifs/cifsfs.c | 7 ++- fs/coda/dir.c | 5 ++- fs/coda/pioctl.c | 6 ++- fs/ecryptfs/inode.c | 4 +- fs/ext2/acl.c | 8 +++- fs/ext2/acl.h | 2 +- fs/ext3/acl.c | 8 +++- fs/ext3/acl.h | 2 +- fs/ext4/acl.c | 8 +++- fs/ext4/acl.h | 2 +- fs/fuse/dir.c | 10 +++-- fs/generic_acl.c | 8 +++- fs/gfs2/acl.c | 5 ++- fs/gfs2/acl.h | 2 +- fs/gfs2/file.c | 2 +- fs/gfs2/inode.c | 4 +- fs/gfs2/inode.h | 2 +- fs/gfs2/ops_inode.c | 18 +++++--- fs/hostfs/hostfs_kern.c | 7 ++- fs/hpfs/namei.c | 2 +- fs/jffs2/acl.c | 5 ++- fs/jffs2/acl.h | 2 +- fs/jfs/acl.c | 8 +++- fs/jfs/jfs_acl.h | 2 +- fs/logfs/dir.c | 6 ++- fs/namei.c | 75 ++++++++++++------------------- fs/nfs/dir.c | 7 ++- fs/nilfs2/inode.c | 10 +++-- fs/nilfs2/nilfs.h | 2 +- fs/ocfs2/acl.c | 8 +++- fs/ocfs2/acl.h | 2 +- fs/ocfs2/file.c | 7 ++- fs/ocfs2/file.h | 2 +- fs/proc/base.c | 6 ++- fs/proc/proc_sysctl.c | 5 ++- fs/reiserfs/xattr.c | 14 ++++-- fs/sysfs/inode.c | 11 +++-- fs/sysfs/sysfs.h | 2 +- fs/xfs/linux-2.6/xfs_acl.c | 8 +++- fs/xfs/xfs_acl.h | 2 +- include/linux/coda_linux.h | 2 +- include/linux/fs.h | 10 +++-- include/linux/generic_acl.h | 2 +- include/linux/nfs_fs.h | 2 +- include/linux/reiserfs_xattr.h | 2 +- 60 files changed, 287 insertions(+), 146 deletions(-) (limited to 'drivers') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index e90ffe61eb65..977d8919cc69 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -47,8 +47,8 @@ ata *); void * (*follow_link) (struct dentry *, struct nameidata *); void (*put_link) (struct dentry *, struct nameidata *, void *); void (*truncate) (struct inode *); - int (*permission) (struct inode *, int, struct nameidata *); - int (*check_acl)(struct inode *, int); + int (*permission) (struct inode *, int, unsigned int); + int (*check_acl)(struct inode *, int, unsigned int); int (*setattr) (struct dentry *, struct iattr *); int (*getattr) (struct vfsmount *, struct dentry *, struct kstat *); int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); @@ -76,7 +76,7 @@ follow_link: no put_link: no truncate: yes (see below) setattr: yes -permission: no +permission: no (may not block if called in rcu-walk mode) check_acl: no getattr: no setxattr: yes diff --git a/Documentation/filesystems/path-lookup.txt b/Documentation/filesystems/path-lookup.txt index 8789d1810bed..eb59c8b44be9 100644 --- a/Documentation/filesystems/path-lookup.txt +++ b/Documentation/filesystems/path-lookup.txt @@ -316,11 +316,9 @@ The detailed design for rcu-walk is like this: The cases where rcu-walk cannot continue are: * NULL dentry (ie. any uncached path element) -* parent with d_inode->i_op->permission or ACLs * Following links -In future patches, permission checks become rcu-walk aware. It may be possible -eventually to make following links rcu-walk aware. +It may be possible eventually to make following links rcu-walk aware. Uncached path elements will always require dropping to ref-walk mode, at the very least because i_mutex needs to be grabbed, and objects allocated. @@ -336,9 +334,49 @@ or stored into. The result is massive improvements in performance and scalability of path resolution. +Interesting statistics +====================== + +The following table gives rcu lookup statistics for a few simple workloads +(2s12c24t Westmere, debian non-graphical system). Ungraceful are attempts to +drop rcu that fail due to d_seq failure and requiring the entire path lookup +again. Other cases are successful rcu-drops that are required before the final +element, nodentry for missing dentry, revalidate for filesystem revalidate +routine requiring rcu drop, permission for permission check requiring drop, +and link for symlink traversal requiring drop. + + rcu-lookups restart nodentry link revalidate permission +bootup 47121 0 4624 1010 10283 7852 +dbench 25386793 0 6778659(26.7%) 55 549 1156 +kbuild 2696672 10 64442(2.3%) 108764(4.0%) 1 1590 +git diff 39605 0 28 2 0 106 +vfstest 24185492 4945 708725(2.9%) 1076136(4.4%) 0 2651 + +What this shows is that failed rcu-walk lookups, ie. ones that are restarted +entirely with ref-walk, are quite rare. Even the "vfstest" case which +specifically has concurrent renames/mkdir/rmdir/ creat/unlink/etc to excercise +such races is not showing a huge amount of restarts. + +Dropping from rcu-walk to ref-walk mean that we have encountered a dentry where +the reference count needs to be taken for some reason. This is either because +we have reached the target of the path walk, or because we have encountered a +condition that can't be resolved in rcu-walk mode. Ideally, we drop rcu-walk +only when we have reached the target dentry, so the other statistics show where +this does not happen. + +Note that a graceful drop from rcu-walk mode due to something such as the +dentry not existing (which can be common) is not necessarily a failure of +rcu-walk scheme, because some elements of the path may have been walked in +rcu-walk mode. The further we get from common path elements (such as cwd or +root), the less contended the dentry is likely to be. The closer we are to +common path elements, the more likely they will exist in dentry cache. + + Papers and other documentation on dcache locking ================================================ 1. Scaling dcache with RCU (http://linuxjournal.com/article.php?sid=7124). 2. http://lse.sourceforge.net/locking/dcache/dcache.html + + diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index cd9756a2709d..07a32b42cf9c 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -379,4 +379,9 @@ where possible. the filesystem provides it), which requires dropping out of rcu-walk mode. This may now be called in rcu-walk mode (nd->flags & LOOKUP_RCU). -ECHILD should be returned if the filesystem cannot handle rcu-walk. See +Documentation/filesystems/vfs.txt for more details. + + permission and check_acl are inode permission checks that are called +on many or all directory inodes on the way down a path walk (to check for +exec permission). These must now be rcu-walk aware (flags & IPERM_RCU). See Documentation/filesystems/vfs.txt for more details. diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index c936b4912383..fbb324e2bd43 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -325,7 +325,8 @@ struct inode_operations { void * (*follow_link) (struct dentry *, struct nameidata *); void (*put_link) (struct dentry *, struct nameidata *, void *); void (*truncate) (struct inode *); - int (*permission) (struct inode *, int, struct nameidata *); + int (*permission) (struct inode *, int, unsigned int); + int (*check_acl)(struct inode *, int, unsigned int); int (*setattr) (struct dentry *, struct iattr *); int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); @@ -414,6 +415,13 @@ otherwise noted. permission: called by the VFS to check for access rights on a POSIX-like filesystem. + May be called in rcu-walk mode (flags & IPERM_RCU). If in rcu-walk + mode, the filesystem must check the permission without blocking or + storing to the inode. + + If a situation is encountered that rcu-walk cannot handle, return + -ECHILD and it will be called again in ref-walk mode. + setattr: called by the VFS to set attributes for a file. This method is called by chmod(2) and related system calls. diff --git a/drivers/staging/smbfs/file.c b/drivers/staging/smbfs/file.c index 5dcd19c60eb9..31372e7b12de 100644 --- a/drivers/staging/smbfs/file.c +++ b/drivers/staging/smbfs/file.c @@ -407,11 +407,14 @@ smb_file_release(struct inode *inode, struct file * file) * privileges, so we need our own check for this. */ static int -smb_file_permission(struct inode *inode, int mask) +smb_file_permission(struct inode *inode, int mask, unsigned int flags) { int mode = inode->i_mode; int error = 0; + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + VERBOSE("mode=%x, mask=%x\n", mode, mask); /* Look at user permissions */ diff --git a/fs/9p/acl.c b/fs/9p/acl.c index 12d602351dbe..6e58c4ca1e6e 100644 --- a/fs/9p/acl.c +++ b/fs/9p/acl.c @@ -91,11 +91,14 @@ static struct posix_acl *v9fs_get_cached_acl(struct inode *inode, int type) return acl; } -int v9fs_check_acl(struct inode *inode, int mask) +int v9fs_check_acl(struct inode *inode, int mask, unsigned int flags) { struct posix_acl *acl; struct v9fs_session_info *v9ses; + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + v9ses = v9fs_inode2v9ses(inode); if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) { /* diff --git a/fs/9p/acl.h b/fs/9p/acl.h index 59e18c2e8c7e..7ef3ac9f6d95 100644 --- a/fs/9p/acl.h +++ b/fs/9p/acl.h @@ -16,7 +16,7 @@ #ifdef CONFIG_9P_FS_POSIX_ACL extern int v9fs_get_acl(struct inode *, struct p9_fid *); -extern int v9fs_check_acl(struct inode *inode, int mask); +extern int v9fs_check_acl(struct inode *inode, int mask, unsigned int flags); extern int v9fs_acl_chmod(struct dentry *); extern int v9fs_set_create_acl(struct dentry *, struct posix_acl *, struct posix_acl *); diff --git a/fs/afs/internal.h b/fs/afs/internal.h index cca8eef736fc..6d4bc1c8ff60 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -624,7 +624,7 @@ extern void afs_clear_permits(struct afs_vnode *); extern void afs_cache_permit(struct afs_vnode *, struct key *, long); extern void afs_zap_permits(struct rcu_head *); extern struct key *afs_request_key(struct afs_cell *); -extern int afs_permission(struct inode *, int); +extern int afs_permission(struct inode *, int, unsigned int); /* * server.c diff --git a/fs/afs/security.c b/fs/afs/security.c index bb4ed144d0e4..f44b9d355377 100644 --- a/fs/afs/security.c +++ b/fs/afs/security.c @@ -285,13 +285,16 @@ static int afs_check_permit(struct afs_vnode *vnode, struct key *key, * - AFS ACLs are attached to directories only, and a file is controlled by its * parent directory's ACL */ -int afs_permission(struct inode *inode, int mask) +int afs_permission(struct inode *inode, int mask, unsigned int flags) { struct afs_vnode *vnode = AFS_FS_I(inode); afs_access_t uninitialized_var(access); struct key *key; int ret; + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + _enter("{{%x:%u},%lx},%x,", vnode->fid.vid, vnode->fid.vnode, vnode->flags, mask); @@ -347,7 +350,7 @@ int afs_permission(struct inode *inode, int mask) } key_put(key); - ret = generic_permission(inode, mask, NULL); + ret = generic_permission(inode, mask, flags, NULL); _leave(" = %d", ret); return ret; diff --git a/fs/bad_inode.c b/fs/bad_inode.c index f024d8aaddef..9ad2369d9e35 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c @@ -229,8 +229,11 @@ static int bad_inode_readlink(struct dentry *dentry, char __user *buffer, return -EIO; } -static int bad_inode_permission(struct inode *inode, int mask) +static int bad_inode_permission(struct inode *inode, int mask, unsigned int flags) { + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + return -EIO; } diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 2222d161c7b6..cb518a4b917c 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -185,13 +185,15 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name, return ret; } -int btrfs_check_acl(struct inode *inode, int mask) +int btrfs_check_acl(struct inode *inode, int mask, unsigned int flags) { struct posix_acl *acl; int error = -EAGAIN; - acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS); + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS); if (IS_ERR(acl)) return PTR_ERR(acl); if (acl) { diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index af52f6d7a4d8..a142d204b526 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2544,7 +2544,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait); /* acl.c */ #ifdef CONFIG_BTRFS_FS_POSIX_ACL -int btrfs_check_acl(struct inode *inode, int mask); +int btrfs_check_acl(struct inode *inode, int mask, unsigned int flags); #else #define btrfs_check_acl NULL #endif diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 63e4546b478a..5cf0db0081f9 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7211,11 +7211,14 @@ static int btrfs_set_page_dirty(struct page *page) return __set_page_dirty_nobuffers(page); } -static int btrfs_permission(struct inode *inode, int mask) +static int btrfs_permission(struct inode *inode, int mask, unsigned int flags) { + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE)) return -EACCES; - return generic_permission(inode, mask, btrfs_check_acl); + return generic_permission(inode, mask, flags, btrfs_check_acl); } static const struct inode_operations btrfs_dir_inode_operations = { diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 47f8c8baf3b5..e61de4f7b99d 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -1781,12 +1781,17 @@ int ceph_do_getattr(struct inode *inode, int mask) * Check inode permissions. We verify we have a valid value for * the AUTH cap, then call the generic handler. */ -int ceph_permission(struct inode *inode, int mask) +int ceph_permission(struct inode *inode, int mask, unsigned int flags) { - int err = ceph_do_getattr(inode, CEPH_CAP_AUTH_SHARED); + int err; + + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + + err = ceph_do_getattr(inode, CEPH_CAP_AUTH_SHARED); if (!err) - err = generic_permission(inode, mask, NULL); + err = generic_permission(inode, mask, flags, NULL); return err; } diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 7f01728a4657..4553d8829edb 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -665,7 +665,7 @@ extern void ceph_queue_invalidate(struct inode *inode); extern void ceph_queue_writeback(struct inode *inode); extern int ceph_do_getattr(struct inode *inode, int mask); -extern int ceph_permission(struct inode *inode, int mask); +extern int ceph_permission(struct inode *inode, int mask, unsigned int flags); extern int ceph_setattr(struct dentry *dentry, struct iattr *attr); extern int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 223717dcc401..8e21e0fe65d5 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -283,10 +283,13 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } -static int cifs_permission(struct inode *inode, int mask) +static int cifs_permission(struct inode *inode, int mask, unsigned int flags) { struct cifs_sb_info *cifs_sb; + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + cifs_sb = CIFS_SB(inode->i_sb); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) { @@ -298,7 +301,7 @@ static int cifs_permission(struct inode *inode, int mask) on the client (above and beyond ACL on servers) for servers which do not support setting and viewing mode bits, so allowing client to check permissions is useful */ - return generic_permission(inode, mask, NULL); + return generic_permission(inode, mask, flags, NULL); } static struct kmem_cache *cifs_inode_cachep; diff --git a/fs/coda/dir.c b/fs/coda/dir.c index 619a8303766e..29badd91360f 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -135,10 +135,13 @@ exit: } -int coda_permission(struct inode *inode, int mask) +int coda_permission(struct inode *inode, int mask, unsigned int flags) { int error; + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + mask &= MAY_READ | MAY_WRITE | MAY_EXEC; if (!mask) diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c index 2fd89b5c5c7b..741f0bd03918 100644 --- a/fs/coda/pioctl.c +++ b/fs/coda/pioctl.c @@ -24,7 +24,7 @@ #include /* pioctl ops */ -static int coda_ioctl_permission(struct inode *inode, int mask); +static int coda_ioctl_permission(struct inode *inode, int mask, unsigned int flags); static long coda_pioctl(struct file *filp, unsigned int cmd, unsigned long user_data); @@ -41,8 +41,10 @@ const struct file_operations coda_ioctl_operations = { }; /* the coda pioctl inode ops */ -static int coda_ioctl_permission(struct inode *inode, int mask) +static int coda_ioctl_permission(struct inode *inode, int mask, unsigned int flags) { + if (flags & IPERM_FLAG_RCU) + return -ECHILD; return (mask & MAY_EXEC) ? -EACCES : 0; } diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index f91b35db4c6e..337352a94751 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -980,8 +980,10 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length) } static int -ecryptfs_permission(struct inode *inode, int mask) +ecryptfs_permission(struct inode *inode, int mask, unsigned int flags) { + if (flags & IPERM_FLAG_RCU) + return -ECHILD; return inode_permission(ecryptfs_inode_to_lower(inode), mask); } diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c index 2bcc0431bada..dd9bb3f0c8d7 100644 --- a/fs/ext2/acl.c +++ b/fs/ext2/acl.c @@ -232,10 +232,14 @@ ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl) } int -ext2_check_acl(struct inode *inode, int mask) +ext2_check_acl(struct inode *inode, int mask, unsigned int flags) { - struct posix_acl *acl = ext2_get_acl(inode, ACL_TYPE_ACCESS); + struct posix_acl *acl; + + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + acl = ext2_get_acl(inode, ACL_TYPE_ACCESS); if (IS_ERR(acl)) return PTR_ERR(acl); if (acl) { diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h index 3ff6cbb9ac44..c939b7b12099 100644 --- a/fs/ext2/acl.h +++ b/fs/ext2/acl.h @@ -54,7 +54,7 @@ static inline int ext2_acl_count(size_t size) #ifdef CONFIG_EXT2_FS_POSIX_ACL /* acl.c */ -extern int ext2_check_acl (struct inode *, int); +extern int ext2_check_acl (struct inode *, int, unsigned int); extern int ext2_acl_chmod (struct inode *); extern int ext2_init_acl (struct inode *, struct inode *); diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c index 8a11fe212183..9e49da8302d3 100644 --- a/fs/ext3/acl.c +++ b/fs/ext3/acl.c @@ -240,10 +240,14 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type, } int -ext3_check_acl(struct inode *inode, int mask) +ext3_check_acl(struct inode *inode, int mask, unsigned int flags) { - struct posix_acl *acl = ext3_get_acl(inode, ACL_TYPE_ACCESS); + struct posix_acl *acl; + + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + acl = ext3_get_acl(inode, ACL_TYPE_ACCESS); if (IS_ERR(acl)) return PTR_ERR(acl); if (acl) { diff --git a/fs/ext3/acl.h b/fs/ext3/acl.h index 597334626de9..5faf8048e906 100644 --- a/fs/ext3/acl.h +++ b/fs/ext3/acl.h @@ -54,7 +54,7 @@ static inline int ext3_acl_count(size_t size) #ifdef CONFIG_EXT3_FS_POSIX_ACL /* acl.c */ -extern int ext3_check_acl (struct inode *, int); +extern int ext3_check_acl (struct inode *, int, unsigned int); extern int ext3_acl_chmod (struct inode *); extern int ext3_init_acl (handle_t *, struct inode *, struct inode *); diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index 5e2ed4504ead..373dcaeedba9 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c @@ -238,10 +238,14 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type, } int -ext4_check_acl(struct inode *inode, int mask) +ext4_check_acl(struct inode *inode, int mask, unsigned int flags) { - struct posix_acl *acl = ext4_get_acl(inode, ACL_TYPE_ACCESS); + struct posix_acl *acl; + + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + acl = ext4_get_acl(inode, ACL_TYPE_ACCESS); if (IS_ERR(acl)) return PTR_ERR(acl); if (acl) { diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h index 9d843d5deac4..dec821168fd4 100644 --- a/fs/ext4/acl.h +++ b/fs/ext4/acl.h @@ -54,7 +54,7 @@ static inline int ext4_acl_count(size_t size) #ifdef CONFIG_EXT4_FS_POSIX_ACL /* acl.c */ -extern int ext4_check_acl(struct inode *, int); +extern int ext4_check_acl(struct inode *, int, unsigned int); extern int ext4_acl_chmod(struct inode *); extern int ext4_init_acl(handle_t *, struct inode *, struct inode *); diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 07f4b5e675fc..f738599fd8cd 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -985,12 +985,15 @@ static int fuse_access(struct inode *inode, int mask) * access request is sent. Execute permission is still checked * locally based on file mode. */ -static int fuse_permission(struct inode *inode, int mask) +static int fuse_permission(struct inode *inode, int mask, unsigned int flags) { struct fuse_conn *fc = get_fuse_conn(inode); bool refreshed = false; int err = 0; + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + if (!fuse_allow_task(fc, current)) return -EACCES; @@ -1005,7 +1008,7 @@ static int fuse_permission(struct inode *inode, int mask) } if (fc->flags & FUSE_DEFAULT_PERMISSIONS) { - err = generic_permission(inode, mask, NULL); + err = generic_permission(inode, mask, flags, NULL); /* If permission is denied, try to refresh file attributes. This is also needed, because the root @@ -1013,7 +1016,8 @@ static int fuse_permission(struct inode *inode, int mask) if (err == -EACCES && !refreshed) { err = fuse_do_getattr(inode, NULL, NULL); if (!err) - err = generic_permission(inode, mask, NULL); + err = generic_permission(inode, mask, + flags, NULL); } /* Note: the opposite of the above test does not diff --git a/fs/generic_acl.c b/fs/generic_acl.c index 6bc9e3a5a693..628004282130 100644 --- a/fs/generic_acl.c +++ b/fs/generic_acl.c @@ -190,10 +190,14 @@ generic_acl_chmod(struct inode *inode) } int -generic_check_acl(struct inode *inode, int mask) +generic_check_acl(struct inode *inode, int mask, unsigned int flags) { - struct posix_acl *acl = get_cached_acl(inode, ACL_TYPE_ACCESS); + struct posix_acl *acl; + + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + acl = get_cached_acl(inode, ACL_TYPE_ACCESS); if (acl) { int error = posix_acl_permission(inode, acl, mask); posix_acl_release(acl); diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index 48171f4c943d..7118f1a780a9 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c @@ -75,11 +75,14 @@ static struct posix_acl *gfs2_acl_get(struct gfs2_inode *ip, int type) * Returns: errno */ -int gfs2_check_acl(struct inode *inode, int mask) +int gfs2_check_acl(struct inode *inode, int mask, unsigned int flags) { struct posix_acl *acl; int error; + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + acl = gfs2_acl_get(GFS2_I(inode), ACL_TYPE_ACCESS); if (IS_ERR(acl)) return PTR_ERR(acl); diff --git a/fs/gfs2/acl.h b/fs/gfs2/acl.h index b522b0cb39ea..a93907c8159b 100644 --- a/fs/gfs2/acl.h +++ b/fs/gfs2/acl.h @@ -16,7 +16,7 @@ #define GFS2_POSIX_ACL_DEFAULT "posix_acl_default" #define GFS2_ACL_MAX_ENTRIES 25 -extern int gfs2_check_acl(struct inode *inode, int mask); +extern int gfs2_check_acl(struct inode *inode, int mask, unsigned int); extern int gfs2_acl_create(struct gfs2_inode *dip, struct inode *inode); extern int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr); extern const struct xattr_handler gfs2_xattr_system_handler; diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index aa996471ec5c..fca6689e12e6 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -241,7 +241,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask) !capable(CAP_LINUX_IMMUTABLE)) goto out; if (!IS_IMMUTABLE(inode)) { - error = gfs2_permission(inode, MAY_WRITE); + error = gfs2_permission(inode, MAY_WRITE, 0); if (error) goto out; } diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index e1213f7f9217..6163be9686be 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -509,7 +509,7 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, } if (!is_root) { - error = gfs2_permission(dir, MAY_EXEC); + error = gfs2_permission(dir, MAY_EXEC, 0); if (error) goto out; } @@ -539,7 +539,7 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name, { int error; - error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC); + error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, 0); if (error) return error; diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index d8499fadcc53..732a183efdb3 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h @@ -113,7 +113,7 @@ extern struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, extern struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, unsigned int mode, dev_t dev); -extern int gfs2_permission(struct inode *inode, int mask); +extern int gfs2_permission(struct inode *inode, int mask, unsigned int flags); extern int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr); extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name); extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf); diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index f28f89796f4d..5c63a06fcd7c 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -166,7 +166,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, if (error) goto out_child; - error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC); + error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC, 0); if (error) goto out_gunlock; @@ -289,7 +289,7 @@ static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, if (IS_APPEND(&dip->i_inode)) return -EPERM; - error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC); + error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, 0); if (error) return error; @@ -822,7 +822,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, } } } else { - error = gfs2_permission(ndir, MAY_WRITE | MAY_EXEC); + error = gfs2_permission(ndir, MAY_WRITE | MAY_EXEC, 0); if (error) goto out_gunlock; @@ -857,7 +857,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, /* Check out the dir to be renamed */ if (dir_rename) { - error = gfs2_permission(odentry->d_inode, MAY_WRITE); + error = gfs2_permission(odentry->d_inode, MAY_WRITE, 0); if (error) goto out_gunlock; } @@ -1041,13 +1041,17 @@ static void gfs2_put_link(struct dentry *dentry, struct nameidata *nd, void *p) * Returns: errno */ -int gfs2_permission(struct inode *inode, int mask) +int gfs2_permission(struct inode *inode, int mask, unsigned int flags) { - struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_inode *ip; struct gfs2_holder i_gh; int error; int unlock = 0; + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + + ip = GFS2_I(inode); if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) { error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); if (error) @@ -1058,7 +1062,7 @@ int gfs2_permission(struct inode *inode, int mask) if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode)) error = -EACCES; else - error = generic_permission(inode, mask, gfs2_check_acl); + error = generic_permission(inode, mask, flags, gfs2_check_acl); if (unlock) gfs2_glock_dq_uninit(&i_gh); diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 0bc81cf256b8..d3244d949a4e 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -749,11 +749,14 @@ int hostfs_rename(struct inode *from_ino, struct dentry *from, return err; } -int hostfs_permission(struct inode *ino, int desired) +int hostfs_permission(struct inode *ino, int desired, unsigned int flags) { char *name; int r = 0, w = 0, x = 0, err; + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + if (desired & MAY_READ) r = 1; if (desired & MAY_WRITE) w = 1; if (desired & MAY_EXEC) x = 1; @@ -768,7 +771,7 @@ int hostfs_permission(struct inode *ino, int desired) err = access_file(name, r, w, x); __putname(name); if (!err) - err = generic_permission(ino, desired, NULL); + err = generic_permission(ino, desired, flags, NULL); return err; } diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c index 11c2b4080f65..f4ad9e31ddc4 100644 --- a/fs/hpfs/namei.c +++ b/fs/hpfs/namei.c @@ -419,7 +419,7 @@ again: unlock_kernel(); return -ENOSPC; } - if (generic_permission(inode, MAY_WRITE, NULL) || + if (generic_permission(inode, MAY_WRITE, 0, NULL) || !S_ISREG(inode->i_mode) || get_write_access(inode)) { d_rehash(dentry); diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c index 54a92fd02bbd..95b79672150a 100644 --- a/fs/jffs2/acl.c +++ b/fs/jffs2/acl.c @@ -259,11 +259,14 @@ static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl) return rc; } -int jffs2_check_acl(struct inode *inode, int mask) +int jffs2_check_acl(struct inode *inode, int mask, unsigned int flags) { struct posix_acl *acl; int rc; + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + acl = jffs2_get_acl(inode, ACL_TYPE_ACCESS); if (IS_ERR(acl)) return PTR_ERR(acl); diff --git a/fs/jffs2/acl.h b/fs/jffs2/acl.h index 5e42de8d9541..3119f59253d3 100644 --- a/fs/jffs2/acl.h +++ b/fs/jffs2/acl.h @@ -26,7 +26,7 @@ struct jffs2_acl_header { #ifdef CONFIG_JFFS2_FS_POSIX_ACL -extern int jffs2_check_acl(struct inode *, int); +extern int jffs2_check_acl(struct inode *, int, unsigned int); extern int jffs2_acl_chmod(struct inode *); extern int jffs2_init_acl_pre(struct inode *, struct inode *, int *); extern int jffs2_init_acl_post(struct inode *); diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c index 1057a4998e4e..e5de9422fa32 100644 --- a/fs/jfs/acl.c +++ b/fs/jfs/acl.c @@ -114,10 +114,14 @@ out: return rc; } -int jfs_check_acl(struct inode *inode, int mask) +int jfs_check_acl(struct inode *inode, int mask, unsigned int flags) { - struct posix_acl *acl = jfs_get_acl(inode, ACL_TYPE_ACCESS); + struct posix_acl *acl; + + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + acl = jfs_get_acl(inode, ACL_TYPE_ACCESS); if (IS_ERR(acl)) return PTR_ERR(acl); if (acl) { diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h index 54e07559878d..f9285c4900fa 100644 --- a/fs/jfs/jfs_acl.h +++ b/fs/jfs/jfs_acl.h @@ -20,7 +20,7 @@ #ifdef CONFIG_JFS_POSIX_ACL -int jfs_check_acl(struct inode *, int); +int jfs_check_acl(struct inode *, int, unsigned int flags); int jfs_init_acl(tid_t, struct inode *, struct inode *); int jfs_acl_chmod(struct inode *inode); diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c index 409dfd65e9a1..f9ddf0c388c8 100644 --- a/fs/logfs/dir.c +++ b/fs/logfs/dir.c @@ -555,9 +555,11 @@ static int logfs_symlink(struct inode *dir, struct dentry *dentry, return __logfs_create(dir, dentry, inode, target, destlen); } -static int logfs_permission(struct inode *inode, int mask) +static int logfs_permission(struct inode *inode, int mask, unsigned int flags) { - return generic_permission(inode, mask, NULL); + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + return generic_permission(inode, mask, flags, NULL); } static int logfs_link(struct dentry *old_dentry, struct inode *dir, diff --git a/fs/namei.c b/fs/namei.c index 6e275363e89d..4e957bf744ae 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -169,8 +169,8 @@ EXPORT_SYMBOL(putname); /* * This does basic POSIX ACL permission checking */ -static inline int __acl_permission_check(struct inode *inode, int mask, - int (*check_acl)(struct inode *inode, int mask), int rcu) +static int acl_permission_check(struct inode *inode, int mask, unsigned int flags, + int (*check_acl)(struct inode *inode, int mask, unsigned int flags)) { umode_t mode = inode->i_mode; @@ -180,13 +180,9 @@ static inline int __acl_permission_check(struct inode *inode, int mask, mode >>= 6; else { if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) { - if (rcu) { - return -ECHILD; - } else { - int error = check_acl(inode, mask); - if (error != -EAGAIN) - return error; - } + int error = check_acl(inode, mask, flags); + if (error != -EAGAIN) + return error; } if (in_group_p(inode->i_gid)) @@ -201,32 +197,31 @@ static inline int __acl_permission_check(struct inode *inode, int mask, return -EACCES; } -static inline int acl_permission_check(struct inode *inode, int mask, - int (*check_acl)(struct inode *inode, int mask)) -{ - return __acl_permission_check(inode, mask, check_acl, 0); -} - /** - * generic_permission - check for access rights on a Posix-like filesystem + * generic_permission - check for access rights on a Posix-like filesystem * @inode: inode to check access rights for * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) * @check_acl: optional callback to check for Posix ACLs + * @flags IPERM_FLAG_ flags. * * Used to check for read/write/execute permissions on a file. * We use "fsuid" for this, letting us set arbitrary permissions * for filesystem access without changing the "normal" uids which - * are used for other things.. + * are used for other things. + * + * generic_permission is rcu-walk aware. It returns -ECHILD in case an rcu-walk + * request cannot be satisfied (eg. requires blocking or too much complexity). + * It would then be called again in ref-walk mode. */ -int generic_permission(struct inode *inode, int mask, - int (*check_acl)(struct inode *inode, int mask)) +int generic_permission(struct inode *inode, int mask, unsigned int flags, + int (*check_acl)(struct inode *inode, int mask, unsigned int flags)) { int ret; /* * Do the basic POSIX ACL permission checks. */ - ret = acl_permission_check(inode, mask, check_acl); + ret = acl_permission_check(inode, mask, flags, check_acl); if (ret != -EACCES) return ret; @@ -281,9 +276,10 @@ int inode_permission(struct inode *inode, int mask) } if (inode->i_op->permission) - retval = inode->i_op->permission(inode, mask); + retval = inode->i_op->permission(inode, mask, 0); else - retval = generic_permission(inode, mask, inode->i_op->check_acl); + retval = generic_permission(inode, mask, 0, + inode->i_op->check_acl); if (retval) return retval; @@ -668,22 +664,19 @@ force_reval_path(struct path *path, struct nameidata *nd) * short-cut DAC fails, then call ->permission() to do more * complete permission check. */ -static inline int __exec_permission(struct inode *inode, int rcu) +static inline int exec_permission(struct inode *inode, unsigned int flags) { int ret; if (inode->i_op->permission) { - if (rcu) - return -ECHILD; - ret = inode->i_op->permission(inode, MAY_EXEC); - if (!ret) - goto ok; - return ret; + ret = inode->i_op->permission(inode, MAY_EXEC, flags); + } else { + ret = acl_permission_check(inode, MAY_EXEC, flags, + inode->i_op->check_acl); } - ret = __acl_permission_check(inode, MAY_EXEC, inode->i_op->check_acl, rcu); - if (!ret) + if (likely(!ret)) goto ok; - if (rcu && ret == -ECHILD) + if (ret == -ECHILD) return ret; if (capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH)) @@ -691,17 +684,7 @@ static inline int __exec_permission(struct inode *inode, int rcu) return ret; ok: - return security_inode_exec_permission(inode, rcu); -} - -static int exec_permission(struct inode *inode) -{ - return __exec_permission(inode, 0); -} - -static int exec_permission_rcu(struct inode *inode) -{ - return __exec_permission(inode, 1); + return security_inode_exec_permission(inode, flags); } static __always_inline void set_root(struct nameidata *nd) @@ -1165,7 +1148,7 @@ static int link_path_walk(const char *name, struct nameidata *nd) nd->flags |= LOOKUP_CONTINUE; if (nd->flags & LOOKUP_RCU) { - err = exec_permission_rcu(nd->inode); + err = exec_permission(nd->inode, IPERM_FLAG_RCU); if (err == -ECHILD) { if (nameidata_drop_rcu(nd)) return -ECHILD; @@ -1173,7 +1156,7 @@ static int link_path_walk(const char *name, struct nameidata *nd) } } else { exec_again: - err = exec_permission(nd->inode); + err = exec_permission(nd->inode, 0); } if (err) break; @@ -1620,7 +1603,7 @@ static struct dentry *__lookup_hash(struct qstr *name, struct dentry *dentry; int err; - err = exec_permission(inode); + err = exec_permission(inode, 0); if (err) return ERR_PTR(err); diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 58beace14b19..d33da530097a 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2189,11 +2189,14 @@ int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags) return nfs_do_access(inode, cred, nfs_open_permission_mask(openflags)); } -int nfs_permission(struct inode *inode, int mask) +int nfs_permission(struct inode *inode, int mask, unsigned int flags) { struct rpc_cred *cred; int res = 0; + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + nfs_inc_stats(inode, NFSIOS_VFSACCESS); if ((mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0) @@ -2241,7 +2244,7 @@ out: out_notsup: res = nfs_revalidate_inode(NFS_SERVER(inode), inode); if (res == 0) - res = generic_permission(inode, mask, NULL); + res = generic_permission(inode, mask, flags, NULL); goto out; } diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 71d4bc8464e0..77b48c8fab17 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -785,15 +785,19 @@ out_err: return err; } -int nilfs_permission(struct inode *inode, int mask) +int nilfs_permission(struct inode *inode, int mask, unsigned int flags) { - struct nilfs_root *root = NILFS_I(inode)->i_root; + struct nilfs_root *root; + + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + root = NILFS_I(inode)->i_root; if ((mask & MAY_WRITE) && root && root->cno != NILFS_CPTREE_CURRENT_CNO) return -EROFS; /* snapshot is not writable */ - return generic_permission(inode, mask, NULL); + return generic_permission(inode, mask, flags, NULL); } int nilfs_load_inode_block(struct nilfs_sb_info *sbi, struct inode *inode, diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index f7560da5a567..0ca98823db59 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -256,7 +256,7 @@ extern void nilfs_update_inode(struct inode *, struct buffer_head *); extern void nilfs_truncate(struct inode *); extern void nilfs_evict_inode(struct inode *); extern int nilfs_setattr(struct dentry *, struct iattr *); -int nilfs_permission(struct inode *inode, int mask); +int nilfs_permission(struct inode *inode, int mask, unsigned int flags); extern int nilfs_load_inode_block(struct nilfs_sb_info *, struct inode *, struct buffer_head **); extern int nilfs_inode_dirty(struct inode *); diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index 391915093fe1..704f6b1742f3 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c @@ -291,13 +291,17 @@ static int ocfs2_set_acl(handle_t *handle, return ret; } -int ocfs2_check_acl(struct inode *inode, int mask) +int ocfs2_check_acl(struct inode *inode, int mask, unsigned int flags) { - struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + struct ocfs2_super *osb; struct buffer_head *di_bh = NULL; struct posix_acl *acl; int ret = -EAGAIN; + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + + osb = OCFS2_SB(inode->i_sb); if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) return ret; diff --git a/fs/ocfs2/acl.h b/fs/ocfs2/acl.h index 5c5d31f05853..4fe7c9cf4bfb 100644 --- a/fs/ocfs2/acl.h +++ b/fs/ocfs2/acl.h @@ -26,7 +26,7 @@ struct ocfs2_acl_entry { __le32 e_id; }; -extern int ocfs2_check_acl(struct inode *, int); +extern int ocfs2_check_acl(struct inode *, int, unsigned int); extern int ocfs2_acl_chmod(struct inode *); extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *, struct buffer_head *, struct buffer_head *, diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index f6cba566429d..bdadbae09094 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1307,10 +1307,13 @@ bail: return err; } -int ocfs2_permission(struct inode *inode, int mask) +int ocfs2_permission(struct inode *inode, int mask, unsigned int flags) { int ret; + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + mlog_entry_void(); ret = ocfs2_inode_lock(inode, NULL, 0); @@ -1320,7 +1323,7 @@ int ocfs2_permission(struct inode *inode, int mask) goto out; } - ret = generic_permission(inode, mask, ocfs2_check_acl); + ret = generic_permission(inode, mask, flags, ocfs2_check_acl); ocfs2_inode_unlock(inode, 0); out: diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h index 97bf761c9e7c..f5afbbef6703 100644 --- a/fs/ocfs2/file.h +++ b/fs/ocfs2/file.h @@ -61,7 +61,7 @@ int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh, int ocfs2_setattr(struct dentry *dentry, struct iattr *attr); int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); -int ocfs2_permission(struct inode *inode, int mask); +int ocfs2_permission(struct inode *inode, int mask, unsigned int flags); int ocfs2_should_update_atime(struct inode *inode, struct vfsmount *vfsmnt); diff --git a/fs/proc/base.c b/fs/proc/base.c index dc5b2fcadc3b..b953d41d9abf 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2114,11 +2114,13 @@ static const struct file_operations proc_fd_operations = { * /proc/pid/fd needs a special permission handler so that a process can still * access /proc/self/fd after it has executed a setuid(). */ -static int proc_fd_permission(struct inode *inode, int mask) +static int proc_fd_permission(struct inode *inode, int mask, unsigned int flags) { int rv; - rv = generic_permission(inode, mask, NULL); + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + rv = generic_permission(inode, mask, flags, NULL); if (rv == 0) return 0; if (task_pid(current) == proc_pid(inode)) diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index c9097f43b425..09a1f92a34ef 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -295,7 +295,7 @@ out: return ret; } -static int proc_sys_permission(struct inode *inode, int mask) +static int proc_sys_permission(struct inode *inode, int mask,unsigned int flags) { /* * sysctl entries that are not writeable, @@ -305,6 +305,9 @@ static int proc_sys_permission(struct inode *inode, int mask) struct ctl_table *table; int error; + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + /* Executable files are not allowed under /proc/sys/ */ if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) return -EACCES; diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 9ea22a56cdf1..3cfb2e933644 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -870,11 +870,14 @@ out: return err; } -static int reiserfs_check_acl(struct inode *inode, int mask) +static int reiserfs_check_acl(struct inode *inode, int mask, unsigned int flags) { struct posix_acl *acl; int error = -EAGAIN; /* do regular unix permission checks by default */ + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + acl = reiserfs_get_acl(inode, ACL_TYPE_ACCESS); if (acl) { @@ -951,8 +954,10 @@ static int xattr_mount_check(struct super_block *s) return 0; } -int reiserfs_permission(struct inode *inode, int mask) +int reiserfs_permission(struct inode *inode, int mask, unsigned int flags) { + if (flags & IPERM_FLAG_RCU) + return -ECHILD; /* * We don't do permission checks on the internal objects. * Permissions are determined by the "owning" object. @@ -965,9 +970,10 @@ int reiserfs_permission(struct inode *inode, int mask) * Stat data v1 doesn't support ACLs. */ if (get_inode_sd_version(inode) != STAT_DATA_V1) - return generic_permission(inode, mask, reiserfs_check_acl); + return generic_permission(inode, mask, flags, + reiserfs_check_acl); #endif - return generic_permission(inode, mask, NULL); + return generic_permission(inode, mask, flags, NULL); } static int xattr_hide_revalidate(struct dentry *dentry, struct nameidata *nd) diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index cffb1fd8ba33..30ac27345586 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c @@ -348,13 +348,18 @@ int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const void *ns, const cha return -ENOENT; } -int sysfs_permission(struct inode *inode, int mask) +int sysfs_permission(struct inode *inode, int mask, unsigned int flags) { - struct sysfs_dirent *sd = inode->i_private; + struct sysfs_dirent *sd; + + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + + sd = inode->i_private; mutex_lock(&sysfs_mutex); sysfs_refresh_inode(sd, inode); mutex_unlock(&sysfs_mutex); - return generic_permission(inode, mask, NULL); + return generic_permission(inode, mask, flags, NULL); } diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h index d9be60a2e956..ffaaa816bfba 100644 --- a/fs/sysfs/sysfs.h +++ b/fs/sysfs/sysfs.h @@ -200,7 +200,7 @@ static inline void __sysfs_put(struct sysfs_dirent *sd) struct inode *sysfs_get_inode(struct super_block *sb, struct sysfs_dirent *sd); void sysfs_evict_inode(struct inode *inode); int sysfs_sd_setattr(struct sysfs_dirent *sd, struct iattr *iattr); -int sysfs_permission(struct inode *inode, int mask); +int sysfs_permission(struct inode *inode, int mask, unsigned int flags); int sysfs_setattr(struct dentry *dentry, struct iattr *iattr); int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value, diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c index b2771862fd3d..4b11eaf6a580 100644 --- a/fs/xfs/linux-2.6/xfs_acl.c +++ b/fs/xfs/linux-2.6/xfs_acl.c @@ -219,12 +219,16 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) } int -xfs_check_acl(struct inode *inode, int mask) +xfs_check_acl(struct inode *inode, int mask, unsigned int flags) { - struct xfs_inode *ip = XFS_I(inode); + struct xfs_inode *ip; struct posix_acl *acl; int error = -EAGAIN; + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + + ip = XFS_I(inode); trace_xfs_check_acl(ip); /* diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h index 0135e2a669d7..11dd72070cbb 100644 --- a/fs/xfs/xfs_acl.h +++ b/fs/xfs/xfs_acl.h @@ -42,7 +42,7 @@ struct xfs_acl { #define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1) #ifdef CONFIG_XFS_POSIX_ACL -extern int xfs_check_acl(struct inode *inode, int mask); +extern int xfs_check_acl(struct inode *inode, int mask, unsigned int flags); extern struct posix_acl *xfs_get_acl(struct inode *inode, int type); extern int xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl); extern int xfs_acl_chmod(struct inode *inode); diff --git a/include/linux/coda_linux.h b/include/linux/coda_linux.h index 2e914d0771b9..4ccc59c1ea82 100644 --- a/include/linux/coda_linux.h +++ b/include/linux/coda_linux.h @@ -37,7 +37,7 @@ extern const struct file_operations coda_ioctl_operations; /* operations shared over more than one file */ int coda_open(struct inode *i, struct file *f); int coda_release(struct inode *i, struct file *f); -int coda_permission(struct inode *inode, int mask); +int coda_permission(struct inode *inode, int mask, unsigned int flags); int coda_revalidate_inode(struct dentry *); int coda_getattr(struct vfsmount *, struct dentry *, struct kstat *); int coda_setattr(struct dentry *, struct iattr *); diff --git a/include/linux/fs.h b/include/linux/fs.h index a04aa96acb71..d5a4d42f655b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1550,11 +1550,13 @@ struct file_operations { int (*setlease)(struct file *, long, struct file_lock **); }; +#define IPERM_FLAG_RCU 0x0001 + struct inode_operations { struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *); void * (*follow_link) (struct dentry *, struct nameidata *); - int (*permission) (struct inode *, int); - int (*check_acl)(struct inode *, int); + int (*permission) (struct inode *, int, unsigned int); + int (*check_acl)(struct inode *, int, unsigned int); int (*readlink) (struct dentry *, char __user *,int); void (*put_link) (struct dentry *, struct nameidata *, void *); @@ -2165,8 +2167,8 @@ extern sector_t bmap(struct inode *, sector_t); #endif extern int notify_change(struct dentry *, struct iattr *); extern int inode_permission(struct inode *, int); -extern int generic_permission(struct inode *, int, - int (*check_acl)(struct inode *, int)); +extern int generic_permission(struct inode *, int, unsigned int, + int (*check_acl)(struct inode *, int, unsigned int)); static inline bool execute_ok(struct inode *inode) { diff --git a/include/linux/generic_acl.h b/include/linux/generic_acl.h index 574bea4013b6..0437e377b555 100644 --- a/include/linux/generic_acl.h +++ b/include/linux/generic_acl.h @@ -10,6 +10,6 @@ extern const struct xattr_handler generic_acl_default_handler; int generic_acl_init(struct inode *, struct inode *); int generic_acl_chmod(struct inode *); -int generic_check_acl(struct inode *inode, int mask); +int generic_check_acl(struct inode *inode, int mask, unsigned int flags); #endif /* LINUX_GENERIC_ACL_H */ diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 29d504d5d1c3..0779bb8f95be 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -351,7 +351,7 @@ extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *); extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr); extern int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr); extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *); -extern int nfs_permission(struct inode *, int); +extern int nfs_permission(struct inode *, int, unsigned int); extern int nfs_open(struct inode *, struct file *); extern int nfs_release(struct inode *, struct file *); extern int nfs_attribute_timeout(struct inode *inode); diff --git a/include/linux/reiserfs_xattr.h b/include/linux/reiserfs_xattr.h index b2cf2089769b..3b94c91f20a6 100644 --- a/include/linux/reiserfs_xattr.h +++ b/include/linux/reiserfs_xattr.h @@ -41,7 +41,7 @@ int reiserfs_xattr_init(struct super_block *sb, int mount_flags); int reiserfs_lookup_privroot(struct super_block *sb); int reiserfs_delete_xattrs(struct inode *inode); int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs); -int reiserfs_permission(struct inode *inode, int mask); +int reiserfs_permission(struct inode *inode, int mask, unsigned int flags); #ifdef CONFIG_REISERFS_FS_XATTR #define has_xattr_dir(inode) (REISERFS_I(inode)->i_flags & i_has_xattr_dir) -- cgit v1.2.3 From b3e19d924b6eaf2ca7d22cba99a517c5171007b6 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:50:11 +1100 Subject: fs: scale mntget/mntput The problem that this patch aims to fix is vfsmount refcounting scalability. We need to take a reference on the vfsmount for every successful path lookup, which often go to the same mount point. The fundamental difficulty is that a "simple" reference count can never be made scalable, because any time a reference is dropped, we must check whether that was the last reference. To do that requires communication with all other CPUs that may have taken a reference count. We can make refcounts more scalable in a couple of ways, involving keeping distributed counters, and checking for the global-zero condition less frequently. - check the global sum once every interval (this will delay zero detection for some interval, so it's probably a showstopper for vfsmounts). - keep a local count and only taking the global sum when local reaches 0 (this is difficult for vfsmounts, because we can't hold preempt off for the life of a reference, so a counter would need to be per-thread or tied strongly to a particular CPU which requires more locking). - keep a local difference of increments and decrements, which allows us to sum the total difference and hence find the refcount when summing all CPUs. Then, keep a single integer "long" refcount for slow and long lasting references, and only take the global sum of local counters when the long refcount is 0. This last scheme is what I implemented here. Attached mounts and process root and working directory references are "long" references, and everything else is a short reference. This allows scalable vfsmount references during path walking over mounted subtrees and unattached (lazy umounted) mounts with processes still running in them. This results in one fewer atomic op in the fastpath: mntget is now just a per-CPU inc, rather than an atomic inc; and mntput just requires a spinlock and non-atomic decrement in the common case. However code is otherwise bigger and heavier, so single threaded performance is basically a wash. Signed-off-by: Nick Piggin --- arch/ia64/kernel/perfmon.c | 2 +- drivers/mtd/mtdchar.c | 2 +- fs/anon_inodes.c | 2 +- fs/fs_struct.c | 26 +++-- fs/internal.h | 1 + fs/namei.c | 24 +++++ fs/namespace.c | 242 +++++++++++++++++++++++++++++++++++++-------- fs/pipe.c | 2 +- fs/pnode.c | 4 +- fs/super.c | 2 +- include/linux/mount.h | 53 ++++------ include/linux/path.h | 2 + net/socket.c | 19 +++- 13 files changed, 283 insertions(+), 98 deletions(-) (limited to 'drivers') diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 5a24f40bb48e..f099b82703d8 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -1542,7 +1542,7 @@ pfm_exit_smpl_buffer(pfm_buffer_fmt_t *fmt) * any operations on the root directory. However, we need a non-trivial * d_name - pfm: will go nicely and kill the special-casing in procfs. */ -static struct vfsmount *pfmfs_mnt; +static struct vfsmount *pfmfs_mnt __read_mostly; static int __init init_pfm_fs(void) diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c index 4759d827e8c7..f511dd15fd31 100644 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@ -1201,7 +1201,7 @@ err_unregister_chdev: static void __exit cleanup_mtdchar(void) { unregister_mtd_user(&mtdchar_notifier); - mntput(mtd_inode_mnt); + mntput_long(mtd_inode_mnt); unregister_filesystem(&mtd_inodefs_type); __unregister_chrdev(MTD_CHAR_MAJOR, 0, 1 << MINORBITS, "mtd"); } diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 9d92b33da8a0..5fd38112a6ca 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -232,7 +232,7 @@ static int __init anon_inode_init(void) return 0; err_mntput: - mntput(anon_inode_mnt); + mntput_long(anon_inode_mnt); err_unregister_filesystem: unregister_filesystem(&anon_inode_fs_type); err_exit: diff --git a/fs/fs_struct.c b/fs/fs_struct.c index 60b8531f41c5..68ca487bedb1 100644 --- a/fs/fs_struct.c +++ b/fs/fs_struct.c @@ -17,11 +17,11 @@ void set_fs_root(struct fs_struct *fs, struct path *path) write_seqcount_begin(&fs->seq); old_root = fs->root; fs->root = *path; - path_get(path); + path_get_long(path); write_seqcount_end(&fs->seq); spin_unlock(&fs->lock); if (old_root.dentry) - path_put(&old_root); + path_put_long(&old_root); } /* @@ -36,12 +36,12 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path) write_seqcount_begin(&fs->seq); old_pwd = fs->pwd; fs->pwd = *path; - path_get(path); + path_get_long(path); write_seqcount_end(&fs->seq); spin_unlock(&fs->lock); if (old_pwd.dentry) - path_put(&old_pwd); + path_put_long(&old_pwd); } void chroot_fs_refs(struct path *old_root, struct path *new_root) @@ -59,13 +59,13 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root) write_seqcount_begin(&fs->seq); if (fs->root.dentry == old_root->dentry && fs->root.mnt == old_root->mnt) { - path_get(new_root); + path_get_long(new_root); fs->root = *new_root; count++; } if (fs->pwd.dentry == old_root->dentry && fs->pwd.mnt == old_root->mnt) { - path_get(new_root); + path_get_long(new_root); fs->pwd = *new_root; count++; } @@ -76,13 +76,13 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root) } while_each_thread(g, p); read_unlock(&tasklist_lock); while (count--) - path_put(old_root); + path_put_long(old_root); } void free_fs_struct(struct fs_struct *fs) { - path_put(&fs->root); - path_put(&fs->pwd); + path_put_long(&fs->root); + path_put_long(&fs->pwd); kmem_cache_free(fs_cachep, fs); } @@ -115,7 +115,13 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old) spin_lock_init(&fs->lock); seqcount_init(&fs->seq); fs->umask = old->umask; - get_fs_root_and_pwd(old, &fs->root, &fs->pwd); + + spin_lock(&old->lock); + fs->root = old->root; + path_get_long(&fs->root); + fs->pwd = old->pwd; + path_get_long(&fs->pwd); + spin_unlock(&old->lock); } return fs; } diff --git a/fs/internal.h b/fs/internal.h index e43b9a4dbf4e..9687c2ee2735 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -63,6 +63,7 @@ extern int copy_mount_string(const void __user *, char **); extern void free_vfsmnt(struct vfsmount *); extern struct vfsmount *alloc_vfsmnt(const char *); +extern unsigned int mnt_get_count(struct vfsmount *mnt); extern struct vfsmount *__lookup_mnt(struct vfsmount *, struct dentry *, int); extern void mnt_set_mountpoint(struct vfsmount *, struct dentry *, struct vfsmount *); diff --git a/fs/namei.c b/fs/namei.c index 4e957bf744ae..19433cdba011 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -367,6 +367,18 @@ void path_get(struct path *path) } EXPORT_SYMBOL(path_get); +/** + * path_get_long - get a long reference to a path + * @path: path to get the reference to + * + * Given a path increment the reference count to the dentry and the vfsmount. + */ +void path_get_long(struct path *path) +{ + mntget_long(path->mnt); + dget(path->dentry); +} + /** * path_put - put a reference to a path * @path: path to put the reference to @@ -380,6 +392,18 @@ void path_put(struct path *path) } EXPORT_SYMBOL(path_put); +/** + * path_put_long - put a long reference to a path + * @path: path to put the reference to + * + * Given a path decrement the reference count to the dentry and the vfsmount. + */ +void path_put_long(struct path *path) +{ + dput(path->dentry); + mntput_long(path->mnt); +} + /** * nameidata_drop_rcu - drop this nameidata out of rcu-walk * @nd: nameidata pathwalk data to drop diff --git a/fs/namespace.c b/fs/namespace.c index 03b82350f020..3ddfd9046c44 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -138,6 +138,64 @@ void mnt_release_group_id(struct vfsmount *mnt) mnt->mnt_group_id = 0; } +/* + * vfsmount lock must be held for read + */ +static inline void mnt_add_count(struct vfsmount *mnt, int n) +{ +#ifdef CONFIG_SMP + this_cpu_add(mnt->mnt_pcp->mnt_count, n); +#else + preempt_disable(); + mnt->mnt_count += n; + preempt_enable(); +#endif +} + +static inline void mnt_set_count(struct vfsmount *mnt, int n) +{ +#ifdef CONFIG_SMP + this_cpu_write(mnt->mnt_pcp->mnt_count, n); +#else + mnt->mnt_count = n; +#endif +} + +/* + * vfsmount lock must be held for read + */ +static inline void mnt_inc_count(struct vfsmount *mnt) +{ + mnt_add_count(mnt, 1); +} + +/* + * vfsmount lock must be held for read + */ +static inline void mnt_dec_count(struct vfsmount *mnt) +{ + mnt_add_count(mnt, -1); +} + +/* + * vfsmount lock must be held for write + */ +unsigned int mnt_get_count(struct vfsmount *mnt) +{ +#ifdef CONFIG_SMP + unsigned int count = atomic_read(&mnt->mnt_longrefs); + int cpu; + + for_each_possible_cpu(cpu) { + count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_count; + } + + return count; +#else + return mnt->mnt_count; +#endif +} + struct vfsmount *alloc_vfsmnt(const char *name) { struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL); @@ -154,7 +212,17 @@ struct vfsmount *alloc_vfsmnt(const char *name) goto out_free_id; } - atomic_set(&mnt->mnt_count, 1); +#ifdef CONFIG_SMP + mnt->mnt_pcp = alloc_percpu(struct mnt_pcp); + if (!mnt->mnt_pcp) + goto out_free_devname; + + atomic_set(&mnt->mnt_longrefs, 1); +#else + mnt->mnt_count = 1; + mnt->mnt_writers = 0; +#endif + INIT_LIST_HEAD(&mnt->mnt_hash); INIT_LIST_HEAD(&mnt->mnt_child); INIT_LIST_HEAD(&mnt->mnt_mounts); @@ -165,13 +233,6 @@ struct vfsmount *alloc_vfsmnt(const char *name) INIT_LIST_HEAD(&mnt->mnt_slave); #ifdef CONFIG_FSNOTIFY INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks); -#endif -#ifdef CONFIG_SMP - mnt->mnt_writers = alloc_percpu(int); - if (!mnt->mnt_writers) - goto out_free_devname; -#else - mnt->mnt_writers = 0; #endif } return mnt; @@ -219,7 +280,7 @@ EXPORT_SYMBOL_GPL(__mnt_is_readonly); static inline void mnt_inc_writers(struct vfsmount *mnt) { #ifdef CONFIG_SMP - (*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))++; + this_cpu_inc(mnt->mnt_pcp->mnt_writers); #else mnt->mnt_writers++; #endif @@ -228,7 +289,7 @@ static inline void mnt_inc_writers(struct vfsmount *mnt) static inline void mnt_dec_writers(struct vfsmount *mnt) { #ifdef CONFIG_SMP - (*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))--; + this_cpu_dec(mnt->mnt_pcp->mnt_writers); #else mnt->mnt_writers--; #endif @@ -241,7 +302,7 @@ static unsigned int mnt_get_writers(struct vfsmount *mnt) int cpu; for_each_possible_cpu(cpu) { - count += *per_cpu_ptr(mnt->mnt_writers, cpu); + count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_writers; } return count; @@ -418,7 +479,7 @@ void free_vfsmnt(struct vfsmount *mnt) kfree(mnt->mnt_devname); mnt_free_id(mnt); #ifdef CONFIG_SMP - free_percpu(mnt->mnt_writers); + free_percpu(mnt->mnt_pcp); #endif kmem_cache_free(mnt_cache, mnt); } @@ -652,9 +713,10 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root, return NULL; } -static inline void __mntput(struct vfsmount *mnt) +static inline void mntfree(struct vfsmount *mnt) { struct super_block *sb = mnt->mnt_sb; + /* * This probably indicates that somebody messed * up a mnt_want/drop_write() pair. If this @@ -662,8 +724,8 @@ static inline void __mntput(struct vfsmount *mnt) * to make r/w->r/o transitions. */ /* - * atomic_dec_and_lock() used to deal with ->mnt_count decrements - * provides barriers, so mnt_get_writers() below is safe. AV + * The locking used to deal with mnt_count decrement provides barriers, + * so mnt_get_writers() below is safe. */ WARN_ON(mnt_get_writers(mnt)); fsnotify_vfsmount_delete(mnt); @@ -672,28 +734,113 @@ static inline void __mntput(struct vfsmount *mnt) deactivate_super(sb); } -void mntput_no_expire(struct vfsmount *mnt) -{ -repeat: - if (atomic_add_unless(&mnt->mnt_count, -1, 1)) - return; +#ifdef CONFIG_SMP +static inline void __mntput(struct vfsmount *mnt, int longrefs) +{ + if (!longrefs) { +put_again: + br_read_lock(vfsmount_lock); + if (likely(atomic_read(&mnt->mnt_longrefs))) { + mnt_dec_count(mnt); + br_read_unlock(vfsmount_lock); + return; + } + br_read_unlock(vfsmount_lock); + } else { + BUG_ON(!atomic_read(&mnt->mnt_longrefs)); + if (atomic_add_unless(&mnt->mnt_longrefs, -1, 1)) + return; + } + br_write_lock(vfsmount_lock); - if (!atomic_dec_and_test(&mnt->mnt_count)) { + if (!longrefs) + mnt_dec_count(mnt); + else + atomic_dec(&mnt->mnt_longrefs); + if (mnt_get_count(mnt)) { br_write_unlock(vfsmount_lock); return; } - if (likely(!mnt->mnt_pinned)) { + if (unlikely(mnt->mnt_pinned)) { + mnt_add_count(mnt, mnt->mnt_pinned + 1); + mnt->mnt_pinned = 0; br_write_unlock(vfsmount_lock); - __mntput(mnt); + acct_auto_close_mnt(mnt); + goto put_again; + } + br_write_unlock(vfsmount_lock); + mntfree(mnt); +} +#else +static inline void __mntput(struct vfsmount *mnt, int longrefs) +{ +put_again: + mnt_dec_count(mnt); + if (likely(mnt_get_count(mnt))) return; + br_write_lock(vfsmount_lock); + if (unlikely(mnt->mnt_pinned)) { + mnt_add_count(mnt, mnt->mnt_pinned + 1); + mnt->mnt_pinned = 0; + br_write_unlock(vfsmount_lock); + acct_auto_close_mnt(mnt); + goto put_again; } - atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count); - mnt->mnt_pinned = 0; br_write_unlock(vfsmount_lock); - acct_auto_close_mnt(mnt); - goto repeat; + mntfree(mnt); +} +#endif + +static void mntput_no_expire(struct vfsmount *mnt) +{ + __mntput(mnt, 0); +} + +void mntput(struct vfsmount *mnt) +{ + if (mnt) { + /* avoid cacheline pingpong, hope gcc doesn't get "smart" */ + if (unlikely(mnt->mnt_expiry_mark)) + mnt->mnt_expiry_mark = 0; + __mntput(mnt, 0); + } +} +EXPORT_SYMBOL(mntput); + +struct vfsmount *mntget(struct vfsmount *mnt) +{ + if (mnt) + mnt_inc_count(mnt); + return mnt; +} +EXPORT_SYMBOL(mntget); + +void mntput_long(struct vfsmount *mnt) +{ +#ifdef CONFIG_SMP + if (mnt) { + /* avoid cacheline pingpong, hope gcc doesn't get "smart" */ + if (unlikely(mnt->mnt_expiry_mark)) + mnt->mnt_expiry_mark = 0; + __mntput(mnt, 1); + } +#else + mntput(mnt); +#endif } -EXPORT_SYMBOL(mntput_no_expire); +EXPORT_SYMBOL(mntput_long); + +struct vfsmount *mntget_long(struct vfsmount *mnt) +{ +#ifdef CONFIG_SMP + if (mnt) + atomic_inc(&mnt->mnt_longrefs); + return mnt; +#else + return mntget(mnt); +#endif +} +EXPORT_SYMBOL(mntget_long); void mnt_pin(struct vfsmount *mnt) { @@ -701,19 +848,17 @@ void mnt_pin(struct vfsmount *mnt) mnt->mnt_pinned++; br_write_unlock(vfsmount_lock); } - EXPORT_SYMBOL(mnt_pin); void mnt_unpin(struct vfsmount *mnt) { br_write_lock(vfsmount_lock); if (mnt->mnt_pinned) { - atomic_inc(&mnt->mnt_count); + mnt_inc_count(mnt); mnt->mnt_pinned--; } br_write_unlock(vfsmount_lock); } - EXPORT_SYMBOL(mnt_unpin); static inline void mangle(struct seq_file *m, const char *s) @@ -1008,12 +1153,13 @@ int may_umount_tree(struct vfsmount *mnt) int minimum_refs = 0; struct vfsmount *p; - br_read_lock(vfsmount_lock); + /* write lock needed for mnt_get_count */ + br_write_lock(vfsmount_lock); for (p = mnt; p; p = next_mnt(p, mnt)) { - actual_refs += atomic_read(&p->mnt_count); + actual_refs += mnt_get_count(p); minimum_refs += 2; } - br_read_unlock(vfsmount_lock); + br_write_unlock(vfsmount_lock); if (actual_refs > minimum_refs) return 0; @@ -1040,10 +1186,10 @@ int may_umount(struct vfsmount *mnt) { int ret = 1; down_read(&namespace_sem); - br_read_lock(vfsmount_lock); + br_write_lock(vfsmount_lock); if (propagate_mount_busy(mnt, 2)) ret = 0; - br_read_unlock(vfsmount_lock); + br_write_unlock(vfsmount_lock); up_read(&namespace_sem); return ret; } @@ -1070,7 +1216,7 @@ void release_mounts(struct list_head *head) dput(dentry); mntput(m); } - mntput(mnt); + mntput_long(mnt); } } @@ -1125,8 +1271,16 @@ static int do_umount(struct vfsmount *mnt, int flags) flags & (MNT_FORCE | MNT_DETACH)) return -EINVAL; - if (atomic_read(&mnt->mnt_count) != 2) + /* + * probably don't strictly need the lock here if we examined + * all race cases, but it's a slowpath. + */ + br_write_lock(vfsmount_lock); + if (mnt_get_count(mnt) != 2) { + br_write_lock(vfsmount_lock); return -EBUSY; + } + br_write_unlock(vfsmount_lock); if (!xchg(&mnt->mnt_expiry_mark, 1)) return -EAGAIN; @@ -1815,7 +1969,7 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path, unlock: up_write(&namespace_sem); - mntput(newmnt); + mntput_long(newmnt); return err; } @@ -2148,11 +2302,11 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, if (fs) { if (p == fs->root.mnt) { rootmnt = p; - fs->root.mnt = mntget(q); + fs->root.mnt = mntget_long(q); } if (p == fs->pwd.mnt) { pwdmnt = p; - fs->pwd.mnt = mntget(q); + fs->pwd.mnt = mntget_long(q); } } p = next_mnt(p, mnt_ns->root); @@ -2161,9 +2315,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, up_write(&namespace_sem); if (rootmnt) - mntput(rootmnt); + mntput_long(rootmnt); if (pwdmnt) - mntput(pwdmnt); + mntput_long(pwdmnt); return new_ns; } @@ -2350,6 +2504,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, touch_mnt_namespace(current->nsproxy->mnt_ns); br_write_unlock(vfsmount_lock); chroot_fs_refs(&root, &new); + error = 0; path_put(&root_parent); path_put(&parent_path); @@ -2376,6 +2531,7 @@ static void __init init_mount_tree(void) mnt = do_kern_mount("rootfs", 0, "rootfs", NULL); if (IS_ERR(mnt)) panic("Can't create rootfs"); + ns = create_mnt_ns(mnt); if (IS_ERR(ns)) panic("Can't allocate initial namespace"); diff --git a/fs/pipe.c b/fs/pipe.c index cfe3a7f2ee21..68f1f8e4e23b 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1292,7 +1292,7 @@ static int __init init_pipe_fs(void) static void __exit exit_pipe_fs(void) { unregister_filesystem(&pipe_fs_type); - mntput(pipe_mnt); + mntput_long(pipe_mnt); } fs_initcall(init_pipe_fs); diff --git a/fs/pnode.c b/fs/pnode.c index 8066b8dd748f..d42514e32380 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -288,7 +288,7 @@ out: */ static inline int do_refcount_check(struct vfsmount *mnt, int count) { - int mycount = atomic_read(&mnt->mnt_count) - mnt->mnt_ghosts; + int mycount = mnt_get_count(mnt) - mnt->mnt_ghosts; return (mycount > count); } @@ -300,7 +300,7 @@ static inline int do_refcount_check(struct vfsmount *mnt, int count) * Check if any of these mounts that **do not have submounts** * have more references than 'refcnt'. If so return busy. * - * vfsmount lock must be held for read or write + * vfsmount lock must be held for write */ int propagate_mount_busy(struct vfsmount *mnt, int refcnt) { diff --git a/fs/super.c b/fs/super.c index 968ba013011a..823e061faa87 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1140,7 +1140,7 @@ static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype) return mnt; err: - mntput(mnt); + mntput_long(mnt); return ERR_PTR(err); } diff --git a/include/linux/mount.h b/include/linux/mount.h index 5e7a59408dd4..1869ea24a739 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -13,6 +13,7 @@ #include #include #include +#include #include struct super_block; @@ -46,12 +47,24 @@ struct mnt_namespace; #define MNT_INTERNAL 0x4000 +struct mnt_pcp { + int mnt_count; + int mnt_writers; +}; + struct vfsmount { struct list_head mnt_hash; struct vfsmount *mnt_parent; /* fs we are mounted on */ struct dentry *mnt_mountpoint; /* dentry of mountpoint */ struct dentry *mnt_root; /* root of the mounted tree */ struct super_block *mnt_sb; /* pointer to superblock */ +#ifdef CONFIG_SMP + struct mnt_pcp __percpu *mnt_pcp; + atomic_t mnt_longrefs; +#else + int mnt_count; + int mnt_writers; +#endif struct list_head mnt_mounts; /* list of children, anchored here */ struct list_head mnt_child; /* and going through their mnt_child */ int mnt_flags; @@ -70,57 +83,25 @@ struct vfsmount { struct mnt_namespace *mnt_ns; /* containing namespace */ int mnt_id; /* mount identifier */ int mnt_group_id; /* peer group identifier */ - /* - * We put mnt_count & mnt_expiry_mark at the end of struct vfsmount - * to let these frequently modified fields in a separate cache line - * (so that reads of mnt_flags wont ping-pong on SMP machines) - */ - atomic_t mnt_count; int mnt_expiry_mark; /* true if marked for expiry */ int mnt_pinned; int mnt_ghosts; -#ifdef CONFIG_SMP - int __percpu *mnt_writers; -#else - int mnt_writers; -#endif }; -static inline int *get_mnt_writers_ptr(struct vfsmount *mnt) -{ -#ifdef CONFIG_SMP - return mnt->mnt_writers; -#else - return &mnt->mnt_writers; -#endif -} - -static inline struct vfsmount *mntget(struct vfsmount *mnt) -{ - if (mnt) - atomic_inc(&mnt->mnt_count); - return mnt; -} - struct file; /* forward dec */ extern int mnt_want_write(struct vfsmount *mnt); extern int mnt_want_write_file(struct file *file); extern int mnt_clone_write(struct vfsmount *mnt); extern void mnt_drop_write(struct vfsmount *mnt); -extern void mntput_no_expire(struct vfsmount *mnt); +extern void mntput(struct vfsmount *mnt); +extern struct vfsmount *mntget(struct vfsmount *mnt); +extern void mntput_long(struct vfsmount *mnt); +extern struct vfsmount *mntget_long(struct vfsmount *mnt); extern void mnt_pin(struct vfsmount *mnt); extern void mnt_unpin(struct vfsmount *mnt); extern int __mnt_is_readonly(struct vfsmount *mnt); -static inline void mntput(struct vfsmount *mnt) -{ - if (mnt) { - mnt->mnt_expiry_mark = 0; - mntput_no_expire(mnt); - } -} - extern struct vfsmount *do_kern_mount(const char *fstype, int flags, const char *name, void *data); diff --git a/include/linux/path.h b/include/linux/path.h index edc98dec6266..a581e8c06533 100644 --- a/include/linux/path.h +++ b/include/linux/path.h @@ -10,7 +10,9 @@ struct path { }; extern void path_get(struct path *); +extern void path_get_long(struct path *); extern void path_put(struct path *); +extern void path_put_long(struct path *); static inline int path_equal(const struct path *path1, const struct path *path2) { diff --git a/net/socket.c b/net/socket.c index 0ee74c325320..815bba3d2fe0 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2390,6 +2390,8 @@ EXPORT_SYMBOL(sock_unregister); static int __init sock_init(void) { + int err; + /* * Initialize sock SLAB cache. */ @@ -2406,8 +2408,15 @@ static int __init sock_init(void) */ init_inodecache(); - register_filesystem(&sock_fs_type); + + err = register_filesystem(&sock_fs_type); + if (err) + goto out_fs; sock_mnt = kern_mount(&sock_fs_type); + if (IS_ERR(sock_mnt)) { + err = PTR_ERR(sock_mnt); + goto out_mount; + } /* The real protocol initialization is performed in later initcalls. */ @@ -2420,7 +2429,13 @@ static int __init sock_init(void) skb_timestamping_init(); #endif - return 0; +out: + return err; + +out_mount: + unregister_filesystem(&sock_fs_type); +out_fs: + goto out; } core_initcall(sock_init); /* early initcall */ -- cgit v1.2.3