From fe15ce446beb3a33583af81ffe6c9d01a75314ed Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:23 +1100 Subject: fs: change d_delete semantics Change d_delete from a dentry deletion notification to a dentry caching advise, more like ->drop_inode. Require it to be constant and idempotent, and not take d_lock. This is how all existing filesystems use the callback anyway. This makes fine grained dentry locking of dput and dentry lru scanning much simpler. Signed-off-by: Nick Piggin --- net/sunrpc/rpc_pipe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 10a17a37ec4..a0dc1a86fce 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -430,7 +430,7 @@ void rpc_put_mount(void) } EXPORT_SYMBOL_GPL(rpc_put_mount); -static int rpc_delete_dentry(struct dentry *dentry) +static int rpc_delete_dentry(const struct dentry *dentry) { return 1; } -- cgit v1.2.3 From fa0d7e3de6d6fc5004ad9dea0dd6b286af8f03e9 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:49 +1100 Subject: fs: icache RCU free inodes RCU free the struct inode. This will allow: - Subsequent store-free path walking patch. The inode must be consulted for permissions when walking, so an RCU inode reference is a must. - sb_inode_list_lock to be moved inside i_lock because sb list walkers who want to take i_lock no longer need to take sb_inode_list_lock to walk the list in the first place. This will simplify and optimize locking. - Could remove some nested trylock loops in dcache code - Could potentially simplify things a bit in VM land. Do not need to take the page lock to follow page->mapping. The downsides of this is the performance cost of using RCU. In a simple creat/unlink microbenchmark, performance drops by about 10% due to inability to reuse cache-hot slab objects. As iterations increase and RCU freeing starts kicking over, this increases to about 20%. In cases where inode lifetimes are longer (ie. many inodes may be allocated during the average life span of a single inode), a lot of this cache reuse is not applicable, so the regression caused by this patch is smaller. The cache-hot regression could largely be avoided by using SLAB_DESTROY_BY_RCU, however this adds some complexity to list walking and store-free path walking, so I prefer to implement this at a later date, if it is shown to be a win in real situations. I haven't found a regression in any non-micro benchmark so I doubt it will be a problem. Signed-off-by: Nick Piggin --- Documentation/filesystems/porting | 14 ++++++++++++++ arch/powerpc/platforms/cell/spufs/inode.c | 10 ++++++++-- drivers/staging/pohmelfs/inode.c | 9 ++++++++- drivers/staging/smbfs/inode.c | 9 ++++++++- fs/9p/vfs_inode.c | 9 ++++++++- fs/adfs/super.c | 9 ++++++++- fs/affs/super.c | 9 ++++++++- fs/afs/super.c | 10 +++++++++- fs/befs/linuxvfs.c | 10 ++++++++-- fs/bfs/inode.c | 9 ++++++++- fs/block_dev.c | 9 ++++++++- fs/btrfs/inode.c | 9 ++++++++- fs/ceph/inode.c | 11 ++++++++++- fs/cifs/cifsfs.c | 9 ++++++++- fs/coda/inode.c | 9 ++++++++- fs/ecryptfs/super.c | 12 +++++++++++- fs/efs/super.c | 9 ++++++++- fs/exofs/super.c | 9 ++++++++- fs/ext2/super.c | 9 ++++++++- fs/ext3/super.c | 9 ++++++++- fs/ext4/super.c | 9 ++++++++- fs/fat/inode.c | 9 ++++++++- fs/freevxfs/vxfs_inode.c | 9 ++++++++- fs/fuse/inode.c | 9 ++++++++- fs/gfs2/super.c | 9 ++++++++- fs/hfs/super.c | 9 ++++++++- fs/hfsplus/super.c | 10 +++++++++- fs/hostfs/hostfs_kern.c | 9 ++++++++- fs/hpfs/super.c | 9 ++++++++- fs/hppfs/hppfs.c | 9 ++++++++- fs/hugetlbfs/inode.c | 9 ++++++++- fs/inode.c | 10 +++++++++- fs/isofs/inode.c | 9 ++++++++- fs/jffs2/super.c | 9 ++++++++- fs/jfs/super.c | 10 +++++++++- fs/logfs/inode.c | 9 ++++++++- fs/minix/inode.c | 9 ++++++++- fs/ncpfs/inode.c | 9 ++++++++- fs/nfs/inode.c | 9 ++++++++- fs/nilfs2/super.c | 10 +++++++++- fs/ntfs/inode.c | 9 ++++++++- fs/ocfs2/dlmfs/dlmfs.c | 9 ++++++++- fs/ocfs2/super.c | 9 ++++++++- fs/openpromfs/inode.c | 9 ++++++++- fs/proc/inode.c | 9 ++++++++- fs/qnx4/inode.c | 9 ++++++++- fs/reiserfs/super.c | 9 ++++++++- fs/romfs/super.c | 9 ++++++++- fs/squashfs/super.c | 9 ++++++++- fs/sysv/inode.c | 9 ++++++++- fs/ubifs/super.c | 10 +++++++++- fs/udf/super.c | 9 ++++++++- fs/ufs/super.c | 9 ++++++++- fs/xfs/xfs_iget.c | 13 ++++++++++++- include/linux/fs.h | 5 ++++- include/linux/net.h | 1 - ipc/mqueue.c | 9 ++++++++- mm/shmem.c | 9 ++++++++- net/socket.c | 16 ++++++++-------- net/sunrpc/rpc_pipe.c | 10 +++++++++- 60 files changed, 490 insertions(+), 68 deletions(-) (limited to 'net') diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index 1eb76959d09..ccf0ce7866b 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -346,3 +346,17 @@ look at examples of other filesystems) for guidance. for details of what locks to replace dcache_lock with in order to protect particular things. Most of the time, a filesystem only needs ->d_lock, which protects *all* the dcache state of a given dentry. + +-- +[mandatory] + + Filesystems must RCU-free their inodes, if they can have been accessed +via rcu-walk path walk (basically, if the file can have had a path name in the +vfs namespace). + + i_dentry and i_rcu share storage in a union, and the vfs expects +i_dentry to be reinitialized before it is freed, so an: + + INIT_LIST_HEAD(&inode->i_dentry); + +must be done in the RCU callback. diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 03185de7cd4..856e9c39806 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -71,12 +71,18 @@ spufs_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } -static void -spufs_destroy_inode(struct inode *inode) +static void spufs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(spufs_inode_cache, SPUFS_I(inode)); } +static void spufs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, spufs_i_callback); +} + static void spufs_init_once(void *p) { diff --git a/drivers/staging/pohmelfs/inode.c b/drivers/staging/pohmelfs/inode.c index 61685ccceda..cc8d2840f9b 100644 --- a/drivers/staging/pohmelfs/inode.c +++ b/drivers/staging/pohmelfs/inode.c @@ -826,6 +826,13 @@ const struct address_space_operations pohmelfs_aops = { .set_page_dirty = __set_page_dirty_nobuffers, }; +static void pohmelfs_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(pohmelfs_inode_cache, POHMELFS_I(inode)); +} + /* * ->detroy_inode() callback. Deletes inode from the caches * and frees private data. @@ -842,8 +849,8 @@ static void pohmelfs_destroy_inode(struct inode *inode) dprintk("%s: pi: %p, inode: %p, ino: %llu.\n", __func__, pi, &pi->vfs_inode, pi->ino); - kmem_cache_free(pohmelfs_inode_cache, pi); atomic_long_dec(&psb->total_inodes); + call_rcu(&inode->i_rcu, pohmelfs_i_callback); } /* diff --git a/drivers/staging/smbfs/inode.c b/drivers/staging/smbfs/inode.c index 540a984bb51..244319dc970 100644 --- a/drivers/staging/smbfs/inode.c +++ b/drivers/staging/smbfs/inode.c @@ -62,11 +62,18 @@ static struct inode *smb_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } -static void smb_destroy_inode(struct inode *inode) +static void smb_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(smb_inode_cachep, SMB_I(inode)); } +static void smb_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, smb_i_callback); +} + static void init_once(void *foo) { struct smb_inode_info *ei = (struct smb_inode_info *) foo; diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 1073bca8488..f6f9081e6d2 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -237,10 +237,17 @@ struct inode *v9fs_alloc_inode(struct super_block *sb) * */ -void v9fs_destroy_inode(struct inode *inode) +static void v9fs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(vcookie_cache, v9fs_inode2cookie(inode)); } + +void v9fs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, v9fs_i_callback); +} #endif /** diff --git a/fs/adfs/super.c b/fs/adfs/super.c index 959dbff2d42..47dffc513a2 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -240,11 +240,18 @@ static struct inode *adfs_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } -static void adfs_destroy_inode(struct inode *inode) +static void adfs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(adfs_inode_cachep, ADFS_I(inode)); } +static void adfs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, adfs_i_callback); +} + static void init_once(void *foo) { struct adfs_inode_info *ei = (struct adfs_inode_info *) foo; diff --git a/fs/affs/super.c b/fs/affs/super.c index 0cf7f4384cb..4c18fcfb0a1 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -95,11 +95,18 @@ static struct inode *affs_alloc_inode(struct super_block *sb) return &i->vfs_inode; } -static void affs_destroy_inode(struct inode *inode) +static void affs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(affs_inode_cachep, AFFS_I(inode)); } +static void affs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, affs_i_callback); +} + static void init_once(void *foo) { struct affs_inode_info *ei = (struct affs_inode_info *) foo; diff --git a/fs/afs/super.c b/fs/afs/super.c index 27201cffece..f901a9d7c11 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -498,6 +498,14 @@ static struct inode *afs_alloc_inode(struct super_block *sb) return &vnode->vfs_inode; } +static void afs_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + struct afs_vnode *vnode = AFS_FS_I(inode); + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(afs_inode_cachep, vnode); +} + /* * destroy an AFS inode struct */ @@ -511,7 +519,7 @@ static void afs_destroy_inode(struct inode *inode) ASSERTCMP(vnode->server, ==, NULL); - kmem_cache_free(afs_inode_cachep, vnode); + call_rcu(&inode->i_rcu, afs_i_callback); atomic_dec(&afs_count_active_inodes); } diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index aa4e7c7ae3c..de93581b79a 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -284,12 +284,18 @@ befs_alloc_inode(struct super_block *sb) return &bi->vfs_inode; } -static void -befs_destroy_inode(struct inode *inode) +static void befs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(befs_inode_cachep, BEFS_I(inode)); } +static void befs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, befs_i_callback); +} + static void init_once(void *foo) { struct befs_inode_info *bi = (struct befs_inode_info *) foo; diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index 76db6d7d49b..a8e37f81d09 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -248,11 +248,18 @@ static struct inode *bfs_alloc_inode(struct super_block *sb) return &bi->vfs_inode; } -static void bfs_destroy_inode(struct inode *inode) +static void bfs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(bfs_inode_cachep, BFS_I(inode)); } +static void bfs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, bfs_i_callback); +} + static void init_once(void *foo) { struct bfs_inode_info *bi = foo; diff --git a/fs/block_dev.c b/fs/block_dev.c index 4230252fd68..771f2352701 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -409,13 +409,20 @@ static struct inode *bdev_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } -static void bdev_destroy_inode(struct inode *inode) +static void bdev_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); struct bdev_inode *bdi = BDEV_I(inode); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(bdev_cachep, bdi); } +static void bdev_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, bdev_i_callback); +} + static void init_once(void *foo) { struct bdev_inode *ei = (struct bdev_inode *) foo; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7ce9f893278..f9d2994a42a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6495,6 +6495,13 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) return inode; } +static void btrfs_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); +} + void btrfs_destroy_inode(struct inode *inode) { struct btrfs_ordered_extent *ordered; @@ -6564,7 +6571,7 @@ void btrfs_destroy_inode(struct inode *inode) inode_tree_del(inode); btrfs_drop_extent_cache(inode, 0, (u64)-1, 0); free: - kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); + call_rcu(&inode->i_rcu, btrfs_i_callback); } int btrfs_drop_inode(struct inode *inode) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 2a48cafc174..47f8c8baf3b 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -368,6 +368,15 @@ struct inode *ceph_alloc_inode(struct super_block *sb) return &ci->vfs_inode; } +static void ceph_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + struct ceph_inode_info *ci = ceph_inode(inode); + + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(ceph_inode_cachep, ci); +} + void ceph_destroy_inode(struct inode *inode) { struct ceph_inode_info *ci = ceph_inode(inode); @@ -407,7 +416,7 @@ void ceph_destroy_inode(struct inode *inode) if (ci->i_xattrs.prealloc_blob) ceph_buffer_put(ci->i_xattrs.prealloc_blob); - kmem_cache_free(ceph_inode_cachep, ci); + call_rcu(&inode->i_rcu, ceph_i_callback); } diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 3936aa7f2c2..223717dcc40 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -334,10 +334,17 @@ cifs_alloc_inode(struct super_block *sb) return &cifs_inode->vfs_inode; } +static void cifs_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(cifs_inode_cachep, CIFS_I(inode)); +} + static void cifs_destroy_inode(struct inode *inode) { - kmem_cache_free(cifs_inode_cachep, CIFS_I(inode)); + call_rcu(&inode->i_rcu, cifs_i_callback); } static void diff --git a/fs/coda/inode.c b/fs/coda/inode.c index 5ea57c8c7f9..50dc7d189f5 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -56,11 +56,18 @@ static struct inode *coda_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } -static void coda_destroy_inode(struct inode *inode) +static void coda_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(coda_inode_cachep, ITOC(inode)); } +static void coda_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, coda_i_callback); +} + static void init_once(void *foo) { struct coda_inode_info *ei = (struct coda_inode_info *) foo; diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c index 2720178b771..3042fe123a3 100644 --- a/fs/ecryptfs/super.c +++ b/fs/ecryptfs/super.c @@ -62,6 +62,16 @@ out: return inode; } +static void ecryptfs_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + struct ecryptfs_inode_info *inode_info; + inode_info = ecryptfs_inode_to_private(inode); + + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(ecryptfs_inode_info_cache, inode_info); +} + /** * ecryptfs_destroy_inode * @inode: The ecryptfs inode @@ -88,7 +98,7 @@ static void ecryptfs_destroy_inode(struct inode *inode) } } ecryptfs_destroy_crypt_stat(&inode_info->crypt_stat); - kmem_cache_free(ecryptfs_inode_info_cache, inode_info); + call_rcu(&inode->i_rcu, ecryptfs_i_callback); } /** diff --git a/fs/efs/super.c b/fs/efs/super.c index 5073a07652c..0f31acb0131 100644 --- a/fs/efs/super.c +++ b/fs/efs/super.c @@ -65,11 +65,18 @@ static struct inode *efs_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } -static void efs_destroy_inode(struct inode *inode) +static void efs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(efs_inode_cachep, INODE_INFO(inode)); } +static void efs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, efs_i_callback); +} + static void init_once(void *foo) { struct efs_inode_info *ei = (struct efs_inode_info *) foo; diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 79c3ae6e045..8c6c4669b38 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -150,12 +150,19 @@ static struct inode *exofs_alloc_inode(struct super_block *sb) return &oi->vfs_inode; } +static void exofs_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(exofs_inode_cachep, exofs_i(inode)); +} + /* * Remove an inode from the cache */ static void exofs_destroy_inode(struct inode *inode) { - kmem_cache_free(exofs_inode_cachep, exofs_i(inode)); + call_rcu(&inode->i_rcu, exofs_i_callback); } /* diff --git a/fs/ext2/super.c b/fs/ext2/super.c index d89e0b6a2d7..e0c6380ff99 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -161,11 +161,18 @@ static struct inode *ext2_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } -static void ext2_destroy_inode(struct inode *inode) +static void ext2_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(ext2_inode_cachep, EXT2_I(inode)); } +static void ext2_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, ext2_i_callback); +} + static void init_once(void *foo) { struct ext2_inode_info *ei = (struct ext2_inode_info *) foo; diff --git a/fs/ext3/super.c b/fs/ext3/super.c index acf8695fa8f..77ce1616f72 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -479,6 +479,13 @@ static struct inode *ext3_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } +static void ext3_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(ext3_inode_cachep, EXT3_I(inode)); +} + static void ext3_destroy_inode(struct inode *inode) { if (!list_empty(&(EXT3_I(inode)->i_orphan))) { @@ -489,7 +496,7 @@ static void ext3_destroy_inode(struct inode *inode) false); dump_stack(); } - kmem_cache_free(ext3_inode_cachep, EXT3_I(inode)); + call_rcu(&inode->i_rcu, ext3_i_callback); } static void init_once(void *foo) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index fb15c9c0be7..cd37f9d5e44 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -841,6 +841,13 @@ static int ext4_drop_inode(struct inode *inode) return drop; } +static void ext4_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(ext4_inode_cachep, EXT4_I(inode)); +} + static void ext4_destroy_inode(struct inode *inode) { ext4_ioend_wait(inode); @@ -853,7 +860,7 @@ static void ext4_destroy_inode(struct inode *inode) true); dump_stack(); } - kmem_cache_free(ext4_inode_cachep, EXT4_I(inode)); + call_rcu(&inode->i_rcu, ext4_i_callback); } static void init_once(void *foo) diff --git a/fs/fat/inode.c b/fs/fat/inode.c index ad6998a92c3..8cccfebee18 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -514,11 +514,18 @@ static struct inode *fat_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } -static void fat_destroy_inode(struct inode *inode) +static void fat_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(fat_inode_cachep, MSDOS_I(inode)); } +static void fat_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, fat_i_callback); +} + static void init_once(void *foo) { struct msdos_inode_info *ei = (struct msdos_inode_info *)foo; diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c index 8c04eac5079..2ba6719ac61 100644 --- a/fs/freevxfs/vxfs_inode.c +++ b/fs/freevxfs/vxfs_inode.c @@ -337,6 +337,13 @@ vxfs_iget(struct super_block *sbp, ino_t ino) return ip; } +static void vxfs_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(vxfs_inode_cachep, inode->i_private); +} + /** * vxfs_evict_inode - remove inode from main memory * @ip: inode to discard. @@ -350,5 +357,5 @@ vxfs_evict_inode(struct inode *ip) { truncate_inode_pages(&ip->i_data, 0); end_writeback(ip); - kmem_cache_free(vxfs_inode_cachep, ip->i_private); + call_rcu(&ip->i_rcu, vxfs_i_callback); } diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index cfce3ad86a9..44e0a6c57e8 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -99,6 +99,13 @@ static struct inode *fuse_alloc_inode(struct super_block *sb) return inode; } +static void fuse_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(fuse_inode_cachep, inode); +} + static void fuse_destroy_inode(struct inode *inode) { struct fuse_inode *fi = get_fuse_inode(inode); @@ -106,7 +113,7 @@ static void fuse_destroy_inode(struct inode *inode) BUG_ON(!list_empty(&fi->queued_writes)); if (fi->forget_req) fuse_request_free(fi->forget_req); - kmem_cache_free(fuse_inode_cachep, inode); + call_rcu(&inode->i_rcu, fuse_i_callback); } void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req, diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 2b2c4997430..16c2ecac7eb 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1405,11 +1405,18 @@ static struct inode *gfs2_alloc_inode(struct super_block *sb) return &ip->i_inode; } -static void gfs2_destroy_inode(struct inode *inode) +static void gfs2_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(gfs2_inode_cachep, inode); } +static void gfs2_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, gfs2_i_callback); +} + const struct super_operations gfs2_super_ops = { .alloc_inode = gfs2_alloc_inode, .destroy_inode = gfs2_destroy_inode, diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 4824c27cebb..ef4ee5716ab 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -167,11 +167,18 @@ static struct inode *hfs_alloc_inode(struct super_block *sb) return i ? &i->vfs_inode : NULL; } -static void hfs_destroy_inode(struct inode *inode) +static void hfs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(hfs_inode_cachep, HFS_I(inode)); } +static void hfs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, hfs_i_callback); +} + static const struct super_operations hfs_super_operations = { .alloc_inode = hfs_alloc_inode, .destroy_inode = hfs_destroy_inode, diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 52cc746d3ba..182e83a9079 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -488,11 +488,19 @@ static struct inode *hfsplus_alloc_inode(struct super_block *sb) return i ? &i->vfs_inode : NULL; } -static void hfsplus_destroy_inode(struct inode *inode) +static void hfsplus_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(hfsplus_inode_cachep, HFSPLUS_I(inode)); } +static void hfsplus_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, hfsplus_i_callback); +} + #define HFSPLUS_INODE_SIZE sizeof(struct hfsplus_inode_info) static struct dentry *hfsplus_mount(struct file_system_type *fs_type, diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 39dc505ed27..861113fcfc8 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -247,11 +247,18 @@ static void hostfs_evict_inode(struct inode *inode) } } -static void hostfs_destroy_inode(struct inode *inode) +static void hostfs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kfree(HOSTFS_I(inode)); } +static void hostfs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, hostfs_i_callback); +} + static int hostfs_show_options(struct seq_file *seq, struct vfsmount *vfs) { const char *root_path = vfs->mnt_sb->s_fs_info; diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index 6c5f01597c3..49935ba78db 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -177,11 +177,18 @@ static struct inode *hpfs_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } -static void hpfs_destroy_inode(struct inode *inode) +static void hpfs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(hpfs_inode_cachep, hpfs_i(inode)); } +static void hpfs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, hpfs_i_callback); +} + static void init_once(void *foo) { struct hpfs_inode_info *ei = (struct hpfs_inode_info *) foo; diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c index f702b5f713f..87ed48e0343 100644 --- a/fs/hppfs/hppfs.c +++ b/fs/hppfs/hppfs.c @@ -632,11 +632,18 @@ void hppfs_evict_inode(struct inode *ino) mntput(ino->i_sb->s_fs_info); } -static void hppfs_destroy_inode(struct inode *inode) +static void hppfs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kfree(HPPFS_I(inode)); } +static void hppfs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, hppfs_i_callback); +} + static const struct super_operations hppfs_sbops = { .alloc_inode = hppfs_alloc_inode, .destroy_inode = hppfs_destroy_inode, diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index a5fe68189ee..9885082b470 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -663,11 +663,18 @@ static struct inode *hugetlbfs_alloc_inode(struct super_block *sb) return &p->vfs_inode; } +static void hugetlbfs_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode)); +} + static void hugetlbfs_destroy_inode(struct inode *inode) { hugetlbfs_inc_free_inodes(HUGETLBFS_SB(inode->i_sb)); mpol_free_shared_policy(&HUGETLBFS_I(inode)->policy); - kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode)); + call_rcu(&inode->i_rcu, hugetlbfs_i_callback); } static const struct address_space_operations hugetlbfs_aops = { diff --git a/fs/inode.c b/fs/inode.c index 5a0a898f55d..6751dfe8cc0 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -272,6 +272,13 @@ void __destroy_inode(struct inode *inode) } EXPORT_SYMBOL(__destroy_inode); +static void i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(inode_cachep, inode); +} + static void destroy_inode(struct inode *inode) { BUG_ON(!list_empty(&inode->i_lru)); @@ -279,7 +286,7 @@ static void destroy_inode(struct inode *inode) if (inode->i_sb->s_op->destroy_inode) inode->i_sb->s_op->destroy_inode(inode); else - kmem_cache_free(inode_cachep, (inode)); + call_rcu(&inode->i_rcu, i_callback); } /* @@ -432,6 +439,7 @@ void end_writeback(struct inode *inode) BUG_ON(!(inode->i_state & I_FREEING)); BUG_ON(inode->i_state & I_CLEAR); inode_sync_wait(inode); + /* don't need i_lock here, no concurrent mods to i_state */ inode->i_state = I_FREEING | I_CLEAR; } EXPORT_SYMBOL(end_writeback); diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index d204ee4235f..d8f3a652243 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -81,11 +81,18 @@ static struct inode *isofs_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } -static void isofs_destroy_inode(struct inode *inode) +static void isofs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(isofs_inode_cachep, ISOFS_I(inode)); } +static void isofs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, isofs_i_callback); +} + static void init_once(void *foo) { struct iso_inode_info *ei = foo; diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index c86041b866a..853b8e30008 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -40,11 +40,18 @@ static struct inode *jffs2_alloc_inode(struct super_block *sb) return &f->vfs_inode; } -static void jffs2_destroy_inode(struct inode *inode) +static void jffs2_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(jffs2_inode_cachep, JFFS2_INODE_INFO(inode)); } +static void jffs2_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, jffs2_i_callback); +} + static void jffs2_i_init_once(void *foo) { struct jffs2_inode_info *f = foo; diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 0669fc1cc3b..b715b0f7bdf 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -115,6 +115,14 @@ static struct inode *jfs_alloc_inode(struct super_block *sb) return &jfs_inode->vfs_inode; } +static void jfs_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + struct jfs_inode_info *ji = JFS_IP(inode); + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(jfs_inode_cachep, ji); +} + static void jfs_destroy_inode(struct inode *inode) { struct jfs_inode_info *ji = JFS_IP(inode); @@ -128,7 +136,7 @@ static void jfs_destroy_inode(struct inode *inode) ji->active_ag = -1; } spin_unlock_irq(&ji->ag_lock); - kmem_cache_free(jfs_inode_cachep, ji); + call_rcu(&inode->i_rcu, jfs_i_callback); } static int jfs_statfs(struct dentry *dentry, struct kstatfs *buf) diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c index d8c71ece098..03b8c240aed 100644 --- a/fs/logfs/inode.c +++ b/fs/logfs/inode.c @@ -141,13 +141,20 @@ struct inode *logfs_safe_iget(struct super_block *sb, ino_t ino, int *is_cached) return __logfs_iget(sb, ino); } +static void logfs_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(logfs_inode_cache, logfs_inode(inode)); +} + static void __logfs_destroy_inode(struct inode *inode) { struct logfs_inode *li = logfs_inode(inode); BUG_ON(li->li_block); list_del(&li->li_freeing_list); - kmem_cache_free(logfs_inode_cache, li); + call_rcu(&inode->i_rcu, logfs_i_callback); } static void logfs_destroy_inode(struct inode *inode) diff --git a/fs/minix/inode.c b/fs/minix/inode.c index fb2020858a3..ae0b83f476a 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -68,11 +68,18 @@ static struct inode *minix_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } -static void minix_destroy_inode(struct inode *inode) +static void minix_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(minix_inode_cachep, minix_i(inode)); } +static void minix_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, minix_i_callback); +} + static void init_once(void *foo) { struct minix_inode_info *ei = (struct minix_inode_info *) foo; diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 8fb93b604e7..60047dbeb38 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -58,11 +58,18 @@ static struct inode *ncp_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } -static void ncp_destroy_inode(struct inode *inode) +static void ncp_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(ncp_inode_cachep, NCP_FINFO(inode)); } +static void ncp_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, ncp_i_callback); +} + static void init_once(void *foo) { struct ncp_inode_info *ei = (struct ncp_inode_info *) foo; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index e67e31c7341..017daa3bed3 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1438,11 +1438,18 @@ struct inode *nfs_alloc_inode(struct super_block *sb) return &nfsi->vfs_inode; } -void nfs_destroy_inode(struct inode *inode) +static void nfs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(nfs_inode_cachep, NFS_I(inode)); } +void nfs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, nfs_i_callback); +} + static inline void nfs4_init_once(struct nfs_inode *nfsi) { #ifdef CONFIG_NFS_V4 diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index d36fc7ee615..e2dcc9c733f 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -162,10 +162,13 @@ struct inode *nilfs_alloc_inode(struct super_block *sb) return &ii->vfs_inode; } -void nilfs_destroy_inode(struct inode *inode) +static void nilfs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); struct nilfs_mdt_info *mdi = NILFS_MDT(inode); + INIT_LIST_HEAD(&inode->i_dentry); + if (mdi) { kfree(mdi->mi_bgl); /* kfree(NULL) is safe */ kfree(mdi); @@ -173,6 +176,11 @@ void nilfs_destroy_inode(struct inode *inode) kmem_cache_free(nilfs_inode_cachep, NILFS_I(inode)); } +void nilfs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, nilfs_i_callback); +} + static int nilfs_sync_super(struct nilfs_sb_info *sbi, int flag) { struct the_nilfs *nilfs = sbi->s_nilfs; diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index 93622b175fc..a627ed82c0a 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -332,6 +332,13 @@ struct inode *ntfs_alloc_big_inode(struct super_block *sb) return NULL; } +static void ntfs_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(ntfs_big_inode_cache, NTFS_I(inode)); +} + void ntfs_destroy_big_inode(struct inode *inode) { ntfs_inode *ni = NTFS_I(inode); @@ -340,7 +347,7 @@ void ntfs_destroy_big_inode(struct inode *inode) BUG_ON(ni->page); if (!atomic_dec_and_test(&ni->count)) BUG(); - kmem_cache_free(ntfs_big_inode_cache, NTFS_I(inode)); + call_rcu(&inode->i_rcu, ntfs_i_callback); } static inline ntfs_inode *ntfs_alloc_extent_inode(void) diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index b2df490a19e..8c5c0eddc36 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c @@ -351,11 +351,18 @@ static struct inode *dlmfs_alloc_inode(struct super_block *sb) return &ip->ip_vfs_inode; } -static void dlmfs_destroy_inode(struct inode *inode) +static void dlmfs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(dlmfs_inode_cache, DLMFS_I(inode)); } +static void dlmfs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, dlmfs_i_callback); +} + static void dlmfs_evict_inode(struct inode *inode) { int status; diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index cfeab7ce369..17ff46fa8a1 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -569,11 +569,18 @@ static struct inode *ocfs2_alloc_inode(struct super_block *sb) return &oi->vfs_inode; } -static void ocfs2_destroy_inode(struct inode *inode) +static void ocfs2_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(ocfs2_inode_cachep, OCFS2_I(inode)); } +static void ocfs2_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, ocfs2_i_callback); +} + static unsigned long long ocfs2_max_file_offset(unsigned int bbits, unsigned int cbits) { diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index 911e61f348f..a2a5bff774e 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c @@ -343,11 +343,18 @@ static struct inode *openprom_alloc_inode(struct super_block *sb) return &oi->vfs_inode; } -static void openprom_destroy_inode(struct inode *inode) +static void openprom_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(op_inode_cachep, OP_I(inode)); } +static void openprom_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, openprom_i_callback); +} + static struct inode *openprom_iget(struct super_block *sb, ino_t ino) { struct inode *inode; diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 3ddb6068177..6bcb926b101 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -65,11 +65,18 @@ static struct inode *proc_alloc_inode(struct super_block *sb) return inode; } -static void proc_destroy_inode(struct inode *inode) +static void proc_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(proc_inode_cachep, PROC_I(inode)); } +static void proc_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, proc_i_callback); +} + static void init_once(void *foo) { struct proc_inode *ei = (struct proc_inode *) foo; diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index fcada42f1aa..e63b4171d58 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -425,11 +425,18 @@ static struct inode *qnx4_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } -static void qnx4_destroy_inode(struct inode *inode) +static void qnx4_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(qnx4_inode_cachep, qnx4_i(inode)); } +static void qnx4_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, qnx4_i_callback); +} + static void init_once(void *foo) { struct qnx4_inode_info *ei = (struct qnx4_inode_info *) foo; diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index b243117b875..2575682a9ea 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -529,11 +529,18 @@ static struct inode *reiserfs_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } -static void reiserfs_destroy_inode(struct inode *inode) +static void reiserfs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(reiserfs_inode_cachep, REISERFS_I(inode)); } +static void reiserfs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, reiserfs_i_callback); +} + static void init_once(void *foo) { struct reiserfs_inode_info *ei = (struct reiserfs_inode_info *)foo; diff --git a/fs/romfs/super.c b/fs/romfs/super.c index 6647f90e55c..2305e3121cb 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c @@ -400,11 +400,18 @@ static struct inode *romfs_alloc_inode(struct super_block *sb) /* * return a spent inode to the slab cache */ -static void romfs_destroy_inode(struct inode *inode) +static void romfs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(romfs_inode_cachep, ROMFS_I(inode)); } +static void romfs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, romfs_i_callback); +} + /* * get filesystem statistics */ diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 24de30ba34c..20700b9f2b4 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -440,11 +440,18 @@ static struct inode *squashfs_alloc_inode(struct super_block *sb) } -static void squashfs_destroy_inode(struct inode *inode) +static void squashfs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(squashfs_inode_cachep, squashfs_i(inode)); } +static void squashfs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, squashfs_i_callback); +} + static struct file_system_type squashfs_fs_type = { .owner = THIS_MODULE, diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index de44d067b9e..0630eb969a2 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -333,11 +333,18 @@ static struct inode *sysv_alloc_inode(struct super_block *sb) return &si->vfs_inode; } -static void sysv_destroy_inode(struct inode *inode) +static void sysv_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(sysv_inode_cachep, SYSV_I(inode)); } +static void sysv_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, sysv_i_callback); +} + static void init_once(void *p) { struct sysv_inode_info *si = (struct sysv_inode_info *)p; diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 91fac54c70e..6e11c2975dc 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -272,12 +272,20 @@ static struct inode *ubifs_alloc_inode(struct super_block *sb) return &ui->vfs_inode; }; +static void ubifs_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + struct ubifs_inode *ui = ubifs_inode(inode); + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(ubifs_inode_slab, ui); +} + static void ubifs_destroy_inode(struct inode *inode) { struct ubifs_inode *ui = ubifs_inode(inode); kfree(ui->data); - kmem_cache_free(ubifs_inode_slab, inode); + call_rcu(&inode->i_rcu, ubifs_i_callback); } /* diff --git a/fs/udf/super.c b/fs/udf/super.c index 4a5c7c61836..b539d53320f 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -139,11 +139,18 @@ static struct inode *udf_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } -static void udf_destroy_inode(struct inode *inode) +static void udf_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(udf_inode_cachep, UDF_I(inode)); } +static void udf_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, udf_i_callback); +} + static void init_once(void *foo) { struct udf_inode_info *ei = (struct udf_inode_info *)foo; diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 2c47daed56d..2c61ac5d4e4 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -1412,11 +1412,18 @@ static struct inode *ufs_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } -static void ufs_destroy_inode(struct inode *inode) +static void ufs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(ufs_inode_cachep, UFS_I(inode)); } +static void ufs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, ufs_i_callback); +} + static void init_once(void *foo) { struct ufs_inode_info *ei = (struct ufs_inode_info *) foo; diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 0cdd26932d8..d7de5a3f786 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -91,6 +91,17 @@ xfs_inode_alloc( return ip; } +STATIC void +xfs_inode_free_callback( + struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + struct xfs_inode *ip = XFS_I(inode); + + INIT_LIST_HEAD(&inode->i_dentry); + kmem_zone_free(xfs_inode_zone, ip); +} + void xfs_inode_free( struct xfs_inode *ip) @@ -134,7 +145,7 @@ xfs_inode_free( ASSERT(!spin_is_locked(&ip->i_flags_lock)); ASSERT(completion_done(&ip->i_flush)); - kmem_zone_free(xfs_inode_zone, ip); + call_rcu(&ip->i_vnode.i_rcu, xfs_inode_free_callback); } /* diff --git a/include/linux/fs.h b/include/linux/fs.h index 296cf2fde94..1ff4d0a33b2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -737,7 +737,10 @@ struct inode { struct list_head i_wb_list; /* backing dev IO list */ struct list_head i_lru; /* inode LRU list */ struct list_head i_sb_list; - struct list_head i_dentry; + union { + struct list_head i_dentry; + struct rcu_head i_rcu; + }; unsigned long i_ino; atomic_t i_count; unsigned int i_nlink; diff --git a/include/linux/net.h b/include/linux/net.h index 16faa130088..06bde490847 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -120,7 +120,6 @@ enum sock_shutdown_cmd { struct socket_wq { wait_queue_head_t wait; struct fasync_struct *fasync_list; - struct rcu_head rcu; } ____cacheline_aligned_in_smp; /** diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 035f4399edb..14fb6d67e6a 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -237,11 +237,18 @@ static struct inode *mqueue_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } -static void mqueue_destroy_inode(struct inode *inode) +static void mqueue_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(mqueue_inode_cachep, MQUEUE_I(inode)); } +static void mqueue_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, mqueue_i_callback); +} + static void mqueue_evict_inode(struct inode *inode) { struct mqueue_inode_info *info; diff --git a/mm/shmem.c b/mm/shmem.c index 47fdeeb9d63..5ee67c99060 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2415,13 +2415,20 @@ static struct inode *shmem_alloc_inode(struct super_block *sb) return &p->vfs_inode; } +static void shmem_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode)); +} + static void shmem_destroy_inode(struct inode *inode) { if ((inode->i_mode & S_IFMT) == S_IFREG) { /* only struct inode is valid if it's an inline symlink */ mpol_free_shared_policy(&SHMEM_I(inode)->policy); } - kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode)); + call_rcu(&inode->i_rcu, shmem_i_callback); } static void init_once(void *foo) diff --git a/net/socket.c b/net/socket.c index 088fb3fd45e..97fff3a4e72 100644 --- a/net/socket.c +++ b/net/socket.c @@ -262,20 +262,20 @@ static struct inode *sock_alloc_inode(struct super_block *sb) } -static void wq_free_rcu(struct rcu_head *head) +static void sock_free_rcu(struct rcu_head *head) { - struct socket_wq *wq = container_of(head, struct socket_wq, rcu); + struct inode *inode = container_of(head, struct inode, i_rcu); + struct socket_alloc *ei = container_of(inode, struct socket_alloc, + vfs_inode); - kfree(wq); + kfree(ei->socket.wq); + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(sock_inode_cachep, ei); } static void sock_destroy_inode(struct inode *inode) { - struct socket_alloc *ei; - - ei = container_of(inode, struct socket_alloc, vfs_inode); - call_rcu(&ei->socket.wq->rcu, wq_free_rcu); - kmem_cache_free(sock_inode_cachep, ei); + call_rcu(&inode->i_rcu, sock_free_rcu); } static void init_once(void *foo) diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index a0dc1a86fce..2899fe27f88 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -162,11 +162,19 @@ rpc_alloc_inode(struct super_block *sb) } static void -rpc_destroy_inode(struct inode *inode) +rpc_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(rpc_inode_cachep, RPC_I(inode)); } +static void +rpc_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, rpc_i_callback); +} + static int rpc_pipe_open(struct inode *inode, struct file *filp) { -- cgit v1.2.3 From ff0c7d15f9787b7e8c601533c015295cc68329f8 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:50 +1100 Subject: fs: avoid inode RCU freeing for pseudo fs Pseudo filesystems that don't put inode on RCU list or reachable by rcu-walk dentries do not need to RCU free their inodes. Signed-off-by: Nick Piggin --- fs/inode.c | 6 ++++++ fs/pipe.c | 6 +++++- include/linux/fs.h | 1 + include/linux/net.h | 1 + net/socket.c | 17 +++++++++-------- 5 files changed, 22 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/fs/inode.c b/fs/inode.c index 6751dfe8cc0..da85e56378f 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -257,6 +257,12 @@ static struct inode *alloc_inode(struct super_block *sb) return inode; } +void free_inode_nonrcu(struct inode *inode) +{ + kmem_cache_free(inode_cachep, inode); +} +EXPORT_SYMBOL(free_inode_nonrcu); + void __destroy_inode(struct inode *inode) { BUG_ON(inode_has_buffers(inode)); diff --git a/fs/pipe.c b/fs/pipe.c index 04629f36e39..ae3592dcc88 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1253,6 +1253,10 @@ out: return ret; } +static const struct super_operations pipefs_ops = { + .destroy_inode = free_inode_nonrcu, +}; + /* * pipefs should _never_ be mounted by userland - too much of security hassle, * no real gain from having the whole whorehouse mounted. So we don't need @@ -1262,7 +1266,7 @@ out: static struct dentry *pipefs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - return mount_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC); + return mount_pseudo(fs_type, "pipe:", &pipefs_ops, PIPEFS_MAGIC); } static struct file_system_type pipe_fs_type = { diff --git a/include/linux/fs.h b/include/linux/fs.h index 1ff4d0a33b2..ea202fff44f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2233,6 +2233,7 @@ extern void iget_failed(struct inode *); extern void end_writeback(struct inode *); extern void __destroy_inode(struct inode *); extern struct inode *new_inode(struct super_block *); +extern void free_inode_nonrcu(struct inode *inode); extern int should_remove_suid(struct dentry *); extern int file_remove_suid(struct file *); diff --git a/include/linux/net.h b/include/linux/net.h index 06bde490847..16faa130088 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -120,6 +120,7 @@ enum sock_shutdown_cmd { struct socket_wq { wait_queue_head_t wait; struct fasync_struct *fasync_list; + struct rcu_head rcu; } ____cacheline_aligned_in_smp; /** diff --git a/net/socket.c b/net/socket.c index 97fff3a4e72..817dc92e9ef 100644 --- a/net/socket.c +++ b/net/socket.c @@ -262,20 +262,21 @@ static struct inode *sock_alloc_inode(struct super_block *sb) } -static void sock_free_rcu(struct rcu_head *head) + +static void wq_free_rcu(struct rcu_head *head) { - struct inode *inode = container_of(head, struct inode, i_rcu); - struct socket_alloc *ei = container_of(inode, struct socket_alloc, - vfs_inode); + struct socket_wq *wq = container_of(head, struct socket_wq, rcu); - kfree(ei->socket.wq); - INIT_LIST_HEAD(&inode->i_dentry); - kmem_cache_free(sock_inode_cachep, ei); + kfree(wq); } static void sock_destroy_inode(struct inode *inode) { - call_rcu(&inode->i_rcu, sock_free_rcu); + struct socket_alloc *ei; + + ei = container_of(inode, struct socket_alloc, vfs_inode); + call_rcu(&ei->socket.wq->rcu, wq_free_rcu); + kmem_cache_free(sock_inode_cachep, ei); } static void init_once(void *foo) -- cgit v1.2.3 From fb045adb99d9b7c562dc7fef834857f78249daa1 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:55 +1100 Subject: fs: dcache reduce branches in lookup path Reduce some branches and memory accesses in dcache lookup by adding dentry flags to indicate common d_ops are set, rather than having to check them. This saves a pointer memory access (dentry->d_op) in common path lookup situations, and saves another pointer load and branch in cases where we have d_op but not the particular operation. Patched with: git grep -E '[.>]([[:space:]])*d_op([[:space:]])*=' | xargs sed -e 's/\([^\t ]*\)->d_op = \(.*\);/d_set_d_op(\1, \2);/' -e 's/\([^\t ]*\)\.d_op = \(.*\);/d_set_d_op(\&\1, \2);/' -i Signed-off-by: Nick Piggin --- arch/ia64/kernel/perfmon.c | 2 +- drivers/staging/autofs/root.c | 2 +- drivers/staging/smbfs/dir.c | 8 ++++---- fs/9p/vfs_inode.c | 26 +++++++++++++------------- fs/adfs/dir.c | 2 +- fs/adfs/super.c | 2 +- fs/affs/namei.c | 2 +- fs/affs/super.c | 2 +- fs/afs/dir.c | 2 +- fs/anon_inodes.c | 2 +- fs/autofs4/inode.c | 2 +- fs/autofs4/root.c | 10 +++++----- fs/btrfs/export.c | 4 ++-- fs/btrfs/inode.c | 2 +- fs/ceph/dir.c | 6 +++--- fs/cifs/dir.c | 16 ++++++++-------- fs/cifs/inode.c | 8 ++++---- fs/cifs/link.c | 4 ++-- fs/cifs/readdir.c | 4 ++-- fs/coda/dir.c | 2 +- fs/configfs/dir.c | 8 ++++---- fs/dcache.c | 30 ++++++++++++++++++++++++++---- fs/ecryptfs/inode.c | 2 +- fs/ecryptfs/main.c | 4 ++-- fs/fat/inode.c | 4 ++-- fs/fat/namei_msdos.c | 6 +++--- fs/fat/namei_vfat.c | 8 ++++---- fs/fuse/dir.c | 2 +- fs/fuse/inode.c | 4 ++-- fs/gfs2/export.c | 4 ++-- fs/gfs2/ops_fstype.c | 2 +- fs/gfs2/ops_inode.c | 2 +- fs/hfs/dir.c | 2 +- fs/hfs/super.c | 2 +- fs/hfsplus/dir.c | 2 +- fs/hfsplus/super.c | 2 +- fs/hostfs/hostfs_kern.c | 2 +- fs/hpfs/dentry.c | 2 +- fs/isofs/inode.c | 2 +- fs/isofs/namei.c | 2 +- fs/jfs/namei.c | 4 ++-- fs/jfs/super.c | 2 +- fs/libfs.c | 2 +- fs/minix/namei.c | 2 +- fs/namei.c | 31 ++++++++++++++++++++----------- fs/ncpfs/dir.c | 4 ++-- fs/ncpfs/inode.c | 2 +- fs/nfs/dir.c | 6 +++--- fs/nfs/getroot.c | 4 ++-- fs/ocfs2/export.c | 4 ++-- fs/ocfs2/namei.c | 10 +++++----- fs/pipe.c | 2 +- fs/proc/base.c | 12 ++++++------ fs/proc/generic.c | 2 +- fs/proc/proc_sysctl.c | 4 ++-- fs/reiserfs/xattr.c | 2 +- fs/sysfs/dir.c | 2 +- fs/sysv/namei.c | 2 +- fs/sysv/super.c | 2 +- include/linux/dcache.h | 6 ++++++ kernel/cgroup.c | 2 +- net/socket.c | 2 +- net/sunrpc/rpc_pipe.c | 2 +- 63 files changed, 174 insertions(+), 137 deletions(-) (limited to 'net') diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index d39d8a53b57..5a24f40bb48 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -2233,7 +2233,7 @@ pfm_alloc_file(pfm_context_t *ctx) } path.mnt = mntget(pfmfs_mnt); - path.dentry->d_op = &pfmfs_dentry_operations; + d_set_d_op(path.dentry, &pfmfs_dentry_operations); d_add(path.dentry, inode); file = alloc_file(&path, FMODE_READ, &pfm_file_ops); diff --git a/drivers/staging/autofs/root.c b/drivers/staging/autofs/root.c index 0fdec4befd8..b09adb57971 100644 --- a/drivers/staging/autofs/root.c +++ b/drivers/staging/autofs/root.c @@ -237,7 +237,7 @@ static struct dentry *autofs_root_lookup(struct inode *dir, struct dentry *dentr * * We need to do this before we release the directory semaphore. */ - dentry->d_op = &autofs_dentry_operations; + d_set_d_op(dentry, &autofs_dentry_operations); dentry->d_flags |= DCACHE_AUTOFS_PENDING; d_add(dentry, NULL); diff --git a/drivers/staging/smbfs/dir.c b/drivers/staging/smbfs/dir.c index 5f79799d5d4..78f09412740 100644 --- a/drivers/staging/smbfs/dir.c +++ b/drivers/staging/smbfs/dir.c @@ -398,9 +398,9 @@ smb_new_dentry(struct dentry *dentry) struct smb_sb_info *server = server_from_dentry(dentry); if (server->mnt->flags & SMB_MOUNT_CASE) - dentry->d_op = &smbfs_dentry_operations_case; + d_set_d_op(dentry, &smbfs_dentry_operations_case); else - dentry->d_op = &smbfs_dentry_operations; + d_set_d_op(dentry, &smbfs_dentry_operations); dentry->d_time = jiffies; } @@ -462,9 +462,9 @@ smb_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) add_entry: server = server_from_dentry(dentry); if (server->mnt->flags & SMB_MOUNT_CASE) - dentry->d_op = &smbfs_dentry_operations_case; + d_set_d_op(dentry, &smbfs_dentry_operations_case); else - dentry->d_op = &smbfs_dentry_operations; + d_set_d_op(dentry, &smbfs_dentry_operations); d_add(dentry, inode); smb_renew_times(dentry); diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index f6f9081e6d2..df8bbb358d5 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -635,9 +635,9 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir, } if (v9ses->cache) - dentry->d_op = &v9fs_cached_dentry_operations; + d_set_d_op(dentry, &v9fs_cached_dentry_operations); else - dentry->d_op = &v9fs_dentry_operations; + d_set_d_op(dentry, &v9fs_dentry_operations); d_instantiate(dentry, inode); err = v9fs_fid_add(dentry, fid); @@ -749,7 +749,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode, err); goto error; } - dentry->d_op = &v9fs_cached_dentry_operations; + d_set_d_op(dentry, &v9fs_cached_dentry_operations); d_instantiate(dentry, inode); err = v9fs_fid_add(dentry, fid); if (err < 0) @@ -767,7 +767,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode, err = PTR_ERR(inode); goto error; } - dentry->d_op = &v9fs_dentry_operations; + d_set_d_op(dentry, &v9fs_dentry_operations); d_instantiate(dentry, inode); } /* Now set the ACL based on the default value */ @@ -956,7 +956,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir, err); goto error; } - dentry->d_op = &v9fs_cached_dentry_operations; + d_set_d_op(dentry, &v9fs_cached_dentry_operations); d_instantiate(dentry, inode); err = v9fs_fid_add(dentry, fid); if (err < 0) @@ -973,7 +973,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir, err = PTR_ERR(inode); goto error; } - dentry->d_op = &v9fs_dentry_operations; + d_set_d_op(dentry, &v9fs_dentry_operations); d_instantiate(dentry, inode); } /* Now set the ACL based on the default value */ @@ -1041,9 +1041,9 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, inst_out: if (v9ses->cache) - dentry->d_op = &v9fs_cached_dentry_operations; + d_set_d_op(dentry, &v9fs_cached_dentry_operations); else - dentry->d_op = &v9fs_dentry_operations; + d_set_d_op(dentry, &v9fs_dentry_operations); d_add(dentry, inode); return NULL; @@ -1709,7 +1709,7 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry, err); goto error; } - dentry->d_op = &v9fs_cached_dentry_operations; + d_set_d_op(dentry, &v9fs_cached_dentry_operations); d_instantiate(dentry, inode); err = v9fs_fid_add(dentry, fid); if (err < 0) @@ -1722,7 +1722,7 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry, err = PTR_ERR(inode); goto error; } - dentry->d_op = &v9fs_dentry_operations; + d_set_d_op(dentry, &v9fs_dentry_operations); d_instantiate(dentry, inode); } @@ -1856,7 +1856,7 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir, ihold(old_dentry->d_inode); } - dentry->d_op = old_dentry->d_op; + d_set_d_op(dentry, old_dentry->d_op); d_instantiate(dentry, old_dentry->d_inode); return err; @@ -1980,7 +1980,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode, err); goto error; } - dentry->d_op = &v9fs_cached_dentry_operations; + d_set_d_op(dentry, &v9fs_cached_dentry_operations); d_instantiate(dentry, inode); err = v9fs_fid_add(dentry, fid); if (err < 0) @@ -1996,7 +1996,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode, err = PTR_ERR(inode); goto error; } - dentry->d_op = &v9fs_dentry_operations; + d_set_d_op(dentry, &v9fs_dentry_operations); d_instantiate(dentry, inode); } /* Now set the ACL based on the default value */ diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c index a11e5e10271..bf7693c384f 100644 --- a/fs/adfs/dir.c +++ b/fs/adfs/dir.c @@ -276,7 +276,7 @@ adfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) struct object_info obj; int error; - dentry->d_op = &adfs_dentry_operations; + d_set_d_op(dentry, &adfs_dentry_operations); lock_kernel(); error = adfs_dir_lookup_byname(dir, &dentry->d_name, &obj); if (error == 0) { diff --git a/fs/adfs/super.c b/fs/adfs/super.c index 47dffc513a2..a4041b52fbc 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -484,7 +484,7 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent) adfs_error(sb, "get root inode failed\n"); goto error; } else - sb->s_root->d_op = &adfs_dentry_operations; + d_set_d_op(sb->s_root, &adfs_dentry_operations); unlock_kernel(); return 0; diff --git a/fs/affs/namei.c b/fs/affs/namei.c index 5aca08c2110..944a4042fb6 100644 --- a/fs/affs/namei.c +++ b/fs/affs/namei.c @@ -240,7 +240,7 @@ affs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) if (IS_ERR(inode)) return ERR_CAST(inode); } - dentry->d_op = AFFS_SB(sb)->s_flags & SF_INTL ? &affs_intl_dentry_operations : &affs_dentry_operations; + d_set_d_op(dentry, AFFS_SB(sb)->s_flags & SF_INTL ? &affs_intl_dentry_operations : &affs_dentry_operations); d_add(dentry, inode); return NULL; } diff --git a/fs/affs/super.c b/fs/affs/super.c index 4c18fcfb0a1..d39081bbe7c 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -482,7 +482,7 @@ got_root: printk(KERN_ERR "AFFS: Get root inode failed\n"); goto out_error; } - sb->s_root->d_op = &affs_dentry_operations; + d_set_d_op(sb->s_root, &affs_dentry_operations); pr_debug("AFFS: s_flags=%lX\n",sb->s_flags); return 0; diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 2c18cde2700..b8bb7e7148d 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -581,7 +581,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, } success: - dentry->d_op = &afs_fs_dentry_operations; + d_set_d_op(dentry, &afs_fs_dentry_operations); d_add(dentry, inode); _leave(" = 0 { vn=%u u=%u } -> { ino=%lu v=%llu }", diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 57ce55b2564..aca8806fa20 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -113,7 +113,7 @@ struct file *anon_inode_getfile(const char *name, */ ihold(anon_inode_inode); - path.dentry->d_op = &anon_inodefs_dentry_operations; + d_set_d_op(path.dentry, &anon_inodefs_dentry_operations); d_instantiate(path.dentry, anon_inode_inode); error = -ENFILE; diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index ac87e49fa70..a7bdb9dcac8 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -309,7 +309,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) goto fail_iput; pipe = NULL; - root->d_op = &autofs4_sb_dentry_operations; + d_set_d_op(root, &autofs4_sb_dentry_operations); root->d_fsdata = ino; /* Can this call block? */ diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 10ca68a96dc..bfe3f2eb684 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -571,7 +571,7 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s * we check for the hashed dentry and return the newly * hashed dentry. */ - dentry->d_op = &autofs4_root_dentry_operations; + d_set_d_op(dentry, &autofs4_root_dentry_operations); /* * And we need to ensure that the same dentry is used for @@ -710,9 +710,9 @@ static int autofs4_dir_symlink(struct inode *dir, d_add(dentry, inode); if (dir == dir->i_sb->s_root->d_inode) - dentry->d_op = &autofs4_root_dentry_operations; + d_set_d_op(dentry, &autofs4_root_dentry_operations); else - dentry->d_op = &autofs4_dentry_operations; + d_set_d_op(dentry, &autofs4_dentry_operations); dentry->d_fsdata = ino; ino->dentry = dget(dentry); @@ -845,9 +845,9 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode) d_add(dentry, inode); if (dir == dir->i_sb->s_root->d_inode) - dentry->d_op = &autofs4_root_dentry_operations; + d_set_d_op(dentry, &autofs4_root_dentry_operations); else - dentry->d_op = &autofs4_dentry_operations; + d_set_d_op(dentry, &autofs4_dentry_operations); dentry->d_fsdata = ino; ino->dentry = dget(dentry); diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index 659f532d26a..0ccf9a8afcd 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c @@ -110,7 +110,7 @@ static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid, dentry = d_obtain_alias(inode); if (!IS_ERR(dentry)) - dentry->d_op = &btrfs_dentry_operations; + d_set_d_op(dentry, &btrfs_dentry_operations); return dentry; fail: srcu_read_unlock(&fs_info->subvol_srcu, index); @@ -225,7 +225,7 @@ static struct dentry *btrfs_get_parent(struct dentry *child) key.offset = 0; dentry = d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root, NULL)); if (!IS_ERR(dentry)) - dentry->d_op = &btrfs_dentry_operations; + d_set_d_op(dentry, &btrfs_dentry_operations); return dentry; fail: btrfs_free_path(path); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f9d2994a42a..63e4546b478 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4084,7 +4084,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) int index; int ret; - dentry->d_op = &btrfs_dentry_operations; + d_set_d_op(dentry, &btrfs_dentry_operations); if (dentry->d_name.len > BTRFS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 58abc3da611..cc01cf82676 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -42,11 +42,11 @@ int ceph_init_dentry(struct dentry *dentry) if (dentry->d_parent == NULL || /* nfs fh_to_dentry */ ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP) - dentry->d_op = &ceph_dentry_ops; + d_set_d_op(dentry, &ceph_dentry_ops); else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR) - dentry->d_op = &ceph_snapdir_dentry_ops; + d_set_d_op(dentry, &ceph_snapdir_dentry_ops); else - dentry->d_op = &ceph_snap_dentry_ops; + d_set_d_op(dentry, &ceph_snap_dentry_ops); di = kmem_cache_alloc(ceph_dentry_cachep, GFP_NOFS | __GFP_ZERO); if (!di) diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 88bfe686ac0..e3b10ca6d45 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -135,9 +135,9 @@ static void setup_cifs_dentry(struct cifsTconInfo *tcon, struct inode *newinode) { if (tcon->nocase) - direntry->d_op = &cifs_ci_dentry_ops; + d_set_d_op(direntry, &cifs_ci_dentry_ops); else - direntry->d_op = &cifs_dentry_ops; + d_set_d_op(direntry, &cifs_dentry_ops); d_instantiate(direntry, newinode); } @@ -421,9 +421,9 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode, rc = cifs_get_inode_info_unix(&newinode, full_path, inode->i_sb, xid); if (pTcon->nocase) - direntry->d_op = &cifs_ci_dentry_ops; + d_set_d_op(direntry, &cifs_ci_dentry_ops); else - direntry->d_op = &cifs_dentry_ops; + d_set_d_op(direntry, &cifs_dentry_ops); if (rc == 0) d_instantiate(direntry, newinode); @@ -604,9 +604,9 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, if ((rc == 0) && (newInode != NULL)) { if (pTcon->nocase) - direntry->d_op = &cifs_ci_dentry_ops; + d_set_d_op(direntry, &cifs_ci_dentry_ops); else - direntry->d_op = &cifs_dentry_ops; + d_set_d_op(direntry, &cifs_dentry_ops); d_add(direntry, newInode); if (posix_open) { filp = lookup_instantiate_filp(nd, direntry, @@ -634,9 +634,9 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, rc = 0; direntry->d_time = jiffies; if (pTcon->nocase) - direntry->d_op = &cifs_ci_dentry_ops; + d_set_d_op(direntry, &cifs_ci_dentry_ops); else - direntry->d_op = &cifs_dentry_ops; + d_set_d_op(direntry, &cifs_dentry_ops); d_add(direntry, NULL); /* if it was once a directory (but how can we tell?) we could do shrink_dcache_parent(direntry); */ diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 99b9a2cc14b..2a239d878e8 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1319,9 +1319,9 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode) to set uid/gid */ inc_nlink(inode); if (pTcon->nocase) - direntry->d_op = &cifs_ci_dentry_ops; + d_set_d_op(direntry, &cifs_ci_dentry_ops); else - direntry->d_op = &cifs_dentry_ops; + d_set_d_op(direntry, &cifs_dentry_ops); cifs_unix_basic_to_fattr(&fattr, pInfo, cifs_sb); cifs_fill_uniqueid(inode->i_sb, &fattr); @@ -1363,9 +1363,9 @@ mkdir_get_info: inode->i_sb, xid, NULL); if (pTcon->nocase) - direntry->d_op = &cifs_ci_dentry_ops; + d_set_d_op(direntry, &cifs_ci_dentry_ops); else - direntry->d_op = &cifs_dentry_ops; + d_set_d_op(direntry, &cifs_dentry_ops); d_instantiate(direntry, newinode); /* setting nlink not necessary except in cases where we * failed to get it from the server or was set bogus */ diff --git a/fs/cifs/link.c b/fs/cifs/link.c index 85cdbf831e7..fe2f6a93c49 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -525,9 +525,9 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname) rc); } else { if (pTcon->nocase) - direntry->d_op = &cifs_ci_dentry_ops; + d_set_d_op(direntry, &cifs_ci_dentry_ops); else - direntry->d_op = &cifs_dentry_ops; + d_set_d_op(direntry, &cifs_dentry_ops); d_instantiate(direntry, newinode); } } diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index ee463aeca0b..ec5b68e3b92 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -103,9 +103,9 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name, } if (cifs_sb_master_tcon(CIFS_SB(sb))->nocase) - dentry->d_op = &cifs_ci_dentry_ops; + d_set_d_op(dentry, &cifs_ci_dentry_ops); else - dentry->d_op = &cifs_dentry_ops; + d_set_d_op(dentry, &cifs_dentry_ops); alias = d_materialise_unique(dentry, inode); if (alias != NULL) { diff --git a/fs/coda/dir.c b/fs/coda/dir.c index 9e37e8bc9b8..aa40c811f8d 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -125,7 +125,7 @@ static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry, struc return ERR_PTR(error); exit: - entry->d_op = &coda_dentry_operations; + d_set_d_op(entry, &coda_dentry_operations); if (inode && (type & CODA_NOCACHE)) coda_flag_inode(inode, C_VATTR | C_PURGE); diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index e9acea440ff..36637a8c1ed 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -442,7 +442,7 @@ static int configfs_attach_attr(struct configfs_dirent * sd, struct dentry * den return error; } - dentry->d_op = &configfs_dentry_ops; + d_set_d_op(dentry, &configfs_dentry_ops); d_rehash(dentry); return 0; @@ -489,7 +489,7 @@ static struct dentry * configfs_lookup(struct inode *dir, */ if (dentry->d_name.len > NAME_MAX) return ERR_PTR(-ENAMETOOLONG); - dentry->d_op = &configfs_dentry_ops; + d_set_d_op(dentry, &configfs_dentry_ops); d_add(dentry, NULL); return NULL; } @@ -683,7 +683,7 @@ static int create_default_group(struct config_group *parent_group, ret = -ENOMEM; child = d_alloc(parent, &name); if (child) { - child->d_op = &configfs_dentry_ops; + d_set_d_op(child, &configfs_dentry_ops); d_add(child, NULL); ret = configfs_attach_group(&parent_group->cg_item, @@ -1681,7 +1681,7 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys) err = -ENOMEM; dentry = d_alloc(configfs_sb->s_root, &name); if (dentry) { - dentry->d_op = &configfs_dentry_ops; + d_set_d_op(dentry, &configfs_dentry_ops); d_add(dentry, NULL); err = configfs_attach_group(sd->s_element, &group->cg_item, diff --git a/fs/dcache.c b/fs/dcache.c index 1d5cf511e1c..f9693da3efb 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -398,7 +398,7 @@ repeat: return; } - if (dentry->d_op && dentry->d_op->d_delete) { + if (dentry->d_flags & DCACHE_OP_DELETE) { if (dentry->d_op->d_delete(dentry)) goto kill_it; } @@ -1301,6 +1301,28 @@ struct dentry *d_alloc_name(struct dentry *parent, const char *name) } EXPORT_SYMBOL(d_alloc_name); +void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op) +{ + BUG_ON(dentry->d_op); + BUG_ON(dentry->d_flags & (DCACHE_OP_HASH | + DCACHE_OP_COMPARE | + DCACHE_OP_REVALIDATE | + DCACHE_OP_DELETE )); + dentry->d_op = op; + if (!op) + return; + if (op->d_hash) + dentry->d_flags |= DCACHE_OP_HASH; + if (op->d_compare) + dentry->d_flags |= DCACHE_OP_COMPARE; + if (op->d_revalidate) + dentry->d_flags |= DCACHE_OP_REVALIDATE; + if (op->d_delete) + dentry->d_flags |= DCACHE_OP_DELETE; + +} +EXPORT_SYMBOL(d_set_d_op); + static void __d_instantiate(struct dentry *dentry, struct inode *inode) { spin_lock(&dentry->d_lock); @@ -1731,7 +1753,7 @@ seqretry: */ if (read_seqcount_retry(&dentry->d_seq, *seq)) goto seqretry; - if (parent->d_op && parent->d_op->d_compare) { + if (parent->d_flags & DCACHE_OP_COMPARE) { if (parent->d_op->d_compare(parent, *inode, dentry, i, tlen, tname, name)) @@ -1846,7 +1868,7 @@ struct dentry *__d_lookup(struct dentry *parent, struct qstr *name) */ tlen = dentry->d_name.len; tname = dentry->d_name.name; - if (parent->d_op && parent->d_op->d_compare) { + if (parent->d_flags & DCACHE_OP_COMPARE) { if (parent->d_op->d_compare(parent, parent->d_inode, dentry, dentry->d_inode, tlen, tname, name)) @@ -1887,7 +1909,7 @@ struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name) * routine may choose to leave the hash value unchanged. */ name->hash = full_name_hash(name->name, name->len); - if (dir->d_op && dir->d_op->d_hash) { + if (dir->d_flags & DCACHE_OP_HASH) { if (dir->d_op->d_hash(dir, dir->d_inode, name) < 0) goto out; } diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 5e5c7ec1fc9..f91b35db4c6 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -441,7 +441,7 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, struct qstr lower_name; int rc = 0; - ecryptfs_dentry->d_op = &ecryptfs_dops; + d_set_d_op(ecryptfs_dentry, &ecryptfs_dops); if ((ecryptfs_dentry->d_name.len == 1 && !strcmp(ecryptfs_dentry->d_name.name, ".")) || (ecryptfs_dentry->d_name.len == 2 diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index a9dbd62518e..35103867537 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -189,7 +189,7 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry, if (special_file(lower_inode->i_mode)) init_special_inode(inode, lower_inode->i_mode, lower_inode->i_rdev); - dentry->d_op = &ecryptfs_dops; + d_set_d_op(dentry, &ecryptfs_dops); fsstack_copy_attr_all(inode, lower_inode); /* This size will be overwritten for real files w/ headers and * other metadata */ @@ -594,7 +594,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags deactivate_locked_super(s); goto out; } - s->s_root->d_op = &ecryptfs_dops; + d_set_d_op(s->s_root, &ecryptfs_dops); s->s_root->d_sb = s; s->s_root->d_parent = s->s_root; diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 8cccfebee18..206351af7c5 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -750,7 +750,7 @@ static struct dentry *fat_fh_to_dentry(struct super_block *sb, */ result = d_obtain_alias(inode); if (!IS_ERR(result)) - result->d_op = sb->s_root->d_op; + d_set_d_op(result, sb->s_root->d_op); return result; } @@ -800,7 +800,7 @@ static struct dentry *fat_get_parent(struct dentry *child) parent = d_obtain_alias(inode); if (!IS_ERR(parent)) - parent->d_op = sb->s_root->d_op; + d_set_d_op(parent, sb->s_root->d_op); out: unlock_super(sb); diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index 3b3e072d898..35ffe43afa4 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -227,10 +227,10 @@ static struct dentry *msdos_lookup(struct inode *dir, struct dentry *dentry, } out: unlock_super(sb); - dentry->d_op = &msdos_dentry_operations; + d_set_d_op(dentry, &msdos_dentry_operations); dentry = d_splice_alias(inode, dentry); if (dentry) - dentry->d_op = &msdos_dentry_operations; + d_set_d_op(dentry, &msdos_dentry_operations); return dentry; error: @@ -673,7 +673,7 @@ static int msdos_fill_super(struct super_block *sb, void *data, int silent) } sb->s_flags |= MS_NOATIME; - sb->s_root->d_op = &msdos_dentry_operations; + d_set_d_op(sb->s_root, &msdos_dentry_operations); unlock_super(sb); return 0; } diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 4fc06278db4..3be5ed7d859 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -766,11 +766,11 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry, out: unlock_super(sb); - dentry->d_op = sb->s_root->d_op; + d_set_d_op(dentry, sb->s_root->d_op); dentry->d_time = dentry->d_parent->d_inode->i_version; dentry = d_splice_alias(inode, dentry); if (dentry) { - dentry->d_op = sb->s_root->d_op; + d_set_d_op(dentry, sb->s_root->d_op); dentry->d_time = dentry->d_parent->d_inode->i_version; } return dentry; @@ -1072,9 +1072,9 @@ static int vfat_fill_super(struct super_block *sb, void *data, int silent) } if (MSDOS_SB(sb)->options.name_check != 's') - sb->s_root->d_op = &vfat_ci_dentry_ops; + d_set_d_op(sb->s_root, &vfat_ci_dentry_ops); else - sb->s_root->d_op = &vfat_dentry_ops; + d_set_d_op(sb->s_root, &vfat_dentry_ops); unlock_super(sb); return 0; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index c9627c95482..c9a8a426a39 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -347,7 +347,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, } entry = newent ? newent : entry; - entry->d_op = &fuse_dentry_operations; + d_set_d_op(entry, &fuse_dentry_operations); if (outarg_valid) fuse_change_entry_timeout(entry, &outarg); else diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 44e0a6c57e8..a8b31da19b9 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -626,7 +626,7 @@ static struct dentry *fuse_get_dentry(struct super_block *sb, entry = d_obtain_alias(inode); if (!IS_ERR(entry) && get_node_id(inode) != FUSE_ROOT_ID) { - entry->d_op = &fuse_dentry_operations; + d_set_d_op(entry, &fuse_dentry_operations); fuse_invalidate_entry_cache(entry); } @@ -728,7 +728,7 @@ static struct dentry *fuse_get_parent(struct dentry *child) parent = d_obtain_alias(inode); if (!IS_ERR(parent) && get_node_id(inode) != FUSE_ROOT_ID) { - parent->d_op = &fuse_dentry_operations; + d_set_d_op(parent, &fuse_dentry_operations); fuse_invalidate_entry_cache(parent); } diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c index 5ab3839dfcb..97012ecff56 100644 --- a/fs/gfs2/export.c +++ b/fs/gfs2/export.c @@ -130,7 +130,7 @@ static struct dentry *gfs2_get_parent(struct dentry *child) dentry = d_obtain_alias(gfs2_lookupi(child->d_inode, &gfs2_qdotdot, 1)); if (!IS_ERR(dentry)) - dentry->d_op = &gfs2_dops; + d_set_d_op(dentry, &gfs2_dops); return dentry; } @@ -158,7 +158,7 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, out_inode: dentry = d_obtain_alias(inode); if (!IS_ERR(dentry)) - dentry->d_op = &gfs2_dops; + d_set_d_op(dentry, &gfs2_dops); return dentry; } diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 3eb1393f7b8..2aeabd4218c 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -440,7 +440,7 @@ static int gfs2_lookup_root(struct super_block *sb, struct dentry **dptr, iput(inode); return -ENOMEM; } - dentry->d_op = &gfs2_dops; + d_set_d_op(dentry, &gfs2_dops); *dptr = dentry; return 0; } diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index 12cbea7502c..f28f89796f4 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -106,7 +106,7 @@ static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry, { struct inode *inode = NULL; - dentry->d_op = &gfs2_dops; + d_set_d_op(dentry, &gfs2_dops); inode = gfs2_lookupi(dir, &dentry->d_name, 0); if (inode && IS_ERR(inode)) diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c index 2b3b8611b41..ea4aefe7c65 100644 --- a/fs/hfs/dir.c +++ b/fs/hfs/dir.c @@ -25,7 +25,7 @@ static struct dentry *hfs_lookup(struct inode *dir, struct dentry *dentry, struct inode *inode = NULL; int res; - dentry->d_op = &hfs_dentry_operations; + d_set_d_op(dentry, &hfs_dentry_operations); hfs_find_init(HFS_SB(dir->i_sb)->cat_tree, &fd); hfs_cat_build_key(dir->i_sb, fd.search_key, dir->i_ino, &dentry->d_name); diff --git a/fs/hfs/super.c b/fs/hfs/super.c index ef4ee5716ab..0bef62aa4f4 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -434,7 +434,7 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent) if (!sb->s_root) goto bail_iput; - sb->s_root->d_op = &hfs_dentry_operations; + d_set_d_op(sb->s_root, &hfs_dentry_operations); /* everything's okay */ return 0; diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index 9d59c0571f5..ccab87145f7 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c @@ -37,7 +37,7 @@ static struct dentry *hfsplus_lookup(struct inode *dir, struct dentry *dentry, sb = dir->i_sb; - dentry->d_op = &hfsplus_dentry_operations; + d_set_d_op(dentry, &hfsplus_dentry_operations); dentry->d_fsdata = NULL; hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, &dentry->d_name); diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 182e83a9079..ddf712e4700 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -419,7 +419,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) err = -ENOMEM; goto cleanup; } - sb->s_root->d_op = &hfsplus_dentry_operations; + d_set_d_op(sb->s_root, &hfsplus_dentry_operations); str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1; str.name = HFSP_HIDDENDIR_NAME; diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 861113fcfc8..0bc81cf256b 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -612,7 +612,7 @@ struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry, goto out_put; d_add(dentry, inode); - dentry->d_op = &hostfs_dentry_ops; + d_set_d_op(dentry, &hostfs_dentry_ops); return NULL; out_put: diff --git a/fs/hpfs/dentry.c b/fs/hpfs/dentry.c index 35526df1fd3..32c13a94e1e 100644 --- a/fs/hpfs/dentry.c +++ b/fs/hpfs/dentry.c @@ -65,5 +65,5 @@ static const struct dentry_operations hpfs_dentry_operations = { void hpfs_set_dentry_operations(struct dentry *dentry) { - dentry->d_op = &hpfs_dentry_operations; + d_set_d_op(dentry, &hpfs_dentry_operations); } diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index d8f3a652243..844a7903c72 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -949,7 +949,7 @@ root_found: table += 2; if (opt.check == 'r') table++; - s->s_root->d_op = &isofs_dentry_ops[table]; + d_set_d_op(s->s_root, &isofs_dentry_ops[table]); kfree(opt.iocharset); diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c index 715f7d31804..679a849c3b2 100644 --- a/fs/isofs/namei.c +++ b/fs/isofs/namei.c @@ -172,7 +172,7 @@ struct dentry *isofs_lookup(struct inode *dir, struct dentry *dentry, struct nam struct inode *inode; struct page *page; - dentry->d_op = dir->i_sb->s_root->d_op; + d_set_d_op(dentry, dir->i_sb->s_root->d_op); page = alloc_page(GFP_USER); if (!page) diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 57f90dad891..a151cbdec62 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -1466,7 +1466,7 @@ static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, struc jfs_info("jfs_lookup: name = %s", name); if (JFS_SBI(dip->i_sb)->mntflag & JFS_OS2) - dentry->d_op = &jfs_ci_dentry_operations; + d_set_d_op(dentry, &jfs_ci_dentry_operations); if ((name[0] == '.') && (len == 1)) inum = dip->i_ino; @@ -1495,7 +1495,7 @@ static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, struc dentry = d_splice_alias(ip, dentry); if (dentry && (JFS_SBI(dip->i_sb)->mntflag & JFS_OS2)) - dentry->d_op = &jfs_ci_dentry_operations; + d_set_d_op(dentry, &jfs_ci_dentry_operations); return dentry; } diff --git a/fs/jfs/super.c b/fs/jfs/super.c index b715b0f7bdf..3150d766e0d 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -525,7 +525,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) goto out_no_root; if (sbi->mntflag & JFS_OS2) - sb->s_root->d_op = &jfs_ci_dentry_operations; + d_set_d_op(sb->s_root, &jfs_ci_dentry_operations); /* logical blocks are represented by 40 bits in pxd_t, etc. */ sb->s_maxbytes = ((u64) sb->s_blocksize) << 40; diff --git a/fs/libfs.c b/fs/libfs.c index 28b36663c44..889311e3d06 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -59,7 +59,7 @@ struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, struct na if (dentry->d_name.len > NAME_MAX) return ERR_PTR(-ENAMETOOLONG); - dentry->d_op = &simple_dentry_operations; + d_set_d_op(dentry, &simple_dentry_operations); d_add(dentry, NULL); return NULL; } diff --git a/fs/minix/namei.c b/fs/minix/namei.c index c0d35a3acce..1b9e07728a9 100644 --- a/fs/minix/namei.c +++ b/fs/minix/namei.c @@ -23,7 +23,7 @@ static struct dentry *minix_lookup(struct inode * dir, struct dentry *dentry, st struct inode * inode = NULL; ino_t ino; - dentry->d_op = dir->i_sb->s_root->d_op; + d_set_d_op(dentry, dir->i_sb->s_root->d_op); if (dentry->d_name.len > minix_sb(dir->i_sb)->s_namelen) return ERR_PTR(-ENAMETOOLONG); diff --git a/fs/namei.c b/fs/namei.c index c731b50a618..90bd2873e11 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -587,6 +587,17 @@ do_revalidate(struct dentry *dentry, struct nameidata *nd) return dentry; } +static inline int need_reval_dot(struct dentry *dentry) +{ + if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE))) + return 0; + + if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT))) + return 0; + + return 1; +} + /* * force_reval_path - force revalidation of a dentry * @@ -610,10 +621,9 @@ force_reval_path(struct path *path, struct nameidata *nd) /* * only check on filesystems where it's possible for the dentry to - * become stale. It's assumed that if this flag is set then the - * d_revalidate op will also be defined. + * become stale. */ - if (!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) + if (!need_reval_dot(dentry)) return 0; status = dentry->d_op->d_revalidate(dentry, nd); @@ -1003,7 +1013,7 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, * See if the low-level filesystem might want * to use its own hash.. */ - if (parent->d_op && parent->d_op->d_hash) { + if (unlikely(parent->d_flags & DCACHE_OP_HASH)) { int err = parent->d_op->d_hash(parent, nd->inode, name); if (err < 0) return err; @@ -1029,7 +1039,7 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, return -ECHILD; nd->seq = seq; - if (dentry->d_op && dentry->d_op->d_revalidate) { + if (dentry->d_flags & DCACHE_OP_REVALIDATE) { /* We commonly drop rcu-walk here */ if (nameidata_dentry_drop_rcu(nd, dentry)) return -ECHILD; @@ -1043,7 +1053,7 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, if (!dentry) goto need_lookup; found: - if (dentry->d_op && dentry->d_op->d_revalidate) + if (dentry->d_flags & DCACHE_OP_REVALIDATE) goto need_revalidate; done: path->mnt = mnt; @@ -1281,8 +1291,7 @@ return_reval: * We bypassed the ordinary revalidation routines. * We may need to check the cached dentry for staleness. */ - if (nd->path.dentry && nd->path.dentry->d_sb && - (nd->path.dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) { + if (need_reval_dot(nd->path.dentry)) { if (nameidata_drop_rcu_maybe(nd)) return -ECHILD; err = -ESTALE; @@ -1602,7 +1611,7 @@ static struct dentry *__lookup_hash(struct qstr *name, * See if the low-level filesystem might want * to use its own hash.. */ - if (base->d_op && base->d_op->d_hash) { + if (base->d_flags & DCACHE_OP_HASH) { err = base->d_op->d_hash(base, inode, name); dentry = ERR_PTR(err); if (err < 0) @@ -1616,7 +1625,7 @@ static struct dentry *__lookup_hash(struct qstr *name, */ dentry = d_lookup(base, name); - if (dentry && dentry->d_op && dentry->d_op->d_revalidate) + if (dentry && (dentry->d_flags & DCACHE_OP_REVALIDATE)) dentry = do_revalidate(dentry, nd); if (!dentry) @@ -2070,7 +2079,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path, follow_dotdot(nd); dir = nd->path.dentry; case LAST_DOT: - if (nd->path.mnt->mnt_sb->s_type->fs_flags & FS_REVAL_DOT) { + if (need_reval_dot(dir)) { if (!dir->d_op->d_revalidate(dir, nd)) { error = -ESTALE; goto exit; diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index 0ba3cdc95a4..4b9cbb28d7f 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -633,7 +633,7 @@ ncp_fill_cache(struct file *filp, void *dirent, filldir_t filldir, entry->ino = iunique(dir->i_sb, 2); inode = ncp_iget(dir->i_sb, entry); if (inode) { - newdent->d_op = &ncp_dentry_operations; + d_set_d_op(newdent, &ncp_dentry_operations); d_instantiate(newdent, inode); if (!hashed) d_rehash(newdent); @@ -889,7 +889,7 @@ static struct dentry *ncp_lookup(struct inode *dir, struct dentry *dentry, struc if (inode) { ncp_new_dentry(dentry); add_entry: - dentry->d_op = &ncp_dentry_operations; + d_set_d_op(dentry, &ncp_dentry_operations); d_add(dentry, inode); error = 0; } diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 60047dbeb38..0c75a5f3caf 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -717,7 +717,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent) sb->s_root = d_alloc_root(root_inode); if (!sb->s_root) goto out_no_root; - sb->s_root->d_op = &ncp_root_dentry_operations; + d_set_d_op(sb->s_root, &ncp_root_dentry_operations); return 0; out_no_root: diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index eb77471b882..37e0a8bb077 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -438,7 +438,7 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) if (dentry == NULL) return; - dentry->d_op = NFS_PROTO(dir)->dentry_ops; + d_set_d_op(dentry, NFS_PROTO(dir)->dentry_ops); inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr); if (IS_ERR(inode)) goto out; @@ -1188,7 +1188,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru if (dentry->d_name.len > NFS_SERVER(dir)->namelen) goto out; - dentry->d_op = NFS_PROTO(dir)->dentry_ops; + d_set_d_op(dentry, NFS_PROTO(dir)->dentry_ops); /* * If we're doing an exclusive create, optimize away the lookup @@ -1333,7 +1333,7 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry res = ERR_PTR(-ENAMETOOLONG); goto out; } - dentry->d_op = NFS_PROTO(dir)->dentry_ops; + d_set_d_op(dentry, NFS_PROTO(dir)->dentry_ops); /* Let vfs_create() deal with O_EXCL. Instantiate, but don't hash * the dentry. */ diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index b3e36c3430d..c3a5a112683 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -121,7 +121,7 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh) security_d_instantiate(ret, inode); if (ret->d_op == NULL) - ret->d_op = server->nfs_client->rpc_ops->dentry_ops; + d_set_d_op(ret, server->nfs_client->rpc_ops->dentry_ops); out: nfs_free_fattr(fsinfo.fattr); return ret; @@ -228,7 +228,7 @@ struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh) security_d_instantiate(ret, inode); if (ret->d_op == NULL) - ret->d_op = server->nfs_client->rpc_ops->dentry_ops; + d_set_d_op(ret, server->nfs_client->rpc_ops->dentry_ops); out: nfs_free_fattr(fattr); diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c index 19ad145d2af..6adafa57606 100644 --- a/fs/ocfs2/export.c +++ b/fs/ocfs2/export.c @@ -138,7 +138,7 @@ check_gen: result = d_obtain_alias(inode); if (!IS_ERR(result)) - result->d_op = &ocfs2_dentry_ops; + d_set_d_op(result, &ocfs2_dentry_ops); else mlog_errno(PTR_ERR(result)); @@ -176,7 +176,7 @@ static struct dentry *ocfs2_get_parent(struct dentry *child) parent = d_obtain_alias(ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0, 0)); if (!IS_ERR(parent)) - parent->d_op = &ocfs2_dentry_ops; + d_set_d_op(parent, &ocfs2_dentry_ops); bail_unlock: ocfs2_inode_unlock(dir, 0); diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index ff5744e1e36..d14cad6e2e4 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -147,7 +147,7 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry, spin_unlock(&oi->ip_lock); bail_add: - dentry->d_op = &ocfs2_dentry_ops; + d_set_d_op(dentry, &ocfs2_dentry_ops); ret = d_splice_alias(inode, dentry); if (inode) { @@ -415,7 +415,7 @@ static int ocfs2_mknod(struct inode *dir, mlog_errno(status); goto leave; } - dentry->d_op = &ocfs2_dentry_ops; + d_set_d_op(dentry, &ocfs2_dentry_ops); status = ocfs2_add_entry(handle, dentry, inode, OCFS2_I(inode)->ip_blkno, parent_fe_bh, @@ -743,7 +743,7 @@ static int ocfs2_link(struct dentry *old_dentry, } ihold(inode); - dentry->d_op = &ocfs2_dentry_ops; + d_set_d_op(dentry, &ocfs2_dentry_ops); d_instantiate(dentry, inode); out_commit: @@ -1794,7 +1794,7 @@ static int ocfs2_symlink(struct inode *dir, mlog_errno(status); goto bail; } - dentry->d_op = &ocfs2_dentry_ops; + d_set_d_op(dentry, &ocfs2_dentry_ops); status = ocfs2_add_entry(handle, dentry, inode, le64_to_cpu(fe->i_blkno), parent_fe_bh, @@ -2459,7 +2459,7 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir, goto out_commit; } - dentry->d_op = &ocfs2_dentry_ops; + d_set_d_op(dentry, &ocfs2_dentry_ops); d_instantiate(dentry, inode); status = 0; out_commit: diff --git a/fs/pipe.c b/fs/pipe.c index ae3592dcc88..01a78656781 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1004,7 +1004,7 @@ struct file *create_write_pipe(int flags) goto err_inode; path.mnt = mntget(pipe_mnt); - path.dentry->d_op = &pipefs_dentry_operations; + d_set_d_op(path.dentry, &pipefs_dentry_operations); d_instantiate(path.dentry, inode); err = -ENFILE; diff --git a/fs/proc/base.c b/fs/proc/base.c index d932fdb6a24..85f0a80912a 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1969,7 +1969,7 @@ static struct dentry *proc_fd_instantiate(struct inode *dir, inode->i_op = &proc_pid_link_inode_operations; inode->i_size = 64; ei->op.proc_get_link = proc_fd_link; - dentry->d_op = &tid_fd_dentry_operations; + d_set_d_op(dentry, &tid_fd_dentry_operations); d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ if (tid_fd_revalidate(dentry, NULL)) @@ -2137,7 +2137,7 @@ static struct dentry *proc_fdinfo_instantiate(struct inode *dir, ei->fd = fd; inode->i_mode = S_IFREG | S_IRUSR; inode->i_fop = &proc_fdinfo_file_operations; - dentry->d_op = &tid_fd_dentry_operations; + d_set_d_op(dentry, &tid_fd_dentry_operations); d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ if (tid_fd_revalidate(dentry, NULL)) @@ -2196,7 +2196,7 @@ static struct dentry *proc_pident_instantiate(struct inode *dir, if (p->fop) inode->i_fop = p->fop; ei->op = p->op; - dentry->d_op = &pid_dentry_operations; + d_set_d_op(dentry, &pid_dentry_operations); d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ if (pid_revalidate(dentry, NULL)) @@ -2615,7 +2615,7 @@ static struct dentry *proc_base_instantiate(struct inode *dir, if (p->fop) inode->i_fop = p->fop; ei->op = p->op; - dentry->d_op = &proc_base_dentry_operations; + d_set_d_op(dentry, &proc_base_dentry_operations); d_add(dentry, inode); error = NULL; out: @@ -2926,7 +2926,7 @@ static struct dentry *proc_pid_instantiate(struct inode *dir, inode->i_nlink = 2 + pid_entry_count_dirs(tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff)); - dentry->d_op = &pid_dentry_operations; + d_set_d_op(dentry, &pid_dentry_operations); d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ @@ -3169,7 +3169,7 @@ static struct dentry *proc_task_instantiate(struct inode *dir, inode->i_nlink = 2 + pid_entry_count_dirs(tid_base_stuff, ARRAY_SIZE(tid_base_stuff)); - dentry->d_op = &pid_dentry_operations; + d_set_d_op(dentry, &pid_dentry_operations); d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 1d607be36d9..f766be29d2c 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -439,7 +439,7 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir, out_unlock: if (inode) { - dentry->d_op = &proc_dentry_operations; + d_set_d_op(dentry, &proc_dentry_operations); d_add(dentry, inode); return NULL; } diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 998e3a715bc..35efd85a4d3 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -120,7 +120,7 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, goto out; err = NULL; - dentry->d_op = &proc_sys_dentry_operations; + d_set_d_op(dentry, &proc_sys_dentry_operations); d_add(dentry, inode); out: @@ -201,7 +201,7 @@ static int proc_sys_fill_cache(struct file *filp, void *dirent, dput(child); return -ENOMEM; } else { - child->d_op = &proc_sys_dentry_operations; + d_set_d_op(child, &proc_sys_dentry_operations); d_add(child, inode); } } else { diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 5d04a7828e7..e0f0d7ea10a 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -990,7 +990,7 @@ int reiserfs_lookup_privroot(struct super_block *s) strlen(PRIVROOT_NAME)); if (!IS_ERR(dentry)) { REISERFS_SB(s)->priv_root = dentry; - dentry->d_op = &xattr_lookup_poison_ops; + d_set_d_op(dentry, &xattr_lookup_poison_ops); if (dentry->d_inode) dentry->d_inode->i_flags |= S_PRIVATE; } else diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 27e1102e303..3e076caa8da 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -701,7 +701,7 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry, /* instantiate and hash dentry */ ret = d_find_alias(inode); if (!ret) { - dentry->d_op = &sysfs_dentry_ops; + d_set_d_op(dentry, &sysfs_dentry_ops); dentry->d_fsdata = sysfs_get(sd); d_add(dentry, inode); } else { diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c index 7507aeb4c90..b5e68da2db3 100644 --- a/fs/sysv/namei.c +++ b/fs/sysv/namei.c @@ -48,7 +48,7 @@ static struct dentry *sysv_lookup(struct inode * dir, struct dentry * dentry, st struct inode * inode = NULL; ino_t ino; - dentry->d_op = dir->i_sb->s_root->d_op; + d_set_d_op(dentry, dir->i_sb->s_root->d_op); if (dentry->d_name.len > SYSV_NAMELEN) return ERR_PTR(-ENAMETOOLONG); ino = sysv_inode_by_name(dentry); diff --git a/fs/sysv/super.c b/fs/sysv/super.c index 3d9c62be0c1..76712aefc4a 100644 --- a/fs/sysv/super.c +++ b/fs/sysv/super.c @@ -346,7 +346,7 @@ static int complete_read_super(struct super_block *sb, int silent, int size) if (sbi->s_forced_ro) sb->s_flags |= MS_RDONLY; if (sbi->s_truncate) - sb->s_root->d_op = &sysv_dentry_operations; + d_set_d_op(sb->s_root, &sysv_dentry_operations); return 1; } diff --git a/include/linux/dcache.h b/include/linux/dcache.h index e4414693065..f4b40a751f0 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -183,6 +183,11 @@ struct dentry_operations { #define DCACHE_GENOCIDE 0x0200 #define DCACHE_MOUNTED 0x0400 /* is a mountpoint */ +#define DCACHE_OP_HASH 0x1000 +#define DCACHE_OP_COMPARE 0x2000 +#define DCACHE_OP_REVALIDATE 0x4000 +#define DCACHE_OP_DELETE 0x8000 + extern spinlock_t dcache_inode_lock; extern seqlock_t rename_lock; @@ -201,6 +206,7 @@ extern struct dentry * d_materialise_unique(struct dentry *, struct inode *); extern void __d_drop(struct dentry *dentry); extern void d_drop(struct dentry *dentry); extern void d_delete(struct dentry *); +extern void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op); /* allocate/de-allocate */ extern struct dentry * d_alloc(struct dentry *, const struct qstr *); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 9f41470c394..51cddc11cd8 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2222,7 +2222,7 @@ static struct dentry *cgroup_lookup(struct inode *dir, if (dentry->d_name.len > NAME_MAX) return ERR_PTR(-ENAMETOOLONG); - dentry->d_op = &cgroup_dentry_operations; + d_set_d_op(dentry, &cgroup_dentry_operations); d_add(dentry, NULL); return NULL; } diff --git a/net/socket.c b/net/socket.c index 817dc92e9ef..991e266bc7a 100644 --- a/net/socket.c +++ b/net/socket.c @@ -368,7 +368,7 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags) } path.mnt = mntget(sock_mnt); - path.dentry->d_op = &sockfs_dentry_operations; + d_set_d_op(path.dentry, &sockfs_dentry_operations); d_instantiate(path.dentry, SOCK_INODE(sock)); SOCK_INODE(sock)->i_fop = &socket_file_ops; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 2899fe27f88..09f01f41e55 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -591,7 +591,7 @@ static struct dentry *__rpc_lookup_create(struct dentry *parent, } } if (!dentry->d_inode) - dentry->d_op = &rpc_dentry_operations; + d_set_d_op(dentry, &rpc_dentry_operations); out_err: return dentry; } -- cgit v1.2.3 From 4b936885ab04dc6e0bb0ef35e0e23c1a7364d9e5 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:50:07 +1100 Subject: fs: improve scalability of pseudo filesystems Regardless of how much we possibly try to scale dcache, there is likely always going to be some fundamental contention when adding or removing children under the same parent. Pseudo filesystems do not seem need to have connected dentries because by definition they are disconnected. Signed-off-by: Nick Piggin --- fs/anon_inodes.c | 2 +- fs/dcache.c | 12 ++++++++++++ fs/pipe.c | 2 +- include/linux/dcache.h | 1 + net/socket.c | 2 +- 5 files changed, 16 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index aca8806fa20..9d92b33da8a 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -102,7 +102,7 @@ struct file *anon_inode_getfile(const char *name, this.name = name; this.len = strlen(name); this.hash = 0; - path.dentry = d_alloc(anon_inode_mnt->mnt_sb->s_root, &this); + path.dentry = d_alloc_pseudo(anon_inode_mnt->mnt_sb, &this); if (!path.dentry) goto err_module; diff --git a/fs/dcache.c b/fs/dcache.c index 09ec945f3c9..9e6e6db7686 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1330,6 +1330,18 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) } EXPORT_SYMBOL(d_alloc); +struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name) +{ + struct dentry *dentry = d_alloc(NULL, name); + if (dentry) { + dentry->d_sb = sb; + dentry->d_parent = dentry; + dentry->d_flags |= DCACHE_DISCONNECTED; + } + return dentry; +} +EXPORT_SYMBOL(d_alloc_pseudo); + struct dentry *d_alloc_name(struct dentry *parent, const char *name) { struct qstr q; diff --git a/fs/pipe.c b/fs/pipe.c index 01a78656781..cfe3a7f2ee2 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -999,7 +999,7 @@ struct file *create_write_pipe(int flags) goto err; err = -ENOMEM; - path.dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &name); + path.dentry = d_alloc_pseudo(pipe_mnt->mnt_sb, &name); if (!path.dentry) goto err_inode; path.mnt = mntget(pipe_mnt); diff --git a/include/linux/dcache.h b/include/linux/dcache.h index d719e4de804..c0a2ca97c72 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -211,6 +211,7 @@ extern void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op /* allocate/de-allocate */ extern struct dentry * d_alloc(struct dentry *, const struct qstr *); +extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *); extern struct dentry * d_splice_alias(struct inode *, struct dentry *); extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *); extern struct dentry * d_obtain_alias(struct inode *); diff --git a/net/socket.c b/net/socket.c index 991e266bc7a..0ee74c32532 100644 --- a/net/socket.c +++ b/net/socket.c @@ -361,7 +361,7 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags) if (unlikely(fd < 0)) return fd; - path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name); + path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name); if (unlikely(!path.dentry)) { put_unused_fd(fd); return -ENOMEM; -- cgit v1.2.3 From b3e19d924b6eaf2ca7d22cba99a517c5171007b6 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:50:11 +1100 Subject: fs: scale mntget/mntput The problem that this patch aims to fix is vfsmount refcounting scalability. We need to take a reference on the vfsmount for every successful path lookup, which often go to the same mount point. The fundamental difficulty is that a "simple" reference count can never be made scalable, because any time a reference is dropped, we must check whether that was the last reference. To do that requires communication with all other CPUs that may have taken a reference count. We can make refcounts more scalable in a couple of ways, involving keeping distributed counters, and checking for the global-zero condition less frequently. - check the global sum once every interval (this will delay zero detection for some interval, so it's probably a showstopper for vfsmounts). - keep a local count and only taking the global sum when local reaches 0 (this is difficult for vfsmounts, because we can't hold preempt off for the life of a reference, so a counter would need to be per-thread or tied strongly to a particular CPU which requires more locking). - keep a local difference of increments and decrements, which allows us to sum the total difference and hence find the refcount when summing all CPUs. Then, keep a single integer "long" refcount for slow and long lasting references, and only take the global sum of local counters when the long refcount is 0. This last scheme is what I implemented here. Attached mounts and process root and working directory references are "long" references, and everything else is a short reference. This allows scalable vfsmount references during path walking over mounted subtrees and unattached (lazy umounted) mounts with processes still running in them. This results in one fewer atomic op in the fastpath: mntget is now just a per-CPU inc, rather than an atomic inc; and mntput just requires a spinlock and non-atomic decrement in the common case. However code is otherwise bigger and heavier, so single threaded performance is basically a wash. Signed-off-by: Nick Piggin --- arch/ia64/kernel/perfmon.c | 2 +- drivers/mtd/mtdchar.c | 2 +- fs/anon_inodes.c | 2 +- fs/fs_struct.c | 26 +++-- fs/internal.h | 1 + fs/namei.c | 24 +++++ fs/namespace.c | 242 +++++++++++++++++++++++++++++++++++++-------- fs/pipe.c | 2 +- fs/pnode.c | 4 +- fs/super.c | 2 +- include/linux/mount.h | 53 ++++------ include/linux/path.h | 2 + net/socket.c | 19 +++- 13 files changed, 283 insertions(+), 98 deletions(-) (limited to 'net') diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 5a24f40bb48..f099b82703d 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -1542,7 +1542,7 @@ pfm_exit_smpl_buffer(pfm_buffer_fmt_t *fmt) * any operations on the root directory. However, we need a non-trivial * d_name - pfm: will go nicely and kill the special-casing in procfs. */ -static struct vfsmount *pfmfs_mnt; +static struct vfsmount *pfmfs_mnt __read_mostly; static int __init init_pfm_fs(void) diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c index 4759d827e8c..f511dd15fd3 100644 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@ -1201,7 +1201,7 @@ err_unregister_chdev: static void __exit cleanup_mtdchar(void) { unregister_mtd_user(&mtdchar_notifier); - mntput(mtd_inode_mnt); + mntput_long(mtd_inode_mnt); unregister_filesystem(&mtd_inodefs_type); __unregister_chrdev(MTD_CHAR_MAJOR, 0, 1 << MINORBITS, "mtd"); } diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 9d92b33da8a..5fd38112a6c 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -232,7 +232,7 @@ static int __init anon_inode_init(void) return 0; err_mntput: - mntput(anon_inode_mnt); + mntput_long(anon_inode_mnt); err_unregister_filesystem: unregister_filesystem(&anon_inode_fs_type); err_exit: diff --git a/fs/fs_struct.c b/fs/fs_struct.c index 60b8531f41c..68ca487bedb 100644 --- a/fs/fs_struct.c +++ b/fs/fs_struct.c @@ -17,11 +17,11 @@ void set_fs_root(struct fs_struct *fs, struct path *path) write_seqcount_begin(&fs->seq); old_root = fs->root; fs->root = *path; - path_get(path); + path_get_long(path); write_seqcount_end(&fs->seq); spin_unlock(&fs->lock); if (old_root.dentry) - path_put(&old_root); + path_put_long(&old_root); } /* @@ -36,12 +36,12 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path) write_seqcount_begin(&fs->seq); old_pwd = fs->pwd; fs->pwd = *path; - path_get(path); + path_get_long(path); write_seqcount_end(&fs->seq); spin_unlock(&fs->lock); if (old_pwd.dentry) - path_put(&old_pwd); + path_put_long(&old_pwd); } void chroot_fs_refs(struct path *old_root, struct path *new_root) @@ -59,13 +59,13 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root) write_seqcount_begin(&fs->seq); if (fs->root.dentry == old_root->dentry && fs->root.mnt == old_root->mnt) { - path_get(new_root); + path_get_long(new_root); fs->root = *new_root; count++; } if (fs->pwd.dentry == old_root->dentry && fs->pwd.mnt == old_root->mnt) { - path_get(new_root); + path_get_long(new_root); fs->pwd = *new_root; count++; } @@ -76,13 +76,13 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root) } while_each_thread(g, p); read_unlock(&tasklist_lock); while (count--) - path_put(old_root); + path_put_long(old_root); } void free_fs_struct(struct fs_struct *fs) { - path_put(&fs->root); - path_put(&fs->pwd); + path_put_long(&fs->root); + path_put_long(&fs->pwd); kmem_cache_free(fs_cachep, fs); } @@ -115,7 +115,13 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old) spin_lock_init(&fs->lock); seqcount_init(&fs->seq); fs->umask = old->umask; - get_fs_root_and_pwd(old, &fs->root, &fs->pwd); + + spin_lock(&old->lock); + fs->root = old->root; + path_get_long(&fs->root); + fs->pwd = old->pwd; + path_get_long(&fs->pwd); + spin_unlock(&old->lock); } return fs; } diff --git a/fs/internal.h b/fs/internal.h index e43b9a4dbf4..9687c2ee273 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -63,6 +63,7 @@ extern int copy_mount_string(const void __user *, char **); extern void free_vfsmnt(struct vfsmount *); extern struct vfsmount *alloc_vfsmnt(const char *); +extern unsigned int mnt_get_count(struct vfsmount *mnt); extern struct vfsmount *__lookup_mnt(struct vfsmount *, struct dentry *, int); extern void mnt_set_mountpoint(struct vfsmount *, struct dentry *, struct vfsmount *); diff --git a/fs/namei.c b/fs/namei.c index 4e957bf744a..19433cdba01 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -367,6 +367,18 @@ void path_get(struct path *path) } EXPORT_SYMBOL(path_get); +/** + * path_get_long - get a long reference to a path + * @path: path to get the reference to + * + * Given a path increment the reference count to the dentry and the vfsmount. + */ +void path_get_long(struct path *path) +{ + mntget_long(path->mnt); + dget(path->dentry); +} + /** * path_put - put a reference to a path * @path: path to put the reference to @@ -380,6 +392,18 @@ void path_put(struct path *path) } EXPORT_SYMBOL(path_put); +/** + * path_put_long - put a long reference to a path + * @path: path to put the reference to + * + * Given a path decrement the reference count to the dentry and the vfsmount. + */ +void path_put_long(struct path *path) +{ + dput(path->dentry); + mntput_long(path->mnt); +} + /** * nameidata_drop_rcu - drop this nameidata out of rcu-walk * @nd: nameidata pathwalk data to drop diff --git a/fs/namespace.c b/fs/namespace.c index 03b82350f02..3ddfd9046c4 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -138,6 +138,64 @@ void mnt_release_group_id(struct vfsmount *mnt) mnt->mnt_group_id = 0; } +/* + * vfsmount lock must be held for read + */ +static inline void mnt_add_count(struct vfsmount *mnt, int n) +{ +#ifdef CONFIG_SMP + this_cpu_add(mnt->mnt_pcp->mnt_count, n); +#else + preempt_disable(); + mnt->mnt_count += n; + preempt_enable(); +#endif +} + +static inline void mnt_set_count(struct vfsmount *mnt, int n) +{ +#ifdef CONFIG_SMP + this_cpu_write(mnt->mnt_pcp->mnt_count, n); +#else + mnt->mnt_count = n; +#endif +} + +/* + * vfsmount lock must be held for read + */ +static inline void mnt_inc_count(struct vfsmount *mnt) +{ + mnt_add_count(mnt, 1); +} + +/* + * vfsmount lock must be held for read + */ +static inline void mnt_dec_count(struct vfsmount *mnt) +{ + mnt_add_count(mnt, -1); +} + +/* + * vfsmount lock must be held for write + */ +unsigned int mnt_get_count(struct vfsmount *mnt) +{ +#ifdef CONFIG_SMP + unsigned int count = atomic_read(&mnt->mnt_longrefs); + int cpu; + + for_each_possible_cpu(cpu) { + count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_count; + } + + return count; +#else + return mnt->mnt_count; +#endif +} + struct vfsmount *alloc_vfsmnt(const char *name) { struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL); @@ -154,7 +212,17 @@ struct vfsmount *alloc_vfsmnt(const char *name) goto out_free_id; } - atomic_set(&mnt->mnt_count, 1); +#ifdef CONFIG_SMP + mnt->mnt_pcp = alloc_percpu(struct mnt_pcp); + if (!mnt->mnt_pcp) + goto out_free_devname; + + atomic_set(&mnt->mnt_longrefs, 1); +#else + mnt->mnt_count = 1; + mnt->mnt_writers = 0; +#endif + INIT_LIST_HEAD(&mnt->mnt_hash); INIT_LIST_HEAD(&mnt->mnt_child); INIT_LIST_HEAD(&mnt->mnt_mounts); @@ -165,13 +233,6 @@ struct vfsmount *alloc_vfsmnt(const char *name) INIT_LIST_HEAD(&mnt->mnt_slave); #ifdef CONFIG_FSNOTIFY INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks); -#endif -#ifdef CONFIG_SMP - mnt->mnt_writers = alloc_percpu(int); - if (!mnt->mnt_writers) - goto out_free_devname; -#else - mnt->mnt_writers = 0; #endif } return mnt; @@ -219,7 +280,7 @@ EXPORT_SYMBOL_GPL(__mnt_is_readonly); static inline void mnt_inc_writers(struct vfsmount *mnt) { #ifdef CONFIG_SMP - (*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))++; + this_cpu_inc(mnt->mnt_pcp->mnt_writers); #else mnt->mnt_writers++; #endif @@ -228,7 +289,7 @@ static inline void mnt_inc_writers(struct vfsmount *mnt) static inline void mnt_dec_writers(struct vfsmount *mnt) { #ifdef CONFIG_SMP - (*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))--; + this_cpu_dec(mnt->mnt_pcp->mnt_writers); #else mnt->mnt_writers--; #endif @@ -241,7 +302,7 @@ static unsigned int mnt_get_writers(struct vfsmount *mnt) int cpu; for_each_possible_cpu(cpu) { - count += *per_cpu_ptr(mnt->mnt_writers, cpu); + count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_writers; } return count; @@ -418,7 +479,7 @@ void free_vfsmnt(struct vfsmount *mnt) kfree(mnt->mnt_devname); mnt_free_id(mnt); #ifdef CONFIG_SMP - free_percpu(mnt->mnt_writers); + free_percpu(mnt->mnt_pcp); #endif kmem_cache_free(mnt_cache, mnt); } @@ -652,9 +713,10 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root, return NULL; } -static inline void __mntput(struct vfsmount *mnt) +static inline void mntfree(struct vfsmount *mnt) { struct super_block *sb = mnt->mnt_sb; + /* * This probably indicates that somebody messed * up a mnt_want/drop_write() pair. If this @@ -662,8 +724,8 @@ static inline void __mntput(struct vfsmount *mnt) * to make r/w->r/o transitions. */ /* - * atomic_dec_and_lock() used to deal with ->mnt_count decrements - * provides barriers, so mnt_get_writers() below is safe. AV + * The locking used to deal with mnt_count decrement provides barriers, + * so mnt_get_writers() below is safe. */ WARN_ON(mnt_get_writers(mnt)); fsnotify_vfsmount_delete(mnt); @@ -672,28 +734,113 @@ static inline void __mntput(struct vfsmount *mnt) deactivate_super(sb); } -void mntput_no_expire(struct vfsmount *mnt) -{ -repeat: - if (atomic_add_unless(&mnt->mnt_count, -1, 1)) - return; +#ifdef CONFIG_SMP +static inline void __mntput(struct vfsmount *mnt, int longrefs) +{ + if (!longrefs) { +put_again: + br_read_lock(vfsmount_lock); + if (likely(atomic_read(&mnt->mnt_longrefs))) { + mnt_dec_count(mnt); + br_read_unlock(vfsmount_lock); + return; + } + br_read_unlock(vfsmount_lock); + } else { + BUG_ON(!atomic_read(&mnt->mnt_longrefs)); + if (atomic_add_unless(&mnt->mnt_longrefs, -1, 1)) + return; + } + br_write_lock(vfsmount_lock); - if (!atomic_dec_and_test(&mnt->mnt_count)) { + if (!longrefs) + mnt_dec_count(mnt); + else + atomic_dec(&mnt->mnt_longrefs); + if (mnt_get_count(mnt)) { br_write_unlock(vfsmount_lock); return; } - if (likely(!mnt->mnt_pinned)) { + if (unlikely(mnt->mnt_pinned)) { + mnt_add_count(mnt, mnt->mnt_pinned + 1); + mnt->mnt_pinned = 0; br_write_unlock(vfsmount_lock); - __mntput(mnt); + acct_auto_close_mnt(mnt); + goto put_again; + } + br_write_unlock(vfsmount_lock); + mntfree(mnt); +} +#else +static inline void __mntput(struct vfsmount *mnt, int longrefs) +{ +put_again: + mnt_dec_count(mnt); + if (likely(mnt_get_count(mnt))) return; + br_write_lock(vfsmount_lock); + if (unlikely(mnt->mnt_pinned)) { + mnt_add_count(mnt, mnt->mnt_pinned + 1); + mnt->mnt_pinned = 0; + br_write_unlock(vfsmount_lock); + acct_auto_close_mnt(mnt); + goto put_again; } - atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count); - mnt->mnt_pinned = 0; br_write_unlock(vfsmount_lock); - acct_auto_close_mnt(mnt); - goto repeat; + mntfree(mnt); +} +#endif + +static void mntput_no_expire(struct vfsmount *mnt) +{ + __mntput(mnt, 0); +} + +void mntput(struct vfsmount *mnt) +{ + if (mnt) { + /* avoid cacheline pingpong, hope gcc doesn't get "smart" */ + if (unlikely(mnt->mnt_expiry_mark)) + mnt->mnt_expiry_mark = 0; + __mntput(mnt, 0); + } +} +EXPORT_SYMBOL(mntput); + +struct vfsmount *mntget(struct vfsmount *mnt) +{ + if (mnt) + mnt_inc_count(mnt); + return mnt; +} +EXPORT_SYMBOL(mntget); + +void mntput_long(struct vfsmount *mnt) +{ +#ifdef CONFIG_SMP + if (mnt) { + /* avoid cacheline pingpong, hope gcc doesn't get "smart" */ + if (unlikely(mnt->mnt_expiry_mark)) + mnt->mnt_expiry_mark = 0; + __mntput(mnt, 1); + } +#else + mntput(mnt); +#endif } -EXPORT_SYMBOL(mntput_no_expire); +EXPORT_SYMBOL(mntput_long); + +struct vfsmount *mntget_long(struct vfsmount *mnt) +{ +#ifdef CONFIG_SMP + if (mnt) + atomic_inc(&mnt->mnt_longrefs); + return mnt; +#else + return mntget(mnt); +#endif +} +EXPORT_SYMBOL(mntget_long); void mnt_pin(struct vfsmount *mnt) { @@ -701,19 +848,17 @@ void mnt_pin(struct vfsmount *mnt) mnt->mnt_pinned++; br_write_unlock(vfsmount_lock); } - EXPORT_SYMBOL(mnt_pin); void mnt_unpin(struct vfsmount *mnt) { br_write_lock(vfsmount_lock); if (mnt->mnt_pinned) { - atomic_inc(&mnt->mnt_count); + mnt_inc_count(mnt); mnt->mnt_pinned--; } br_write_unlock(vfsmount_lock); } - EXPORT_SYMBOL(mnt_unpin); static inline void mangle(struct seq_file *m, const char *s) @@ -1008,12 +1153,13 @@ int may_umount_tree(struct vfsmount *mnt) int minimum_refs = 0; struct vfsmount *p; - br_read_lock(vfsmount_lock); + /* write lock needed for mnt_get_count */ + br_write_lock(vfsmount_lock); for (p = mnt; p; p = next_mnt(p, mnt)) { - actual_refs += atomic_read(&p->mnt_count); + actual_refs += mnt_get_count(p); minimum_refs += 2; } - br_read_unlock(vfsmount_lock); + br_write_unlock(vfsmount_lock); if (actual_refs > minimum_refs) return 0; @@ -1040,10 +1186,10 @@ int may_umount(struct vfsmount *mnt) { int ret = 1; down_read(&namespace_sem); - br_read_lock(vfsmount_lock); + br_write_lock(vfsmount_lock); if (propagate_mount_busy(mnt, 2)) ret = 0; - br_read_unlock(vfsmount_lock); + br_write_unlock(vfsmount_lock); up_read(&namespace_sem); return ret; } @@ -1070,7 +1216,7 @@ void release_mounts(struct list_head *head) dput(dentry); mntput(m); } - mntput(mnt); + mntput_long(mnt); } } @@ -1125,8 +1271,16 @@ static int do_umount(struct vfsmount *mnt, int flags) flags & (MNT_FORCE | MNT_DETACH)) return -EINVAL; - if (atomic_read(&mnt->mnt_count) != 2) + /* + * probably don't strictly need the lock here if we examined + * all race cases, but it's a slowpath. + */ + br_write_lock(vfsmount_lock); + if (mnt_get_count(mnt) != 2) { + br_write_lock(vfsmount_lock); return -EBUSY; + } + br_write_unlock(vfsmount_lock); if (!xchg(&mnt->mnt_expiry_mark, 1)) return -EAGAIN; @@ -1815,7 +1969,7 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path, unlock: up_write(&namespace_sem); - mntput(newmnt); + mntput_long(newmnt); return err; } @@ -2148,11 +2302,11 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, if (fs) { if (p == fs->root.mnt) { rootmnt = p; - fs->root.mnt = mntget(q); + fs->root.mnt = mntget_long(q); } if (p == fs->pwd.mnt) { pwdmnt = p; - fs->pwd.mnt = mntget(q); + fs->pwd.mnt = mntget_long(q); } } p = next_mnt(p, mnt_ns->root); @@ -2161,9 +2315,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, up_write(&namespace_sem); if (rootmnt) - mntput(rootmnt); + mntput_long(rootmnt); if (pwdmnt) - mntput(pwdmnt); + mntput_long(pwdmnt); return new_ns; } @@ -2350,6 +2504,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, touch_mnt_namespace(current->nsproxy->mnt_ns); br_write_unlock(vfsmount_lock); chroot_fs_refs(&root, &new); + error = 0; path_put(&root_parent); path_put(&parent_path); @@ -2376,6 +2531,7 @@ static void __init init_mount_tree(void) mnt = do_kern_mount("rootfs", 0, "rootfs", NULL); if (IS_ERR(mnt)) panic("Can't create rootfs"); + ns = create_mnt_ns(mnt); if (IS_ERR(ns)) panic("Can't allocate initial namespace"); diff --git a/fs/pipe.c b/fs/pipe.c index cfe3a7f2ee2..68f1f8e4e23 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1292,7 +1292,7 @@ static int __init init_pipe_fs(void) static void __exit exit_pipe_fs(void) { unregister_filesystem(&pipe_fs_type); - mntput(pipe_mnt); + mntput_long(pipe_mnt); } fs_initcall(init_pipe_fs); diff --git a/fs/pnode.c b/fs/pnode.c index 8066b8dd748..d42514e3238 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -288,7 +288,7 @@ out: */ static inline int do_refcount_check(struct vfsmount *mnt, int count) { - int mycount = atomic_read(&mnt->mnt_count) - mnt->mnt_ghosts; + int mycount = mnt_get_count(mnt) - mnt->mnt_ghosts; return (mycount > count); } @@ -300,7 +300,7 @@ static inline int do_refcount_check(struct vfsmount *mnt, int count) * Check if any of these mounts that **do not have submounts** * have more references than 'refcnt'. If so return busy. * - * vfsmount lock must be held for read or write + * vfsmount lock must be held for write */ int propagate_mount_busy(struct vfsmount *mnt, int refcnt) { diff --git a/fs/super.c b/fs/super.c index 968ba013011..823e061faa8 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1140,7 +1140,7 @@ static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype) return mnt; err: - mntput(mnt); + mntput_long(mnt); return ERR_PTR(err); } diff --git a/include/linux/mount.h b/include/linux/mount.h index 5e7a59408dd..1869ea24a73 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -13,6 +13,7 @@ #include #include #include +#include #include struct super_block; @@ -46,12 +47,24 @@ struct mnt_namespace; #define MNT_INTERNAL 0x4000 +struct mnt_pcp { + int mnt_count; + int mnt_writers; +}; + struct vfsmount { struct list_head mnt_hash; struct vfsmount *mnt_parent; /* fs we are mounted on */ struct dentry *mnt_mountpoint; /* dentry of mountpoint */ struct dentry *mnt_root; /* root of the mounted tree */ struct super_block *mnt_sb; /* pointer to superblock */ +#ifdef CONFIG_SMP + struct mnt_pcp __percpu *mnt_pcp; + atomic_t mnt_longrefs; +#else + int mnt_count; + int mnt_writers; +#endif struct list_head mnt_mounts; /* list of children, anchored here */ struct list_head mnt_child; /* and going through their mnt_child */ int mnt_flags; @@ -70,57 +83,25 @@ struct vfsmount { struct mnt_namespace *mnt_ns; /* containing namespace */ int mnt_id; /* mount identifier */ int mnt_group_id; /* peer group identifier */ - /* - * We put mnt_count & mnt_expiry_mark at the end of struct vfsmount - * to let these frequently modified fields in a separate cache line - * (so that reads of mnt_flags wont ping-pong on SMP machines) - */ - atomic_t mnt_count; int mnt_expiry_mark; /* true if marked for expiry */ int mnt_pinned; int mnt_ghosts; -#ifdef CONFIG_SMP - int __percpu *mnt_writers; -#else - int mnt_writers; -#endif }; -static inline int *get_mnt_writers_ptr(struct vfsmount *mnt) -{ -#ifdef CONFIG_SMP - return mnt->mnt_writers; -#else - return &mnt->mnt_writers; -#endif -} - -static inline struct vfsmount *mntget(struct vfsmount *mnt) -{ - if (mnt) - atomic_inc(&mnt->mnt_count); - return mnt; -} - struct file; /* forward dec */ extern int mnt_want_write(struct vfsmount *mnt); extern int mnt_want_write_file(struct file *file); extern int mnt_clone_write(struct vfsmount *mnt); extern void mnt_drop_write(struct vfsmount *mnt); -extern void mntput_no_expire(struct vfsmount *mnt); +extern void mntput(struct vfsmount *mnt); +extern struct vfsmount *mntget(struct vfsmount *mnt); +extern void mntput_long(struct vfsmount *mnt); +extern struct vfsmount *mntget_long(struct vfsmount *mnt); extern void mnt_pin(struct vfsmount *mnt); extern void mnt_unpin(struct vfsmount *mnt); extern int __mnt_is_readonly(struct vfsmount *mnt); -static inline void mntput(struct vfsmount *mnt) -{ - if (mnt) { - mnt->mnt_expiry_mark = 0; - mntput_no_expire(mnt); - } -} - extern struct vfsmount *do_kern_mount(const char *fstype, int flags, const char *name, void *data); diff --git a/include/linux/path.h b/include/linux/path.h index edc98dec626..a581e8c0653 100644 --- a/include/linux/path.h +++ b/include/linux/path.h @@ -10,7 +10,9 @@ struct path { }; extern void path_get(struct path *); +extern void path_get_long(struct path *); extern void path_put(struct path *); +extern void path_put_long(struct path *); static inline int path_equal(const struct path *path1, const struct path *path2) { diff --git a/net/socket.c b/net/socket.c index 0ee74c32532..815bba3d2fe 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2390,6 +2390,8 @@ EXPORT_SYMBOL(sock_unregister); static int __init sock_init(void) { + int err; + /* * Initialize sock SLAB cache. */ @@ -2406,8 +2408,15 @@ static int __init sock_init(void) */ init_inodecache(); - register_filesystem(&sock_fs_type); + + err = register_filesystem(&sock_fs_type); + if (err) + goto out_fs; sock_mnt = kern_mount(&sock_fs_type); + if (IS_ERR(sock_mnt)) { + err = PTR_ERR(sock_mnt); + goto out_mount; + } /* The real protocol initialization is performed in later initcalls. */ @@ -2420,7 +2429,13 @@ static int __init sock_init(void) skb_timestamping_init(); #endif - return 0; +out: + return err; + +out_mount: + unregister_filesystem(&sock_fs_type); +out_fs: + goto out; } core_initcall(sock_init); /* early initcall */ -- cgit v1.2.3