From 778e24bb6dd8682318bb496d4bfdc32b501a6420 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 23 Jun 2011 01:34:59 +0000 Subject: xfs: reset inode per-lifetime state when recycling it XFS inodes have several per-lifetime state fields that determine the behaviour of the inode. These state fields are not all reset when an inode is reused from the reclaimable state. This can lead to unexpected behaviour of the new inode such as speculative preallocation not being truncated away in the expected manner for local files until the inode is subsequently truncated, freed or cycles out of the cache. It can also lead to an inode being considered to be a filestream inode or having been truncated when that is not the case. Rework the reinitialisation of the inode when it is recycled to ensure that it is pristine before it is reused. While there, also fix the resetting of state flags in the recycling error paths so the inode does not become unreclaimable. Signed-off-by: Dave Chinner Signed-off-by: Alex Elder --- fs/xfs/xfs_iget.c | 13 +++++++++---- fs/xfs/xfs_inode.h | 10 ++++++++++ 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index cb9b6d1469f7..3631783b2b53 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -253,16 +253,21 @@ xfs_iget_cache_hit( rcu_read_lock(); spin_lock(&ip->i_flags_lock); - ip->i_flags &= ~XFS_INEW; - ip->i_flags |= XFS_IRECLAIMABLE; - __xfs_inode_set_reclaim_tag(pag, ip); + ip->i_flags &= ~(XFS_INEW | XFS_IRECLAIM); + ASSERT(ip->i_flags & XFS_IRECLAIMABLE); trace_xfs_iget_reclaim_fail(ip); goto out_error; } spin_lock(&pag->pag_ici_lock); spin_lock(&ip->i_flags_lock); - ip->i_flags &= ~(XFS_IRECLAIMABLE | XFS_IRECLAIM); + + /* + * Clear the per-lifetime state in the inode as we are now + * effectively a new inode and need to return to the initial + * state before reuse occurs. 
+ */ + ip->i_flags &= ~XFS_IRECLAIM_RESET_FLAGS; ip->i_flags |= XFS_INEW; __xfs_inode_clear_reclaim_tag(mp, pag, ip); inode->i_state = I_NEW; diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 3ae6d58e5473..964cfea77686 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -383,6 +383,16 @@ static inline void xfs_ifunlock(xfs_inode_t *ip) #define XFS_ITRUNCATED 0x0020 /* truncated down so flush-on-close */ #define XFS_IDIRTY_RELEASE 0x0040 /* dirty release already seen */ +/* + * Per-lifetime flags need to be reset when re-using a reclaimable inode during + * inode lookup. This prevents unintended behaviour on the new inode from + * occurring. + */ +#define XFS_IRECLAIM_RESET_FLAGS \ + (XFS_IRECLAIMABLE | XFS_IRECLAIM | \ + XFS_IDIRTY_RELEASE | XFS_ITRUNCATED | \ + XFS_IFILESTREAM); + /* * Flags for inode locking. * Bit ranges: 1<<1 - 1<<16-1 -- iolock/ilock modes (bitfield) -- cgit v1.2.3 From df4368a146d2b350b8398babfe11e2088f741d67 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 23 Jun 2011 01:35:00 +0000 Subject: xfs: clear XFS_IDIRTY_RELEASE on truncate down When an inode is truncated down, speculative preallocation is removed from the inode. This should also reset the state bits for controlling whether preallocation is subsequently removed when the file is next closed. The flag is not being cleared, so repeated operations on a file that first involve a truncate (e.g. multiple repeated dd invocations on a file) give different file layouts for the second and subsequent invocations. Fix this by clearing the XFS_IDIRTY_RELEASE state bit when the XFS_ITRUNCATED bit is detected in xfs_release() and hence ensure that speculative delalloc is removed on files that have been truncated down. 
Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/xfs_vnodeops.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index b7a5fe7c52c8..619720705bc6 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -960,8 +960,11 @@ xfs_release( * be exposed to that problem. */ truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED); - if (truncated && VN_DIRTY(VFS_I(ip)) && ip->i_delayed_blks > 0) - xfs_flush_pages(ip, 0, -1, XBF_ASYNC, FI_NONE); + if (truncated) { + xfs_iflags_clear(ip, XFS_IDIRTY_RELEASE); + if (VN_DIRTY(VFS_I(ip)) && ip->i_delayed_blks > 0) + xfs_flush_pages(ip, 0, -1, XBF_ASYNC, FI_NONE); + } } if (ip->i_d.di_nlink == 0) -- cgit v1.2.3 From 4a33821236f2ef3af0081e8a5eec1301cbed3125 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 23 Jun 2011 01:35:01 +0000 Subject: xfs: prevent bogus assert when trying to remove non-existent attribute If the attribute fork on an inode is in btree format and has multiple levels (i.e. node format rather than leaf format), then a lookup failure will trigger an assert failure in xfs_da_path_shift if the flag XFS_DA_OP_OKNOENT is not set. This flag is used to indicate to the directory btree code that not finding an entry is not a fatal error. In the case of doing a lookup for a directory name removal, this is valid as a user cannot insert an arbitrary name to remove from the directory btree. However, in the case of the attribute tree, a user has direct control over the attribute name and can ask for any random name to be removed without any validation. In this case, fsstress is asking for a non-existent user.selinux attribute to be removed, and that is causing xfs_da_path_shift() to fall off the bottom of the tree where it asserts that a lookup failure is allowed. Because the flag is not set, we die a horrible death on a debug-enabled kernel. 
Prevent this assert from firing on attribute removes by adding the op_flag XFS_DA_OP_OKNOENT to attribute removal operations. Discovered when testing on a SELinux enabled system by fsstress in test 070 by trying to remove a non-existent user.selinux attribute. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/xfs_attr.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c index c86375378810..01d2072fb6d4 100644 --- a/fs/xfs/xfs_attr.c +++ b/fs/xfs/xfs_attr.c @@ -489,6 +489,13 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags) args.total = 0; args.whichfork = XFS_ATTR_FORK; + /* + * we have no control over the attribute names that userspace passes us + * to remove, so we have to allow the name lookup prior to attribute + * removal to fail. + */ + args.op_flags = XFS_DA_OP_OKNOENT; + /* * Attach the dquots to the inode. */ -- cgit v1.2.3