From 778e24bb6dd8682318bb496d4bfdc32b501a6420 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Thu, 23 Jun 2011 01:34:59 +0000
Subject: xfs: reset inode per-lifetime state when recycling it

XFS inodes has several per-lifetime state fields that determine the
behaviour of the inode. These state fields are not all reset when an
inode is reused from the reclaimable state.

This can lead to unexpected behaviour of the new inode such as
speculative preallocation not being truncated away in the expected
manner for local files until the inode is subsequently truncated,
freed or cycles out of the cache. It can also lead to an inode being
considered to be a filestream inode or having been truncated when
that is not the case.

Rework the reinitialisation of the inode when it is recycled to
ensure that it is pristine before it is reused. While there, also
fix the resetting of state flags in the recycling error paths so the
inode does not become unreclaimable.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Alex Elder <aelder@sgi.com>
---
 fs/xfs/xfs_iget.c  | 13 +++++++++----
 fs/xfs/xfs_inode.h | 10 ++++++++++
 2 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index cb9b6d1469f7..3631783b2b53 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -253,16 +253,21 @@ xfs_iget_cache_hit(
 			rcu_read_lock();
 			spin_lock(&ip->i_flags_lock);
 
-			ip->i_flags &= ~XFS_INEW;
-			ip->i_flags |= XFS_IRECLAIMABLE;
-			__xfs_inode_set_reclaim_tag(pag, ip);
+			ip->i_flags &= ~(XFS_INEW | XFS_IRECLAIM);
+			ASSERT(ip->i_flags & XFS_IRECLAIMABLE);
 			trace_xfs_iget_reclaim_fail(ip);
 			goto out_error;
 		}
 
 		spin_lock(&pag->pag_ici_lock);
 		spin_lock(&ip->i_flags_lock);
-		ip->i_flags &= ~(XFS_IRECLAIMABLE | XFS_IRECLAIM);
+
+		/*
+		 * Clear the per-lifetime state in the inode as we are now
+		 * effectively a new inode and need to return to the initial
+		 * state before reuse occurs.
+		 */
+		ip->i_flags &= ~XFS_IRECLAIM_RESET_FLAGS;
 		ip->i_flags |= XFS_INEW;
 		__xfs_inode_clear_reclaim_tag(mp, pag, ip);
 		inode->i_state = I_NEW;
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 3ae6d58e5473..964cfea77686 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -383,6 +383,16 @@ static inline void xfs_ifunlock(xfs_inode_t *ip)
 #define XFS_ITRUNCATED		0x0020	/* truncated down so flush-on-close */
 #define XFS_IDIRTY_RELEASE	0x0040	/* dirty release already seen */
 
+/*
+ * Per-lifetime flags need to be reset when re-using a reclaimable inode during
+ * inode lookup. Thi prevents unintended behaviour on the new inode from
+ * ocurring.
+ */
+#define XFS_IRECLAIM_RESET_FLAGS	\
+	(XFS_IRECLAIMABLE | XFS_IRECLAIM | \
+	 XFS_IDIRTY_RELEASE | XFS_ITRUNCATED | \
+	 XFS_IFILESTREAM);
+
 /*
  * Flags for inode locking.
  * Bit ranges:	1<<1  - 1<<16-1 -- iolock/ilock modes (bitfield)
-- 
cgit v1.2.3


From df4368a146d2b350b8398babfe11e2088f741d67 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Thu, 23 Jun 2011 01:35:00 +0000
Subject: xfs: clear XFS_IDIRTY_RELEASE on truncate down

When an inode is truncated down, speculative preallocation is
removed from the inode. This should also reset the state bits for
controlling whether preallocation is subsequently removed when the
file is next closed. The flag is not being cleared, so repeated
operations on a file that first involve a truncate (e.g. multiple
repeated dd invocations on a file) give different file layouts for
the second and subsequent invocations.

Fix this by clearing the XFS_IDIRTY_RELEASE state bit when the
XFS_ITRUNCATED bit is detected in xfs_release() and hence ensure
that speculative delalloc is removed on files that have been
truncated down.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Alex Elder <aelder@sgi.com>
---
 fs/xfs/xfs_vnodeops.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index b7a5fe7c52c8..619720705bc6 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -960,8 +960,11 @@ xfs_release(
 		 * be exposed to that problem.
 		 */
 		truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED);
-		if (truncated && VN_DIRTY(VFS_I(ip)) && ip->i_delayed_blks > 0)
-			xfs_flush_pages(ip, 0, -1, XBF_ASYNC, FI_NONE);
+		if (truncated) {
+			xfs_iflags_clear(ip, XFS_IDIRTY_RELEASE);
+			if (VN_DIRTY(VFS_I(ip)) && ip->i_delayed_blks > 0)
+				xfs_flush_pages(ip, 0, -1, XBF_ASYNC, FI_NONE);
+		}
 	}
 
 	if (ip->i_d.di_nlink == 0)
-- 
cgit v1.2.3


From 4a33821236f2ef3af0081e8a5eec1301cbed3125 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Thu, 23 Jun 2011 01:35:01 +0000
Subject: xfs: prevent bogus assert when trying to remove non-existent
 attribute

If the attribute fork on an inode is in btree format and has
multiple levels (i.e node format rather than leaf format), then a
lookup failure will trigger an assert failure in xfs_da_path_shift
if the flag XFS_DA_OP_OKNOENT is not set. This flag is used to
indicate to the directory btree code that not finding an entry is
not a fatal error. In the case of doing a lookup for a directory
name removal, this is valid as a user cannot insert an arbitrary
name to remove from the directory btree.

However, in the case of the attribute tree, a user has direct
control over the attribute name and can ask for any random name to
be removed without any validation. In this case, fsstress is asking
for a non-existent user.selinux attribute to be removed, and that is
causing xfs_da_path_shift() to fall off the bottom of the tree where
it asserts that a lookup failure is allowed. Because the flag is not
set, we die a horrible death on a debug enable kernel.

Prevent this assert from firing on attribute removes by adding the
op_flag XFS_DA_OP_OKNOENT to atribute removal operations.

Discovered when testing on a SELinux enabled system by fsstress in
test 070 by trying to remove a non-existent user.selinux attribute.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Alex Elder <aelder@sgi.com>
---
 fs/xfs/xfs_attr.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index c86375378810..01d2072fb6d4 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -489,6 +489,13 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags)
 	args.total = 0;
 	args.whichfork = XFS_ATTR_FORK;
 
+	/*
+	 * we have no control over the attribute names that userspace passes us
+	 * to remove, so we have to allow the name lookup prior to attribute
+	 * removal to fail.
+	 */
+	args.op_flags = XFS_DA_OP_OKNOENT;
+
 	/*
 	 * Attach the dquots to the inode.
 	 */
-- 
cgit v1.2.3