From 47460d65a483529b3bc2bf6ccf461ad45f94df83 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Tue, 10 Feb 2009 16:05:07 -0800
Subject: ocfs2: Make the ocfs2_caching_info structure self-contained.

We want to use the ocfs2_caching_info structure in places that are not
inodes.  To do that, it can no longer rely on referencing the inode
directly.

This patch moves the flags to ocfs2_caching_info->ci_flags, stores
pointers to the parent's locks on the ocfs2_caching_info, and renames
the constants and flags to reflect its independent state.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/inode.c    |  5 +--
 fs/ocfs2/inode.h    |  2 --
 fs/ocfs2/ocfs2.h    | 23 +++++++++++--
 fs/ocfs2/super.c    |  3 +-
 fs/ocfs2/uptodate.c | 99 +++++++++++++++++++++++++++--------------------------
 fs/ocfs2/uptodate.h |  4 ++-
 6 files changed, 80 insertions(+), 56 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 4dc8890ba316..8ec80445d18c 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -1118,7 +1118,8 @@ void ocfs2_clear_inode(struct inode *inode)
 			"Clear inode of %llu, inode has %u cache items\n",
 			(unsigned long long)oi->ip_blkno, oi->ip_metadata_cache.ci_num_cached);
 
-	mlog_bug_on_msg(!(oi->ip_flags & OCFS2_INODE_CACHE_INLINE),
+	mlog_bug_on_msg(!(oi->ip_metadata_cache.ci_flags &
+			  OCFS2_CACHE_FL_INLINE),
 			"Clear inode of %llu, inode has a bad flag\n",
 			(unsigned long long)oi->ip_blkno);
 
@@ -1145,7 +1146,7 @@ void ocfs2_clear_inode(struct inode *inode)
 			(unsigned long long)oi->ip_blkno, oi->ip_open_count);
 
 	/* Clear all other flags. */
-	oi->ip_flags = OCFS2_INODE_CACHE_INLINE;
+	oi->ip_flags = 0;
 	oi->ip_created_trans = 0;
 	oi->ip_last_trans = 0;
 	oi->ip_dir_start_lookup = 0;
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index ea71525aad41..2f5e1aa0ccbf 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -106,8 +106,6 @@ struct ocfs2_inode_info
 #define OCFS2_INODE_MAYBE_ORPHANED	0x00000020
 /* Does someone have the file open O_DIRECT */
 #define OCFS2_INODE_OPEN_DIRECT		0x00000040
-/* Indicates that the metadata cache should be used as an array. */
-#define OCFS2_INODE_CACHE_INLINE	0x00000080
 
 static inline struct ocfs2_inode_info *OCFS2_I(struct inode *inode)
 {
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 39e1d5a39505..eef3bd077c10 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -51,17 +51,36 @@
 /* For struct ocfs2_blockcheck_stats */
 #include "blockcheck.h"
 
+
+/* Caching of metadata buffers */
+
 /* Most user visible OCFS2 inodes will have very few pieces of
  * metadata, but larger files (including bitmaps, etc) must be taken
  * into account when designing an access scheme. We allow a small
  * amount of inlined blocks to be stored on an array and grow the
  * structure into a rb tree when necessary. */
-#define OCFS2_INODE_MAX_CACHE_ARRAY 2
+#define OCFS2_CACHE_INFO_MAX_ARRAY 2
+
+/* Flags for ocfs2_caching_info */
+
+enum ocfs2_caching_info_flags {
+	/* Indicates that the metadata cache is using the inline array */
+	OCFS2_CACHE_FL_INLINE	= 1<<1,
+};
 
 struct ocfs2_caching_info {
+	/*
+	 * The parent structure provides the locks, but because the
+	 * parent structure can differ, struct ocfs2_caching_info needs
+	 * its own pointers to them.
+	 */
+	spinlock_t		*ci_lock;
+	struct mutex		*ci_io_mutex;
+
+	unsigned int		ci_flags;
 	unsigned int		ci_num_cached;
 	union {
-		sector_t	ci_array[OCFS2_INODE_MAX_CACHE_ARRAY];
+	sector_t	ci_array[OCFS2_CACHE_INFO_MAX_ARRAY];
 		struct rb_root	ci_tree;
 	} ci_cache;
 };
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index a3f8871d21fd..8f217f6d1363 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1683,7 +1683,8 @@ static void ocfs2_inode_init_once(void *data)
 	ocfs2_lock_res_init_once(&oi->ip_inode_lockres);
 	ocfs2_lock_res_init_once(&oi->ip_open_lockres);
 
-	ocfs2_metadata_cache_init(&oi->vfs_inode);
+	ocfs2_metadata_cache_init(&oi->ip_metadata_cache, &oi->ip_lock,
+				  &oi->ip_io_mutex);
 
 	inode_init_once(&oi->vfs_inode);
 }
diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c
index 187b99ff0368..8dbc457ba236 100644
--- a/fs/ocfs2/uptodate.c
+++ b/fs/ocfs2/uptodate.c
@@ -75,12 +75,13 @@ struct ocfs2_meta_cache_item {
 
 static struct kmem_cache *ocfs2_uptodate_cachep = NULL;
 
-void ocfs2_metadata_cache_init(struct inode *inode)
+void ocfs2_metadata_cache_init(struct ocfs2_caching_info *ci,
+			       spinlock_t *cache_lock,
+			       struct mutex *io_mutex)
 {
-	struct ocfs2_inode_info *oi = OCFS2_I(inode);
-	struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
-
-	oi->ip_flags |= OCFS2_INODE_CACHE_INLINE;
+	ci->ci_lock = cache_lock;
+	ci->ci_io_mutex = io_mutex;
+	ci->ci_flags |= OCFS2_CACHE_FL_INLINE;
 	ci->ci_num_cached = 0;
 }
 
@@ -119,8 +120,8 @@ void ocfs2_metadata_cache_purge(struct inode *inode)
 	struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
 	struct rb_root root = RB_ROOT;
 
-	spin_lock(&oi->ip_lock);
-	tree = !(oi->ip_flags & OCFS2_INODE_CACHE_INLINE);
+	spin_lock(ci->ci_lock);
+	tree = !(ci->ci_flags & OCFS2_CACHE_FL_INLINE);
 	to_purge = ci->ci_num_cached;
 
 	mlog(0, "Purge %u %s items from Inode %llu\n", to_purge,
@@ -132,8 +133,8 @@ void ocfs2_metadata_cache_purge(struct inode *inode)
 	if (tree)
 		root = ci->ci_cache.ci_tree;
 
-	ocfs2_metadata_cache_init(inode);
-	spin_unlock(&oi->ip_lock);
+	ocfs2_metadata_cache_init(ci, ci->ci_lock, ci->ci_io_mutex);
+	spin_unlock(ci->ci_lock);
 
 	purged = ocfs2_purge_copied_metadata_tree(&root);
 	/* If possible, track the number wiped so that we can more
@@ -187,22 +188,23 @@ static int ocfs2_buffer_cached(struct ocfs2_inode_info *oi,
 {
 	int index = -1;
 	struct ocfs2_meta_cache_item *item = NULL;
+	struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
 
-	spin_lock(&oi->ip_lock);
+	spin_lock(ci->ci_lock);
 
 	mlog(0, "Inode %llu, query block %llu (inline = %u)\n",
 	     (unsigned long long)oi->ip_blkno,
 	     (unsigned long long) bh->b_blocknr,
-	     !!(oi->ip_flags & OCFS2_INODE_CACHE_INLINE));
+	     !!(ci->ci_flags & OCFS2_CACHE_FL_INLINE));
 
-	if (oi->ip_flags & OCFS2_INODE_CACHE_INLINE)
+	if (ci->ci_flags & OCFS2_CACHE_FL_INLINE)
 		index = ocfs2_search_cache_array(&oi->ip_metadata_cache,
 						 bh->b_blocknr);
 	else
 		item = ocfs2_search_cache_tree(&oi->ip_metadata_cache,
 					       bh->b_blocknr);
 
-	spin_unlock(&oi->ip_lock);
+	spin_unlock(ci->ci_lock);
 
 	mlog(0, "index = %d, item = %p\n", index, item);
 
@@ -235,7 +237,7 @@ int ocfs2_buffer_uptodate(struct inode *inode,
 
 /* 
  * Determine whether a buffer is currently out on a read-ahead request.
- * ip_io_sem should be held to serialize submitters with the logic here.
+ * ci_io_sem should be held to serialize submitters with the logic here.
  */
 int ocfs2_buffer_read_ahead(struct inode *inode,
 			    struct buffer_head *bh)
@@ -247,7 +249,7 @@ int ocfs2_buffer_read_ahead(struct inode *inode,
 static void ocfs2_append_cache_array(struct ocfs2_caching_info *ci,
 				     sector_t block)
 {
-	BUG_ON(ci->ci_num_cached >= OCFS2_INODE_MAX_CACHE_ARRAY);
+	BUG_ON(ci->ci_num_cached >= OCFS2_CACHE_INFO_MAX_ARRAY);
 
 	mlog(0, "block %llu takes position %u\n", (unsigned long long) block,
 	     ci->ci_num_cached);
@@ -295,13 +297,13 @@ static void __ocfs2_insert_cache_tree(struct ocfs2_caching_info *ci,
 static inline int ocfs2_insert_can_use_array(struct ocfs2_inode_info *oi,
 					     struct ocfs2_caching_info *ci)
 {
-	assert_spin_locked(&oi->ip_lock);
+	assert_spin_locked(ci->ci_lock);
 
-	return (oi->ip_flags & OCFS2_INODE_CACHE_INLINE) &&
-		(ci->ci_num_cached < OCFS2_INODE_MAX_CACHE_ARRAY);
+	return (ci->ci_flags & OCFS2_CACHE_FL_INLINE) &&
+		(ci->ci_num_cached < OCFS2_CACHE_INFO_MAX_ARRAY);
 }
 
-/* tree should be exactly OCFS2_INODE_MAX_CACHE_ARRAY wide. NULL the
+/* tree should be exactly OCFS2_CACHE_INFO_MAX_ARRAY wide. NULL the
  * pointers in tree after we use them - this allows caller to detect
  * when to free in case of error. */
 static void ocfs2_expand_cache(struct ocfs2_inode_info *oi,
@@ -310,32 +312,32 @@ static void ocfs2_expand_cache(struct ocfs2_inode_info *oi,
 	int i;
 	struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
 
-	mlog_bug_on_msg(ci->ci_num_cached != OCFS2_INODE_MAX_CACHE_ARRAY,
+	mlog_bug_on_msg(ci->ci_num_cached != OCFS2_CACHE_INFO_MAX_ARRAY,
 			"Inode %llu, num cached = %u, should be %u\n",
 			(unsigned long long)oi->ip_blkno, ci->ci_num_cached,
-			OCFS2_INODE_MAX_CACHE_ARRAY);
-	mlog_bug_on_msg(!(oi->ip_flags & OCFS2_INODE_CACHE_INLINE),
+			OCFS2_CACHE_INFO_MAX_ARRAY);
+	mlog_bug_on_msg(!(ci->ci_flags & OCFS2_CACHE_FL_INLINE),
 			"Inode %llu not marked as inline anymore!\n",
 			(unsigned long long)oi->ip_blkno);
-	assert_spin_locked(&oi->ip_lock);
+	assert_spin_locked(ci->ci_lock);
 
 	/* Be careful to initialize the tree members *first* because
 	 * once the ci_tree is used, the array is junk... */
-	for(i = 0; i < OCFS2_INODE_MAX_CACHE_ARRAY; i++)
+	for (i = 0; i < OCFS2_CACHE_INFO_MAX_ARRAY; i++)
 		tree[i]->c_block = ci->ci_cache.ci_array[i];
 
-	oi->ip_flags &= ~OCFS2_INODE_CACHE_INLINE;
+	ci->ci_flags &= ~OCFS2_CACHE_FL_INLINE;
 	ci->ci_cache.ci_tree = RB_ROOT;
 	/* this will be set again by __ocfs2_insert_cache_tree */
 	ci->ci_num_cached = 0;
 
-	for(i = 0; i < OCFS2_INODE_MAX_CACHE_ARRAY; i++) {
+	for (i = 0; i < OCFS2_CACHE_INFO_MAX_ARRAY; i++) {
 		__ocfs2_insert_cache_tree(ci, tree[i]);
 		tree[i] = NULL;
 	}
 
 	mlog(0, "Expanded %llu to a tree cache: flags 0x%x, num = %u\n",
-	     (unsigned long long)oi->ip_blkno, oi->ip_flags, ci->ci_num_cached);
+	     (unsigned long long)oi->ip_blkno, ci->ci_flags, ci->ci_num_cached);
 }
 
 /* Slow path function - memory allocation is necessary. See the
@@ -347,7 +349,7 @@ static void __ocfs2_set_buffer_uptodate(struct ocfs2_inode_info *oi,
 	int i;
 	struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
 	struct ocfs2_meta_cache_item *new = NULL;
-	struct ocfs2_meta_cache_item *tree[OCFS2_INODE_MAX_CACHE_ARRAY] =
+	struct ocfs2_meta_cache_item *tree[OCFS2_CACHE_INFO_MAX_ARRAY] =
 		{ NULL, };
 
 	mlog(0, "Inode %llu, block %llu, expand = %d\n",
@@ -364,7 +366,7 @@ static void __ocfs2_set_buffer_uptodate(struct ocfs2_inode_info *oi,
 	if (expand_tree) {
 		/* Do *not* allocate an array here - the removal code
 		 * has no way of tracking that. */
-		for(i = 0; i < OCFS2_INODE_MAX_CACHE_ARRAY; i++) {
+		for (i = 0; i < OCFS2_CACHE_INFO_MAX_ARRAY; i++) {
 			tree[i] = kmem_cache_alloc(ocfs2_uptodate_cachep,
 						   GFP_NOFS);
 			if (!tree[i]) {
@@ -376,13 +378,13 @@ static void __ocfs2_set_buffer_uptodate(struct ocfs2_inode_info *oi,
 		}
 	}
 
-	spin_lock(&oi->ip_lock);
+	spin_lock(ci->ci_lock);
 	if (ocfs2_insert_can_use_array(oi, ci)) {
 		mlog(0, "Someone cleared the tree underneath us\n");
 		/* Ok, items were removed from the cache in between
 		 * locks. Detect this and revert back to the fast path */
 		ocfs2_append_cache_array(ci, block);
-		spin_unlock(&oi->ip_lock);
+		spin_unlock(ci->ci_lock);
 		goto out_free;
 	}
 
@@ -390,7 +392,7 @@ static void __ocfs2_set_buffer_uptodate(struct ocfs2_inode_info *oi,
 		ocfs2_expand_cache(oi, tree);
 
 	__ocfs2_insert_cache_tree(ci, new);
-	spin_unlock(&oi->ip_lock);
+	spin_unlock(ci->ci_lock);
 
 	new = NULL;
 out_free:
@@ -400,14 +402,14 @@ out_free:
 	/* If these were used, then ocfs2_expand_cache re-set them to
 	 * NULL for us. */
 	if (tree[0]) {
-		for(i = 0; i < OCFS2_INODE_MAX_CACHE_ARRAY; i++)
+		for (i = 0; i < OCFS2_CACHE_INFO_MAX_ARRAY; i++)
 			if (tree[i])
 				kmem_cache_free(ocfs2_uptodate_cachep,
 						tree[i]);
 	}
 }
 
-/* Item insertion is guarded by ip_io_mutex, so the insertion path takes
+/* Item insertion is guarded by ci_io_mutex, so the insertion path takes
  * advantage of this by not rechecking for a duplicate insert during
  * the slow case. Additionally, if the cache needs to be bumped up to
  * a tree, the code will not recheck after acquiring the lock --
@@ -442,42 +444,43 @@ void ocfs2_set_buffer_uptodate(struct inode *inode,
 	     (unsigned long long)bh->b_blocknr);
 
 	/* No need to recheck under spinlock - insertion is guarded by
-	 * ip_io_mutex */
-	spin_lock(&oi->ip_lock);
+	 * ci_io_mutex */
+	spin_lock(ci->ci_lock);
 	if (ocfs2_insert_can_use_array(oi, ci)) {
 		/* Fast case - it's an array and there's a free
 		 * spot. */
 		ocfs2_append_cache_array(ci, bh->b_blocknr);
-		spin_unlock(&oi->ip_lock);
+		spin_unlock(ci->ci_lock);
 		return;
 	}
 
 	expand = 0;
-	if (oi->ip_flags & OCFS2_INODE_CACHE_INLINE) {
+	if (ci->ci_flags & OCFS2_CACHE_FL_INLINE) {
 		/* We need to bump things up to a tree. */
 		expand = 1;
 	}
-	spin_unlock(&oi->ip_lock);
+	spin_unlock(ci->ci_lock);
 
 	__ocfs2_set_buffer_uptodate(oi, bh->b_blocknr, expand);
 }
 
 /* Called against a newly allocated buffer. Most likely nobody should
  * be able to read this sort of metadata while it's still being
- * allocated, but this is careful to take ip_io_mutex anyway. */
+ * allocated, but this is careful to take ci_io_mutex anyway. */
 void ocfs2_set_new_buffer_uptodate(struct inode *inode,
 				   struct buffer_head *bh)
 {
 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
+	struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
 
 	/* This should definitely *not* exist in our cache */
 	BUG_ON(ocfs2_buffer_cached(oi, bh));
 
 	set_buffer_uptodate(bh);
 
-	mutex_lock(&oi->ip_io_mutex);
+	mutex_lock(ci->ci_io_mutex);
 	ocfs2_set_buffer_uptodate(inode, bh);
-	mutex_unlock(&oi->ip_io_mutex);
+	mutex_unlock(ci->ci_io_mutex);
 }
 
 /* Requires ip_lock. */
@@ -487,7 +490,7 @@ static void ocfs2_remove_metadata_array(struct ocfs2_caching_info *ci,
 	sector_t *array = ci->ci_cache.ci_array;
 	int bytes;
 
-	BUG_ON(index < 0 || index >= OCFS2_INODE_MAX_CACHE_ARRAY);
+	BUG_ON(index < 0 || index >= OCFS2_CACHE_INFO_MAX_ARRAY);
 	BUG_ON(index >= ci->ci_num_cached);
 	BUG_ON(!ci->ci_num_cached);
 
@@ -523,13 +526,13 @@ static void ocfs2_remove_block_from_cache(struct inode *inode,
 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
 	struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
 
-	spin_lock(&oi->ip_lock);
+	spin_lock(ci->ci_lock);
 	mlog(0, "Inode %llu, remove %llu, items = %u, array = %u\n",
 	     (unsigned long long)oi->ip_blkno,
 	     (unsigned long long) block, ci->ci_num_cached,
-	     oi->ip_flags & OCFS2_INODE_CACHE_INLINE);
+	     ci->ci_flags & OCFS2_CACHE_FL_INLINE);
 
-	if (oi->ip_flags & OCFS2_INODE_CACHE_INLINE) {
+	if (ci->ci_flags & OCFS2_CACHE_FL_INLINE) {
 		index = ocfs2_search_cache_array(ci, block);
 		if (index != -1)
 			ocfs2_remove_metadata_array(ci, index);
@@ -538,7 +541,7 @@ static void ocfs2_remove_block_from_cache(struct inode *inode,
 		if (item)
 			ocfs2_remove_metadata_tree(ci, item);
 	}
-	spin_unlock(&oi->ip_lock);
+	spin_unlock(ci->ci_lock);
 
 	if (item)
 		kmem_cache_free(ocfs2_uptodate_cachep, item);
@@ -577,7 +580,7 @@ int __init init_ocfs2_uptodate_cache(void)
 		return -ENOMEM;
 
 	mlog(0, "%u inlined cache items per inode.\n",
-	     OCFS2_INODE_MAX_CACHE_ARRAY);
+	     OCFS2_CACHE_INFO_MAX_ARRAY);
 
 	return 0;
 }
diff --git a/fs/ocfs2/uptodate.h b/fs/ocfs2/uptodate.h
index 531b4b3a0c47..bd749e1434f1 100644
--- a/fs/ocfs2/uptodate.h
+++ b/fs/ocfs2/uptodate.h
@@ -29,7 +29,9 @@
 int __init init_ocfs2_uptodate_cache(void);
 void exit_ocfs2_uptodate_cache(void);
 
-void ocfs2_metadata_cache_init(struct inode *inode);
+void ocfs2_metadata_cache_init(struct ocfs2_caching_info *ci,
+			       spinlock_t *cache_lock,
+			       struct mutex *io_mutex);
 void ocfs2_metadata_cache_purge(struct inode *inode);
 
 int ocfs2_buffer_uptodate(struct inode *inode,
-- 
cgit v1.2.3


From 6e5a3d7538ad4e46a976862f593faf65750e37cc Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Tue, 10 Feb 2009 19:00:37 -0800
Subject: ocfs2: Change metadata caching locks to an operations structure.

We don't really want to cart around too many new fields on the
ocfs2_caching_info structure.  So let's wrap all our access of the
parent object in a set of operations.  One pointer on caching_info, and
more flexibility to boot.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/inode.c    |  49 ++++++++++++++++++++
 fs/ocfs2/inode.h    |   1 +
 fs/ocfs2/ocfs2.h    |   8 ++--
 fs/ocfs2/super.c    |   4 +-
 fs/ocfs2/uptodate.c | 129 ++++++++++++++++++++++++++++++++++------------------
 fs/ocfs2/uptodate.h |  30 +++++++++++-
 6 files changed, 169 insertions(+), 52 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 8ec80445d18c..36bb588f8fcb 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -1395,3 +1395,52 @@ int ocfs2_read_inode_block(struct inode *inode, struct buffer_head **bh)
 {
 	return ocfs2_read_inode_block_full(inode, bh, 0);
 }
+
+static struct ocfs2_inode_info *cache_info_to_inode(struct ocfs2_caching_info *ci)
+{
+	return container_of(ci, struct ocfs2_inode_info, ip_metadata_cache);
+}
+
+static u64 ocfs2_inode_cache_owner(struct ocfs2_caching_info *ci)
+{
+	struct ocfs2_inode_info *oi = cache_info_to_inode(ci);
+
+	return oi->ip_blkno;
+}
+
+static void ocfs2_inode_cache_lock(struct ocfs2_caching_info *ci)
+{
+	struct ocfs2_inode_info *oi = cache_info_to_inode(ci);
+
+	spin_lock(&oi->ip_lock);
+}
+
+static void ocfs2_inode_cache_unlock(struct ocfs2_caching_info *ci)
+{
+	struct ocfs2_inode_info *oi = cache_info_to_inode(ci);
+
+	spin_unlock(&oi->ip_lock);
+}
+
+static void ocfs2_inode_cache_io_lock(struct ocfs2_caching_info *ci)
+{
+	struct ocfs2_inode_info *oi = cache_info_to_inode(ci);
+
+	mutex_lock(&oi->ip_io_mutex);
+}
+
+static void ocfs2_inode_cache_io_unlock(struct ocfs2_caching_info *ci)
+{
+	struct ocfs2_inode_info *oi = cache_info_to_inode(ci);
+
+	mutex_unlock(&oi->ip_io_mutex);
+}
+
+const struct ocfs2_caching_operations ocfs2_inode_caching_ops = {
+	.co_owner		= ocfs2_inode_cache_owner,
+	.co_cache_lock		= ocfs2_inode_cache_lock,
+	.co_cache_unlock	= ocfs2_inode_cache_unlock,
+	.co_io_lock		= ocfs2_inode_cache_io_lock,
+	.co_io_unlock		= ocfs2_inode_cache_io_unlock,
+};
+
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index 2f5e1aa0ccbf..cd1caca545f5 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -118,6 +118,7 @@ static inline struct ocfs2_inode_info *OCFS2_I(struct inode *inode)
 extern struct kmem_cache *ocfs2_inode_cache;
 
 extern const struct address_space_operations ocfs2_aops;
+extern const struct ocfs2_caching_operations ocfs2_inode_caching_ops;
 
 void ocfs2_clear_inode(struct inode *inode);
 void ocfs2_delete_inode(struct inode *inode);
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index eef3bd077c10..6e54a496299e 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -68,14 +68,14 @@ enum ocfs2_caching_info_flags {
 	OCFS2_CACHE_FL_INLINE	= 1<<1,
 };
 
+struct ocfs2_caching_operations;
 struct ocfs2_caching_info {
 	/*
 	 * The parent structure provides the locks, but because the
-	 * parent structure can differ, struct ocfs2_caching_info needs
-	 * its own pointers to them.
+	 * parent structure can differ, it provides locking operations
+	 * to struct ocfs2_caching_info.
 	 */
-	spinlock_t		*ci_lock;
-	struct mutex		*ci_io_mutex;
+	const struct ocfs2_caching_operations *ci_ops;
 
 	unsigned int		ci_flags;
 	unsigned int		ci_num_cached;
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 8f217f6d1363..746ed5d4dda9 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1683,8 +1683,8 @@ static void ocfs2_inode_init_once(void *data)
 	ocfs2_lock_res_init_once(&oi->ip_inode_lockres);
 	ocfs2_lock_res_init_once(&oi->ip_open_lockres);
 
-	ocfs2_metadata_cache_init(&oi->ip_metadata_cache, &oi->ip_lock,
-				  &oi->ip_io_mutex);
+	ocfs2_metadata_cache_init(&oi->ip_metadata_cache,
+				  &ocfs2_inode_caching_ops);
 
 	inode_init_once(&oi->vfs_inode);
 }
diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c
index 8dbc457ba236..226d0429fd7f 100644
--- a/fs/ocfs2/uptodate.c
+++ b/fs/ocfs2/uptodate.c
@@ -75,12 +75,48 @@ struct ocfs2_meta_cache_item {
 
 static struct kmem_cache *ocfs2_uptodate_cachep = NULL;
 
+static u64 ocfs2_metadata_cache_owner(struct ocfs2_caching_info *ci)
+{
+	BUG_ON(!ci || !ci->ci_ops);
+
+	return ci->ci_ops->co_owner(ci);
+}
+
+static void ocfs2_metadata_cache_lock(struct ocfs2_caching_info *ci)
+{
+	BUG_ON(!ci || !ci->ci_ops);
+
+	ci->ci_ops->co_cache_lock(ci);
+}
+
+static void ocfs2_metadata_cache_unlock(struct ocfs2_caching_info *ci)
+{
+	BUG_ON(!ci || !ci->ci_ops);
+
+	ci->ci_ops->co_cache_unlock(ci);
+}
+
+static void ocfs2_metadata_cache_io_lock(struct ocfs2_caching_info *ci)
+{
+	BUG_ON(!ci || !ci->ci_ops);
+
+	ci->ci_ops->co_io_lock(ci);
+}
+
+static void ocfs2_metadata_cache_io_unlock(struct ocfs2_caching_info *ci)
+{
+	BUG_ON(!ci || !ci->ci_ops);
+
+	ci->ci_ops->co_io_unlock(ci);
+}
+
+
 void ocfs2_metadata_cache_init(struct ocfs2_caching_info *ci,
-			       spinlock_t *cache_lock,
-			       struct mutex *io_mutex)
+			       const struct ocfs2_caching_operations *ops)
 {
-	ci->ci_lock = cache_lock;
-	ci->ci_io_mutex = io_mutex;
+	BUG_ON(!ops);
+
+	ci->ci_ops = ops;
 	ci->ci_flags |= OCFS2_CACHE_FL_INLINE;
 	ci->ci_num_cached = 0;
 }
@@ -120,12 +156,15 @@ void ocfs2_metadata_cache_purge(struct inode *inode)
 	struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
 	struct rb_root root = RB_ROOT;
 
-	spin_lock(ci->ci_lock);
+	BUG_ON(!ci || !ci->ci_ops);
+
+	ocfs2_metadata_cache_lock(ci);
 	tree = !(ci->ci_flags & OCFS2_CACHE_FL_INLINE);
 	to_purge = ci->ci_num_cached;
 
-	mlog(0, "Purge %u %s items from Inode %llu\n", to_purge,
-	     tree ? "array" : "tree", (unsigned long long)oi->ip_blkno);
+	mlog(0, "Purge %u %s items from Owner %llu\n", to_purge,
+	     tree ? "array" : "tree",
+	     (unsigned long long)ocfs2_metadata_cache_owner(ci));
 
 	/* If we're a tree, save off the root so that we can safely
 	 * initialize the cache. We do the work to free tree members
@@ -133,16 +172,17 @@ void ocfs2_metadata_cache_purge(struct inode *inode)
 	if (tree)
 		root = ci->ci_cache.ci_tree;
 
-	ocfs2_metadata_cache_init(ci, ci->ci_lock, ci->ci_io_mutex);
-	spin_unlock(ci->ci_lock);
+	ocfs2_metadata_cache_init(ci, ci->ci_ops);
+	ocfs2_metadata_cache_unlock(ci);
 
 	purged = ocfs2_purge_copied_metadata_tree(&root);
 	/* If possible, track the number wiped so that we can more
 	 * easily detect counting errors. Unfortunately, this is only
 	 * meaningful for trees. */
 	if (tree && purged != to_purge)
-		mlog(ML_ERROR, "Inode %llu, count = %u, purged = %u\n",
-		     (unsigned long long)oi->ip_blkno, to_purge, purged);
+		mlog(ML_ERROR, "Owner %llu, count = %u, purged = %u\n",
+		     (unsigned long long)ocfs2_metadata_cache_owner(ci),
+		     to_purge, purged);
 }
 
 /* Returns the index in the cache array, -1 if not found.
@@ -190,10 +230,10 @@ static int ocfs2_buffer_cached(struct ocfs2_inode_info *oi,
 	struct ocfs2_meta_cache_item *item = NULL;
 	struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
 
-	spin_lock(ci->ci_lock);
+	ocfs2_metadata_cache_lock(ci);
 
-	mlog(0, "Inode %llu, query block %llu (inline = %u)\n",
-	     (unsigned long long)oi->ip_blkno,
+	mlog(0, "Owner %llu, query block %llu (inline = %u)\n",
+	     (unsigned long long)ocfs2_metadata_cache_owner(ci),
 	     (unsigned long long) bh->b_blocknr,
 	     !!(ci->ci_flags & OCFS2_CACHE_FL_INLINE));
 
@@ -204,7 +244,7 @@ static int ocfs2_buffer_cached(struct ocfs2_inode_info *oi,
 		item = ocfs2_search_cache_tree(&oi->ip_metadata_cache,
 					       bh->b_blocknr);
 
-	spin_unlock(ci->ci_lock);
+	ocfs2_metadata_cache_unlock(ci);
 
 	mlog(0, "index = %d, item = %p\n", index, item);
 
@@ -294,18 +334,19 @@ static void __ocfs2_insert_cache_tree(struct ocfs2_caching_info *ci,
 	ci->ci_num_cached++;
 }
 
+/* co_cache_lock() must be held */
 static inline int ocfs2_insert_can_use_array(struct ocfs2_inode_info *oi,
 					     struct ocfs2_caching_info *ci)
 {
-	assert_spin_locked(ci->ci_lock);
-
 	return (ci->ci_flags & OCFS2_CACHE_FL_INLINE) &&
 		(ci->ci_num_cached < OCFS2_CACHE_INFO_MAX_ARRAY);
 }
 
 /* tree should be exactly OCFS2_CACHE_INFO_MAX_ARRAY wide. NULL the
  * pointers in tree after we use them - this allows caller to detect
- * when to free in case of error. */
+ * when to free in case of error.
+ *
+ * The co_cache_lock() must be held. */
 static void ocfs2_expand_cache(struct ocfs2_inode_info *oi,
 			       struct ocfs2_meta_cache_item **tree)
 {
@@ -313,13 +354,12 @@ static void ocfs2_expand_cache(struct ocfs2_inode_info *oi,
 	struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
 
 	mlog_bug_on_msg(ci->ci_num_cached != OCFS2_CACHE_INFO_MAX_ARRAY,
-			"Inode %llu, num cached = %u, should be %u\n",
-			(unsigned long long)oi->ip_blkno, ci->ci_num_cached,
-			OCFS2_CACHE_INFO_MAX_ARRAY);
+			"Owner %llu, num cached = %u, should be %u\n",
+			(unsigned long long)ocfs2_metadata_cache_owner(ci),
+			ci->ci_num_cached, OCFS2_CACHE_INFO_MAX_ARRAY);
 	mlog_bug_on_msg(!(ci->ci_flags & OCFS2_CACHE_FL_INLINE),
-			"Inode %llu not marked as inline anymore!\n",
-			(unsigned long long)oi->ip_blkno);
-	assert_spin_locked(ci->ci_lock);
+			"Owner %llu not marked as inline anymore!\n",
+			(unsigned long long)ocfs2_metadata_cache_owner(ci));
 
 	/* Be careful to initialize the tree members *first* because
 	 * once the ci_tree is used, the array is junk... */
@@ -337,7 +377,8 @@ static void ocfs2_expand_cache(struct ocfs2_inode_info *oi,
 	}
 
 	mlog(0, "Expanded %llu to a tree cache: flags 0x%x, num = %u\n",
-	     (unsigned long long)oi->ip_blkno, ci->ci_flags, ci->ci_num_cached);
+	     (unsigned long long)ocfs2_metadata_cache_owner(ci),
+	     ci->ci_flags, ci->ci_num_cached);
 }
 
 /* Slow path function - memory allocation is necessary. See the
@@ -352,8 +393,8 @@ static void __ocfs2_set_buffer_uptodate(struct ocfs2_inode_info *oi,
 	struct ocfs2_meta_cache_item *tree[OCFS2_CACHE_INFO_MAX_ARRAY] =
 		{ NULL, };
 
-	mlog(0, "Inode %llu, block %llu, expand = %d\n",
-	     (unsigned long long)oi->ip_blkno,
+	mlog(0, "Owner %llu, block %llu, expand = %d\n",
+	     (unsigned long long)ocfs2_metadata_cache_owner(ci),
 	     (unsigned long long)block, expand_tree);
 
 	new = kmem_cache_alloc(ocfs2_uptodate_cachep, GFP_NOFS);
@@ -378,13 +419,13 @@ static void __ocfs2_set_buffer_uptodate(struct ocfs2_inode_info *oi,
 		}
 	}
 
-	spin_lock(ci->ci_lock);
+	ocfs2_metadata_cache_lock(ci);
 	if (ocfs2_insert_can_use_array(oi, ci)) {
 		mlog(0, "Someone cleared the tree underneath us\n");
 		/* Ok, items were removed from the cache in between
 		 * locks. Detect this and revert back to the fast path */
 		ocfs2_append_cache_array(ci, block);
-		spin_unlock(ci->ci_lock);
+		ocfs2_metadata_cache_unlock(ci);
 		goto out_free;
 	}
 
@@ -392,7 +433,7 @@ static void __ocfs2_set_buffer_uptodate(struct ocfs2_inode_info *oi,
 		ocfs2_expand_cache(oi, tree);
 
 	__ocfs2_insert_cache_tree(ci, new);
-	spin_unlock(ci->ci_lock);
+	ocfs2_metadata_cache_unlock(ci);
 
 	new = NULL;
 out_free:
@@ -409,7 +450,7 @@ out_free:
 	}
 }
 
-/* Item insertion is guarded by ci_io_mutex, so the insertion path takes
+/* Item insertion is guarded by co_io_lock(), so the insertion path takes
  * advantage of this by not rechecking for a duplicate insert during
  * the slow case. Additionally, if the cache needs to be bumped up to
  * a tree, the code will not recheck after acquiring the lock --
@@ -439,18 +480,18 @@ void ocfs2_set_buffer_uptodate(struct inode *inode,
 	if (ocfs2_buffer_cached(oi, bh))
 		return;
 
-	mlog(0, "Inode %llu, inserting block %llu\n",
-	     (unsigned long long)oi->ip_blkno,
+	mlog(0, "Owner %llu, inserting block %llu\n",
+	     (unsigned long long)ocfs2_metadata_cache_owner(ci),
 	     (unsigned long long)bh->b_blocknr);
 
 	/* No need to recheck under spinlock - insertion is guarded by
-	 * ci_io_mutex */
-	spin_lock(ci->ci_lock);
+	 * co_io_lock() */
+	ocfs2_metadata_cache_lock(ci);
 	if (ocfs2_insert_can_use_array(oi, ci)) {
 		/* Fast case - it's an array and there's a free
 		 * spot. */
 		ocfs2_append_cache_array(ci, bh->b_blocknr);
-		spin_unlock(ci->ci_lock);
+		ocfs2_metadata_cache_unlock(ci);
 		return;
 	}
 
@@ -459,14 +500,14 @@ void ocfs2_set_buffer_uptodate(struct inode *inode,
 		/* We need to bump things up to a tree. */
 		expand = 1;
 	}
-	spin_unlock(ci->ci_lock);
+	ocfs2_metadata_cache_unlock(ci);
 
 	__ocfs2_set_buffer_uptodate(oi, bh->b_blocknr, expand);
 }
 
 /* Called against a newly allocated buffer. Most likely nobody should
  * be able to read this sort of metadata while it's still being
- * allocated, but this is careful to take ci_io_mutex anyway. */
+ * allocated, but this is careful to take co_io_lock() anyway. */
 void ocfs2_set_new_buffer_uptodate(struct inode *inode,
 				   struct buffer_head *bh)
 {
@@ -478,9 +519,9 @@ void ocfs2_set_new_buffer_uptodate(struct inode *inode,
 
 	set_buffer_uptodate(bh);
 
-	mutex_lock(ci->ci_io_mutex);
+	ocfs2_metadata_cache_io_lock(ci);
 	ocfs2_set_buffer_uptodate(inode, bh);
-	mutex_unlock(ci->ci_io_mutex);
+	ocfs2_metadata_cache_io_unlock(ci);
 }
 
 /* Requires ip_lock. */
@@ -526,9 +567,9 @@ static void ocfs2_remove_block_from_cache(struct inode *inode,
 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
 	struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
 
-	spin_lock(ci->ci_lock);
-	mlog(0, "Inode %llu, remove %llu, items = %u, array = %u\n",
-	     (unsigned long long)oi->ip_blkno,
+	ocfs2_metadata_cache_lock(ci);
+	mlog(0, "Owner %llu, remove %llu, items = %u, array = %u\n",
+	     (unsigned long long)ocfs2_metadata_cache_owner(ci),
 	     (unsigned long long) block, ci->ci_num_cached,
 	     ci->ci_flags & OCFS2_CACHE_FL_INLINE);
 
@@ -541,7 +582,7 @@ static void ocfs2_remove_block_from_cache(struct inode *inode,
 		if (item)
 			ocfs2_remove_metadata_tree(ci, item);
 	}
-	spin_unlock(ci->ci_lock);
+	ocfs2_metadata_cache_unlock(ci);
 
 	if (item)
 		kmem_cache_free(ocfs2_uptodate_cachep, item);
diff --git a/fs/ocfs2/uptodate.h b/fs/ocfs2/uptodate.h
index bd749e1434f1..3b33eb88d320 100644
--- a/fs/ocfs2/uptodate.h
+++ b/fs/ocfs2/uptodate.h
@@ -26,12 +26,38 @@
 #ifndef OCFS2_UPTODATE_H
 #define OCFS2_UPTODATE_H
 
+/*
+ * The caching code relies on locking provided by the user of
+ * struct ocfs2_caching_info.  These operations connect that up.
+ */
+struct ocfs2_caching_operations {
+	/*
+	 * A u64 representing the owning structure.  Usually this
+	 * is the block number (i_blkno or whatnot).  This is used so
+	 * that caching log messages can identify the owning structure.
+	 */
+	u64	(*co_owner)(struct ocfs2_caching_info *ci);
+
+	/*
+	 * Lock and unlock the caching data.  These will not sleep, and
+	 * should probably be spinlocks.
+	 */
+	void	(*co_cache_lock)(struct ocfs2_caching_info *ci);
+	void	(*co_cache_unlock)(struct ocfs2_caching_info *ci);
+
+	/*
+	 * Lock and unlock for disk I/O.  These will sleep, and should
+	 * be mutexes.
+	 */
+	void	(*co_io_lock)(struct ocfs2_caching_info *ci);
+	void	(*co_io_unlock)(struct ocfs2_caching_info *ci);
+};
+
 int __init init_ocfs2_uptodate_cache(void);
 void exit_ocfs2_uptodate_cache(void);
 
 void ocfs2_metadata_cache_init(struct ocfs2_caching_info *ci,
-			       spinlock_t *cache_lock,
-			       struct mutex *io_mutex);
+			       const struct ocfs2_caching_operations *ops);
 void ocfs2_metadata_cache_purge(struct inode *inode);
 
 int ocfs2_buffer_uptodate(struct inode *inode,
-- 
cgit v1.2.3


From 8cb471e8f82506937fe5e2e9fb0bf90f6b1f1170 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Tue, 10 Feb 2009 20:00:41 -0800
Subject: ocfs2: Take the inode out of the metadata read/write paths.

We are really passing the inode into the ocfs2_read/write_blocks()
functions to get at the metadata cache.  This commit passes the cache
directly into the metadata block functions, divorcing them from the
inode.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/alloc.c          | 13 ++++----
 fs/ocfs2/buffer_head_io.c | 47 ++++++++++++++-------------
 fs/ocfs2/buffer_head_io.h |  8 ++---
 fs/ocfs2/dir.c            | 23 +++++++------
 fs/ocfs2/dlmglue.c        |  2 +-
 fs/ocfs2/extent_map.c     |  4 +--
 fs/ocfs2/inode.c          | 24 +++++++++-----
 fs/ocfs2/inode.h          |  5 +++
 fs/ocfs2/journal.c        |  4 +--
 fs/ocfs2/localalloc.c     |  2 +-
 fs/ocfs2/namei.c          |  5 +--
 fs/ocfs2/quota_global.c   |  2 +-
 fs/ocfs2/quota_local.c    |  6 ++--
 fs/ocfs2/resize.c         |  2 +-
 fs/ocfs2/slot_map.c       | 10 +++---
 fs/ocfs2/suballoc.c       |  4 +--
 fs/ocfs2/super.c          |  2 +-
 fs/ocfs2/uptodate.c       | 83 ++++++++++++++++++++++-------------------------
 fs/ocfs2/uptodate.h       | 21 ++++++++----
 fs/ocfs2/xattr.c          | 23 +++++++------
 20 files changed, 156 insertions(+), 134 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index ab513ddaeff2..d5dffcfa192a 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -859,7 +859,7 @@ int ocfs2_read_extent_block(struct inode *inode, u64 eb_blkno,
 	int rc;
 	struct buffer_head *tmp = *bh;
 
-	rc = ocfs2_read_block(inode, eb_blkno, &tmp,
+	rc = ocfs2_read_block(INODE_CACHE(inode), eb_blkno, &tmp,
 			      ocfs2_validate_extent_block);
 
 	/* If ocfs2_read_block() got us a new bh, pass it up. */
@@ -949,7 +949,8 @@ static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb,
 				mlog_errno(status);
 				goto bail;
 			}
-			ocfs2_set_new_buffer_uptodate(inode, bhs[i]);
+			ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode),
+						      bhs[i]);
 
 			status = ocfs2_journal_access_eb(handle, inode, bhs[i],
 							 OCFS2_JOURNAL_ACCESS_CREATE);
@@ -2559,7 +2560,7 @@ static void ocfs2_unlink_path(struct inode *inode, handle_t *handle,
 			     le16_to_cpu(el->l_next_free_rec));
 
 			ocfs2_journal_dirty(handle, bh);
-			ocfs2_remove_from_cache(inode, bh);
+			ocfs2_remove_from_cache(INODE_CACHE(inode), bh);
 			continue;
 		}
 
@@ -2572,7 +2573,7 @@ static void ocfs2_unlink_path(struct inode *inode, handle_t *handle,
 		if (ret)
 			mlog_errno(ret);
 
-		ocfs2_remove_from_cache(inode, bh);
+		ocfs2_remove_from_cache(INODE_CACHE(inode), bh);
 	}
 }
 
@@ -6010,7 +6011,7 @@ int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb,
 		tl->tl_used = 0;
 
 		ocfs2_compute_meta_ecc(osb->sb, tl_bh->b_data, &di->i_check);
-		status = ocfs2_write_block(osb, tl_bh, tl_inode);
+		status = ocfs2_write_block(osb, tl_bh, INODE_CACHE(tl_inode));
 		if (status < 0) {
 			mlog_errno(status);
 			goto bail;
@@ -6719,7 +6720,7 @@ delete:
 
 			mlog(0, "deleting this extent block.\n");
 
-			ocfs2_remove_from_cache(inode, bh);
+			ocfs2_remove_from_cache(INODE_CACHE(inode), bh);
 
 			BUG_ON(ocfs2_rec_clusters(el, &el->l_recs[0]));
 			BUG_ON(le32_to_cpu(el->l_recs[0].e_cpos));
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index 15c8e6deee2e..d43d34a1dd31 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -52,12 +52,12 @@ enum ocfs2_state_bits {
 BUFFER_FNS(NeedsValidate, needs_validate);
 
 int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
-		      struct inode *inode)
+		      struct ocfs2_caching_info *ci)
 {
 	int ret = 0;
 
-	mlog_entry("(bh->b_blocknr = %llu, inode=%p)\n",
-		   (unsigned long long)bh->b_blocknr, inode);
+	mlog_entry("(bh->b_blocknr = %llu, ci=%p)\n",
+		   (unsigned long long)bh->b_blocknr, ci);
 
 	BUG_ON(bh->b_blocknr < OCFS2_SUPER_BLOCK_BLKNO);
 	BUG_ON(buffer_jbd(bh));
@@ -70,7 +70,7 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
 		goto out;
 	}
 
-	mutex_lock(&OCFS2_I(inode)->ip_io_mutex);
+	ocfs2_metadata_cache_io_lock(ci);
 
 	lock_buffer(bh);
 	set_buffer_uptodate(bh);
@@ -85,7 +85,7 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
 	wait_on_buffer(bh);
 
 	if (buffer_uptodate(bh)) {
-		ocfs2_set_buffer_uptodate(inode, bh);
+		ocfs2_set_buffer_uptodate(ci, bh);
 	} else {
 		/* We don't need to remove the clustered uptodate
 		 * information for this bh as it's not marked locally
@@ -94,7 +94,7 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
 		put_bh(bh);
 	}
 
-	mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
+	ocfs2_metadata_cache_io_unlock(ci);
 out:
 	mlog_exit(ret);
 	return ret;
@@ -177,7 +177,7 @@ bail:
 	return status;
 }
 
-int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
+int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
 		      struct buffer_head *bhs[], int flags,
 		      int (*validate)(struct super_block *sb,
 				      struct buffer_head *bh))
@@ -185,11 +185,12 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
 	int status = 0;
 	int i, ignore_cache = 0;
 	struct buffer_head *bh;
+	struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
 
-	mlog_entry("(inode=%p, block=(%llu), nr=(%d), flags=%d)\n",
-		   inode, (unsigned long long)block, nr, flags);
+	mlog_entry("(ci=%p, block=(%llu), nr=(%d), flags=%d)\n",
+		   ci, (unsigned long long)block, nr, flags);
 
-	BUG_ON(!inode);
+	BUG_ON(!ci);
 	BUG_ON((flags & OCFS2_BH_READAHEAD) &&
 	       (flags & OCFS2_BH_IGNORE_CACHE));
 
@@ -212,12 +213,12 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
 		goto bail;
 	}
 
-	mutex_lock(&OCFS2_I(inode)->ip_io_mutex);
+	ocfs2_metadata_cache_io_lock(ci);
 	for (i = 0 ; i < nr ; i++) {
 		if (bhs[i] == NULL) {
-			bhs[i] = sb_getblk(inode->i_sb, block++);
+			bhs[i] = sb_getblk(sb, block++);
 			if (bhs[i] == NULL) {
-				mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
+				ocfs2_metadata_cache_io_unlock(ci);
 				status = -EIO;
 				mlog_errno(status);
 				goto bail;
@@ -250,11 +251,11 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
 		 *    before our is-it-in-flight check.
 		 */
 
-		if (!ignore_cache && !ocfs2_buffer_uptodate(inode, bh)) {
+		if (!ignore_cache && !ocfs2_buffer_uptodate(ci, bh)) {
 			mlog(ML_UPTODATE,
-			     "bh (%llu), inode %llu not uptodate\n",
+			     "bh (%llu), owner %llu not uptodate\n",
 			     (unsigned long long)bh->b_blocknr,
-			     (unsigned long long)OCFS2_I(inode)->ip_blkno);
+			     (unsigned long long)ocfs2_metadata_cache_owner(ci));
 			/* We're using ignore_cache here to say
 			 * "go to disk" */
 			ignore_cache = 1;
@@ -283,7 +284,7 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
 			 * previously submitted request than we are
 			 * done here. */
 			if ((flags & OCFS2_BH_READAHEAD)
-			    && ocfs2_buffer_read_ahead(inode, bh))
+			    && ocfs2_buffer_read_ahead(ci, bh))
 				continue;
 
 			lock_buffer(bh);
@@ -305,7 +306,7 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
 			 * buffer lock. */
 			if (!(flags & OCFS2_BH_IGNORE_CACHE)
 			    && !(flags & OCFS2_BH_READAHEAD)
-			    && ocfs2_buffer_uptodate(inode, bh)) {
+			    && ocfs2_buffer_uptodate(ci, bh)) {
 				unlock_buffer(bh);
 				continue;
 			}
@@ -327,7 +328,7 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
 
 		if (!(flags & OCFS2_BH_READAHEAD)) {
 			/* We know this can't have changed as we hold the
-			 * inode sem. Avoid doing any work on the bh if the
+			 * owner sem. Avoid doing any work on the bh if the
 			 * journal has it. */
 			if (!buffer_jbd(bh))
 				wait_on_buffer(bh);
@@ -351,7 +352,7 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
 				 * that better not have changed */
 				BUG_ON(buffer_jbd(bh));
 				clear_buffer_needs_validate(bh);
-				status = validate(inode->i_sb, bh);
+				status = validate(sb, bh);
 				if (status) {
 					put_bh(bh);
 					bhs[i] = NULL;
@@ -363,9 +364,9 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
 		/* Always set the buffer in the cache, even if it was
 		 * a forced read, or read-ahead which hasn't yet
 		 * completed. */
-		ocfs2_set_buffer_uptodate(inode, bh);
+		ocfs2_set_buffer_uptodate(ci, bh);
 	}
-	mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
+	ocfs2_metadata_cache_io_unlock(ci);
 
 	mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n", 
 	     (unsigned long long)block, nr,
@@ -399,7 +400,7 @@ static void ocfs2_check_super_or_backup(struct super_block *sb,
 
 /*
  * Write super block and backups doesn't need to collaborate with journal,
- * so we don't need to lock ip_io_mutex and inode doesn't need to bea passed
+ * so we don't need to lock ip_io_mutex and ci doesn't need to be passed
  * into this function.
  */
 int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
diff --git a/fs/ocfs2/buffer_head_io.h b/fs/ocfs2/buffer_head_io.h
index c75d682dadd8..b97bcc6dde7c 100644
--- a/fs/ocfs2/buffer_head_io.h
+++ b/fs/ocfs2/buffer_head_io.h
@@ -33,7 +33,7 @@ void ocfs2_end_buffer_io_sync(struct buffer_head *bh,
 
 int ocfs2_write_block(struct ocfs2_super          *osb,
 		      struct buffer_head  *bh,
-		      struct inode        *inode);
+		      struct ocfs2_caching_info   *ci);
 int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
 			   unsigned int nr, struct buffer_head *bhs[]);
 
@@ -44,7 +44,7 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
  * be set even for a READAHEAD call, as it marks the buffer for later
  * validation.
  */
-int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
+int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
 		      struct buffer_head *bhs[], int flags,
 		      int (*validate)(struct super_block *sb,
 				      struct buffer_head *bh));
@@ -55,7 +55,7 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
 #define OCFS2_BH_IGNORE_CACHE      1
 #define OCFS2_BH_READAHEAD         8
 
-static inline int ocfs2_read_block(struct inode *inode, u64 off,
+static inline int ocfs2_read_block(struct ocfs2_caching_info *ci, u64 off,
 				   struct buffer_head **bh,
 				   int (*validate)(struct super_block *sb,
 						   struct buffer_head *bh))
@@ -68,7 +68,7 @@ static inline int ocfs2_read_block(struct inode *inode, u64 off,
 		goto bail;
 	}
 
-	status = ocfs2_read_blocks(inode, off, 1, bh, 0, validate);
+	status = ocfs2_read_blocks(ci, off, 1, bh, 0, validate);
 
 bail:
 	return status;
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index b358f3bf896d..273fb7648fce 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -564,7 +564,8 @@ static int ocfs2_read_dir_block_direct(struct inode *dir, u64 phys,
 	int ret;
 	struct buffer_head *tmp = *bh;
 
-	ret = ocfs2_read_block(dir, phys, &tmp, ocfs2_validate_dir_block);
+	ret = ocfs2_read_block(INODE_CACHE(dir), phys, &tmp,
+			       ocfs2_validate_dir_block);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -622,7 +623,8 @@ static int ocfs2_read_dx_root(struct inode *dir, struct ocfs2_dinode *di,
 	u64 blkno = le64_to_cpu(di->i_dx_root);
 	struct buffer_head *tmp = *dx_root_bh;
 
-	ret = ocfs2_read_block(dir, blkno, &tmp, ocfs2_validate_dx_root);
+	ret = ocfs2_read_block(INODE_CACHE(dir), blkno, &tmp,
+			       ocfs2_validate_dx_root);
 
 	/* If ocfs2_read_block() got us a new bh, pass it up. */
 	if (!ret && !*dx_root_bh)
@@ -662,7 +664,8 @@ static int ocfs2_read_dx_leaf(struct inode *dir, u64 blkno,
 	int ret;
 	struct buffer_head *tmp = *dx_leaf_bh;
 
-	ret = ocfs2_read_block(dir, blkno, &tmp, ocfs2_validate_dx_leaf);
+	ret = ocfs2_read_block(INODE_CACHE(dir), blkno, &tmp,
+			       ocfs2_validate_dx_leaf);
 
 	/* If ocfs2_read_block() got us a new bh, pass it up. */
 	if (!ret && !*dx_leaf_bh)
@@ -680,7 +683,7 @@ static int ocfs2_read_dx_leaves(struct inode *dir, u64 start, int num,
 {
 	int ret;
 
-	ret = ocfs2_read_blocks(dir, start, num, dx_leaf_bhs, 0,
+	ret = ocfs2_read_blocks(INODE_CACHE(dir), start, num, dx_leaf_bhs, 0,
 				ocfs2_validate_dx_leaf);
 	if (ret)
 		mlog_errno(ret);
@@ -2332,7 +2335,7 @@ static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb,
 		goto bail;
 	}
 
-	ocfs2_set_new_buffer_uptodate(inode, new_bh);
+	ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh);
 
 	status = ocfs2_journal_access_db(handle, inode, new_bh,
 					 OCFS2_JOURNAL_ACCESS_CREATE);
@@ -2418,7 +2421,7 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
 		ret = -EIO;
 		goto out;
 	}
-	ocfs2_set_new_buffer_uptodate(dir, dx_root_bh);
+	ocfs2_set_new_buffer_uptodate(INODE_CACHE(dir), dx_root_bh);
 
 	ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh,
 				      OCFS2_JOURNAL_ACCESS_CREATE);
@@ -2495,7 +2498,7 @@ static int ocfs2_dx_dir_format_cluster(struct ocfs2_super *osb,
 		}
 		dx_leaves[i] = bh;
 
-		ocfs2_set_new_buffer_uptodate(dir, bh);
+		ocfs2_set_new_buffer_uptodate(INODE_CACHE(dir), bh);
 
 		ret = ocfs2_journal_access_dl(handle, dir, bh,
 					      OCFS2_JOURNAL_ACCESS_CREATE);
@@ -3005,7 +3008,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
 		goto out_commit;
 	}
 
-	ocfs2_set_new_buffer_uptodate(dir, dirdata_bh);
+	ocfs2_set_new_buffer_uptodate(INODE_CACHE(dir), dirdata_bh);
 
 	ret = ocfs2_journal_access_db(handle, dir, dirdata_bh,
 				      OCFS2_JOURNAL_ACCESS_CREATE);
@@ -3387,7 +3390,7 @@ do_extend:
 		goto bail;
 	}
 
-	ocfs2_set_new_buffer_uptodate(dir, new_bh);
+	ocfs2_set_new_buffer_uptodate(INODE_CACHE(dir), new_bh);
 
 	status = ocfs2_journal_access_db(handle, dir, new_bh,
 					 OCFS2_JOURNAL_ACCESS_CREATE);
@@ -4565,7 +4568,7 @@ remove_index:
 		goto out;
 	}
 
-	ocfs2_remove_from_cache(dir, dx_root_bh);
+	ocfs2_remove_from_cache(INODE_CACHE(dir), dx_root_bh);
 out:
 	ocfs2_schedule_truncate_log_flush(osb, 1);
 	ocfs2_run_deallocs(osb, &dealloc);
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 110bb57c46ab..fe15cee0322a 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -2127,7 +2127,7 @@ static int ocfs2_inode_lock_update(struct inode *inode,
 
 	/* This will discard any caching information we might have had
 	 * for the inode metadata. */
-	ocfs2_metadata_cache_purge(inode);
+	ocfs2_metadata_cache_purge(INODE_CACHE(inode));
 
 	ocfs2_extent_map_trunc(inode, 0);
 
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index f2bb1a04d253..dbd8a16d5125 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -862,8 +862,8 @@ int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr,
 			BUG_ON(bhs[done + i]->b_blocknr != (p_block + i));
 		}
 
-		rc = ocfs2_read_blocks(inode, p_block, count, bhs + done,
-				       flags, validate);
+		rc = ocfs2_read_blocks(INODE_CACHE(inode), p_block, count,
+				       bhs + done, flags, validate);
 		if (rc) {
 			mlog_errno(rc);
 			break;
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 36bb588f8fcb..1c9713cceb39 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -662,7 +662,7 @@ static int ocfs2_remove_inode(struct inode *inode,
 		goto bail_commit;
 	}
 
-	ocfs2_remove_from_cache(inode, di_bh);
+	ocfs2_remove_from_cache(INODE_CACHE(inode), di_bh);
 	vfs_dq_free_inode(inode);
 
 	status = ocfs2_free_dinode(handle, inode_alloc_inode,
@@ -1112,14 +1112,14 @@ void ocfs2_clear_inode(struct inode *inode)
 	ocfs2_lock_res_free(&oi->ip_inode_lockres);
 	ocfs2_lock_res_free(&oi->ip_open_lockres);
 
-	ocfs2_metadata_cache_purge(inode);
+	ocfs2_metadata_cache_purge(INODE_CACHE(inode));
 
-	mlog_bug_on_msg(oi->ip_metadata_cache.ci_num_cached,
+	mlog_bug_on_msg(INODE_CACHE(inode)->ci_num_cached,
 			"Clear inode of %llu, inode has %u cache items\n",
-			(unsigned long long)oi->ip_blkno, oi->ip_metadata_cache.ci_num_cached);
+			(unsigned long long)oi->ip_blkno,
+			INODE_CACHE(inode)->ci_num_cached);
 
-	mlog_bug_on_msg(!(oi->ip_metadata_cache.ci_flags &
-			  OCFS2_CACHE_FL_INLINE),
+	mlog_bug_on_msg(!(INODE_CACHE(inode)->ci_flags & OCFS2_CACHE_FL_INLINE),
 			"Clear inode of %llu, inode has a bad flag\n",
 			(unsigned long long)oi->ip_blkno);
 
@@ -1381,8 +1381,8 @@ int ocfs2_read_inode_block_full(struct inode *inode, struct buffer_head **bh,
 	int rc;
 	struct buffer_head *tmp = *bh;
 
-	rc = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1, &tmp,
-			       flags, ocfs2_validate_inode_block);
+	rc = ocfs2_read_blocks(INODE_CACHE(inode), OCFS2_I(inode)->ip_blkno,
+			       1, &tmp, flags, ocfs2_validate_inode_block);
 
 	/* If ocfs2_read_blocks() got us a new bh, pass it up. */
 	if (!rc && !*bh)
@@ -1408,6 +1408,13 @@ static u64 ocfs2_inode_cache_owner(struct ocfs2_caching_info *ci)
 	return oi->ip_blkno;
 }
 
+static struct super_block *ocfs2_inode_cache_get_super(struct ocfs2_caching_info *ci)
+{
+	struct ocfs2_inode_info *oi = cache_info_to_inode(ci);
+
+	return oi->vfs_inode.i_sb;
+}
+
 static void ocfs2_inode_cache_lock(struct ocfs2_caching_info *ci)
 {
 	struct ocfs2_inode_info *oi = cache_info_to_inode(ci);
@@ -1438,6 +1445,7 @@ static void ocfs2_inode_cache_io_unlock(struct ocfs2_caching_info *ci)
 
 const struct ocfs2_caching_operations ocfs2_inode_caching_ops = {
 	.co_owner		= ocfs2_inode_cache_owner,
+	.co_get_super		= ocfs2_inode_cache_get_super,
 	.co_cache_lock		= ocfs2_inode_cache_lock,
 	.co_cache_unlock	= ocfs2_inode_cache_unlock,
 	.co_io_lock		= ocfs2_inode_cache_io_lock,
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index cd1caca545f5..b0a71b22712c 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -120,6 +120,11 @@ extern struct kmem_cache *ocfs2_inode_cache;
 extern const struct address_space_operations ocfs2_aops;
 extern const struct ocfs2_caching_operations ocfs2_inode_caching_ops;
 
+static inline struct ocfs2_caching_info *INODE_CACHE(struct inode *inode)
+{
+	return &OCFS2_I(inode)->ip_metadata_cache;
+}
+
 void ocfs2_clear_inode(struct inode *inode);
 void ocfs2_delete_inode(struct inode *inode);
 void ocfs2_drop_inode(struct inode *inode);
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index c48b93ac6b65..ddf08d384ba1 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -898,7 +898,7 @@ static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
 		ocfs2_bump_recovery_generation(fe);
 
 	ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &fe->i_check);
-	status = ocfs2_write_block(osb, bh, journal->j_inode);
+	status = ocfs2_write_block(osb, bh, INODE_CACHE(journal->j_inode));
 	if (status < 0)
 		mlog_errno(status);
 
@@ -1642,7 +1642,7 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
 					ocfs2_get_recovery_generation(fe);
 
 	ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &fe->i_check);
-	status = ocfs2_write_block(osb, bh, inode);
+	status = ocfs2_write_block(osb, bh, INODE_CACHE(inode));
 	if (status < 0)
 		mlog_errno(status);
 
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index bac7e6abaf47..da5dd6a70e16 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -392,7 +392,7 @@ int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb,
 	ocfs2_clear_local_alloc(alloc);
 
 	ocfs2_compute_meta_ecc(osb->sb, alloc_bh->b_data, &alloc->i_check);
-	status = ocfs2_write_block(osb, alloc_bh, inode);
+	status = ocfs2_write_block(osb, alloc_bh, INODE_CACHE(inode));
 	if (status < 0)
 		mlog_errno(status);
 
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 8601f934010b..689761b57a18 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -507,7 +507,7 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
 		mlog_errno(status);
 		goto leave;
 	}
-	ocfs2_set_new_buffer_uptodate(inode, *new_fe_bh);
+	ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), *new_fe_bh);
 
 	status = ocfs2_journal_access_di(handle, inode, *new_fe_bh,
 					 OCFS2_JOURNAL_ACCESS_CREATE);
@@ -1527,7 +1527,8 @@ static int ocfs2_create_symlink_data(struct ocfs2_super *osb,
 			mlog_errno(status);
 			goto bail;
 		}
-		ocfs2_set_new_buffer_uptodate(inode, bhs[virtual]);
+		ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode),
+					      bhs[virtual]);
 
 		status = ocfs2_journal_access(handle, inode, bhs[virtual],
 					      OCFS2_JOURNAL_ACCESS_CREATE);
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 44f2a5e1d042..0d7125bb71d9 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -253,7 +253,7 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type,
 	flush_dcache_page(bh->b_page);
 	set_buffer_uptodate(bh);
 	unlock_buffer(bh);
-	ocfs2_set_buffer_uptodate(gqinode, bh);
+	ocfs2_set_buffer_uptodate(INODE_CACHE(gqinode), bh);
 	err = ocfs2_journal_access_dq(handle, gqinode, bh, ja_type);
 	if (err < 0) {
 		brelse(bh);
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index bdb09cb6e1fe..3df2954ac83b 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -993,7 +993,7 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
 		goto out_trans;
 	}
 	dchunk = (struct ocfs2_local_disk_chunk *)bh->b_data;
-	ocfs2_set_new_buffer_uptodate(lqinode, bh);
+	ocfs2_set_new_buffer_uptodate(INODE_CACHE(lqinode), bh);
 	status = ocfs2_journal_access_dq(handle, lqinode, bh,
 					 OCFS2_JOURNAL_ACCESS_CREATE);
 	if (status < 0) {
@@ -1027,7 +1027,7 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
 		mlog_errno(status);
 		goto out_trans;
 	}
-	ocfs2_set_new_buffer_uptodate(lqinode, dbh);
+	ocfs2_set_new_buffer_uptodate(INODE_CACHE(lqinode), dbh);
 	status = ocfs2_journal_access_dq(handle, lqinode, dbh,
 					 OCFS2_JOURNAL_ACCESS_CREATE);
 	if (status < 0) {
@@ -1131,7 +1131,7 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
 		mlog_errno(status);
 		goto out;
 	}
-	ocfs2_set_new_buffer_uptodate(lqinode, bh);
+	ocfs2_set_new_buffer_uptodate(INODE_CACHE(lqinode), bh);
 
 	/* Local quota info, chunk header and the new block we initialize */
 	handle = ocfs2_start_trans(OCFS2_SB(sb),
diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c
index 424adaa5f900..7465f0fded77 100644
--- a/fs/ocfs2/resize.c
+++ b/fs/ocfs2/resize.c
@@ -514,7 +514,7 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
 		goto out_unlock;
 	}
 
-	ocfs2_set_new_buffer_uptodate(inode, group_bh);
+	ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), group_bh);
 
 	ret = ocfs2_verify_group_and_input(main_bm_inode, fe, input, group_bh);
 	if (ret) {
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c
index 40661e7824e9..bfbd7e9e949f 100644
--- a/fs/ocfs2/slot_map.c
+++ b/fs/ocfs2/slot_map.c
@@ -150,8 +150,8 @@ int ocfs2_refresh_slot_info(struct ocfs2_super *osb)
 	 * be !NULL.  Thus, ocfs2_read_blocks() will ignore blocknr.  If
 	 * this is not true, the read of -1 (UINT64_MAX) will fail.
 	 */
-	ret = ocfs2_read_blocks(si->si_inode, -1, si->si_blocks, si->si_bh,
-				OCFS2_BH_IGNORE_CACHE, NULL);
+	ret = ocfs2_read_blocks(INODE_CACHE(si->si_inode), -1, si->si_blocks,
+				si->si_bh, OCFS2_BH_IGNORE_CACHE, NULL);
 	if (ret == 0) {
 		spin_lock(&osb->osb_lock);
 		ocfs2_update_slot_info(si);
@@ -213,7 +213,7 @@ static int ocfs2_update_disk_slot(struct ocfs2_super *osb,
 		ocfs2_update_disk_slot_old(si, slot_num, &bh);
 	spin_unlock(&osb->osb_lock);
 
-	status = ocfs2_write_block(osb, bh, si->si_inode);
+	status = ocfs2_write_block(osb, bh, INODE_CACHE(si->si_inode));
 	if (status < 0)
 		mlog_errno(status);
 
@@ -404,8 +404,8 @@ static int ocfs2_map_slot_buffers(struct ocfs2_super *osb,
 		     (unsigned long long)blkno);
 
 		bh = NULL;  /* Acquire a fresh bh */
-		status = ocfs2_read_blocks(si->si_inode, blkno, 1, &bh,
-					   OCFS2_BH_IGNORE_CACHE, NULL);
+		status = ocfs2_read_blocks(INODE_CACHE(si->si_inode), blkno,
+					   1, &bh, OCFS2_BH_IGNORE_CACHE, NULL);
 		if (status < 0) {
 			mlog_errno(status);
 			goto bail;
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 73a16d4666dc..21aaaaaaa2d3 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -310,7 +310,7 @@ int ocfs2_read_group_descriptor(struct inode *inode, struct ocfs2_dinode *di,
 	int rc;
 	struct buffer_head *tmp = *bh;
 
-	rc = ocfs2_read_block(inode, gd_blkno, &tmp,
+	rc = ocfs2_read_block(INODE_CACHE(inode), gd_blkno, &tmp,
 			      ocfs2_validate_group_descriptor);
 	if (rc)
 		goto out;
@@ -476,7 +476,7 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
 		mlog_errno(status);
 		goto bail;
 	}
-	ocfs2_set_new_buffer_uptodate(alloc_inode, bg_bh);
+	ocfs2_set_new_buffer_uptodate(INODE_CACHE(alloc_inode), bg_bh);
 
 	status = ocfs2_block_group_fill(handle,
 					alloc_inode,
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 746ed5d4dda9..af118ad98c58 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1683,7 +1683,7 @@ static void ocfs2_inode_init_once(void *data)
 	ocfs2_lock_res_init_once(&oi->ip_inode_lockres);
 	ocfs2_lock_res_init_once(&oi->ip_open_lockres);
 
-	ocfs2_metadata_cache_init(&oi->ip_metadata_cache,
+	ocfs2_metadata_cache_init(INODE_CACHE(&oi->vfs_inode),
 				  &ocfs2_inode_caching_ops);
 
 	inode_init_once(&oi->vfs_inode);
diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c
index 226d0429fd7f..1c829e451019 100644
--- a/fs/ocfs2/uptodate.c
+++ b/fs/ocfs2/uptodate.c
@@ -75,13 +75,20 @@ struct ocfs2_meta_cache_item {
 
 static struct kmem_cache *ocfs2_uptodate_cachep = NULL;
 
-static u64 ocfs2_metadata_cache_owner(struct ocfs2_caching_info *ci)
+u64 ocfs2_metadata_cache_owner(struct ocfs2_caching_info *ci)
 {
 	BUG_ON(!ci || !ci->ci_ops);
 
 	return ci->ci_ops->co_owner(ci);
 }
 
+struct super_block *ocfs2_metadata_cache_get_super(struct ocfs2_caching_info *ci)
+{
+	BUG_ON(!ci || !ci->ci_ops);
+
+	return ci->ci_ops->co_get_super(ci);
+}
+
 static void ocfs2_metadata_cache_lock(struct ocfs2_caching_info *ci)
 {
 	BUG_ON(!ci || !ci->ci_ops);
@@ -96,14 +103,14 @@ static void ocfs2_metadata_cache_unlock(struct ocfs2_caching_info *ci)
 	ci->ci_ops->co_cache_unlock(ci);
 }
 
-static void ocfs2_metadata_cache_io_lock(struct ocfs2_caching_info *ci)
+void ocfs2_metadata_cache_io_lock(struct ocfs2_caching_info *ci)
 {
 	BUG_ON(!ci || !ci->ci_ops);
 
 	ci->ci_ops->co_io_lock(ci);
 }
 
-static void ocfs2_metadata_cache_io_unlock(struct ocfs2_caching_info *ci)
+void ocfs2_metadata_cache_io_unlock(struct ocfs2_caching_info *ci)
 {
 	BUG_ON(!ci || !ci->ci_ops);
 
@@ -149,11 +156,9 @@ static unsigned int ocfs2_purge_copied_metadata_tree(struct rb_root *root)
  * This function is a few more lines longer than necessary due to some
  * accounting done here, but I think it's worth tracking down those
  * bugs sooner -- Mark */
-void ocfs2_metadata_cache_purge(struct inode *inode)
+void ocfs2_metadata_cache_purge(struct ocfs2_caching_info *ci)
 {
-	struct ocfs2_inode_info *oi = OCFS2_I(inode);
 	unsigned int tree, to_purge, purged;
-	struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
 	struct rb_root root = RB_ROOT;
 
 	BUG_ON(!ci || !ci->ci_ops);
@@ -223,12 +228,11 @@ ocfs2_search_cache_tree(struct ocfs2_caching_info *ci,
 	return NULL;
 }
 
-static int ocfs2_buffer_cached(struct ocfs2_inode_info *oi,
+static int ocfs2_buffer_cached(struct ocfs2_caching_info *ci,
 			       struct buffer_head *bh)
 {
 	int index = -1;
 	struct ocfs2_meta_cache_item *item = NULL;
-	struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
 
 	ocfs2_metadata_cache_lock(ci);
 
@@ -238,11 +242,9 @@ static int ocfs2_buffer_cached(struct ocfs2_inode_info *oi,
 	     !!(ci->ci_flags & OCFS2_CACHE_FL_INLINE));
 
 	if (ci->ci_flags & OCFS2_CACHE_FL_INLINE)
-		index = ocfs2_search_cache_array(&oi->ip_metadata_cache,
-						 bh->b_blocknr);
+		index = ocfs2_search_cache_array(ci, bh->b_blocknr);
 	else
-		item = ocfs2_search_cache_tree(&oi->ip_metadata_cache,
-					       bh->b_blocknr);
+		item = ocfs2_search_cache_tree(ci, bh->b_blocknr);
 
 	ocfs2_metadata_cache_unlock(ci);
 
@@ -256,7 +258,7 @@ static int ocfs2_buffer_cached(struct ocfs2_inode_info *oi,
  * 
  * This can be called under lock_buffer()
  */
-int ocfs2_buffer_uptodate(struct inode *inode,
+int ocfs2_buffer_uptodate(struct ocfs2_caching_info *ci,
 			  struct buffer_head *bh)
 {
 	/* Doesn't matter if the bh is in our cache or not -- if it's
@@ -272,17 +274,17 @@ int ocfs2_buffer_uptodate(struct inode *inode,
 
 	/* Ok, locally the buffer is marked as up to date, now search
 	 * our cache to see if we can trust that. */
-	return ocfs2_buffer_cached(OCFS2_I(inode), bh);
+	return ocfs2_buffer_cached(ci, bh);
 }
 
-/* 
+/*
  * Determine whether a buffer is currently out on a read-ahead request.
  * ci_io_sem should be held to serialize submitters with the logic here.
  */
-int ocfs2_buffer_read_ahead(struct inode *inode,
+int ocfs2_buffer_read_ahead(struct ocfs2_caching_info *ci,
 			    struct buffer_head *bh)
 {
-	return buffer_locked(bh) && ocfs2_buffer_cached(OCFS2_I(inode), bh);
+	return buffer_locked(bh) && ocfs2_buffer_cached(ci, bh);
 }
 
 /* Requires ip_lock */
@@ -335,8 +337,7 @@ static void __ocfs2_insert_cache_tree(struct ocfs2_caching_info *ci,
 }
 
 /* co_cache_lock() must be held */
-static inline int ocfs2_insert_can_use_array(struct ocfs2_inode_info *oi,
-					     struct ocfs2_caching_info *ci)
+static inline int ocfs2_insert_can_use_array(struct ocfs2_caching_info *ci)
 {
 	return (ci->ci_flags & OCFS2_CACHE_FL_INLINE) &&
 		(ci->ci_num_cached < OCFS2_CACHE_INFO_MAX_ARRAY);
@@ -347,11 +348,10 @@ static inline int ocfs2_insert_can_use_array(struct ocfs2_inode_info *oi,
  * when to free in case of error.
  *
  * The co_cache_lock() must be held. */
-static void ocfs2_expand_cache(struct ocfs2_inode_info *oi,
+static void ocfs2_expand_cache(struct ocfs2_caching_info *ci,
 			       struct ocfs2_meta_cache_item **tree)
 {
 	int i;
-	struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
 
 	mlog_bug_on_msg(ci->ci_num_cached != OCFS2_CACHE_INFO_MAX_ARRAY,
 			"Owner %llu, num cached = %u, should be %u\n",
@@ -383,12 +383,11 @@ static void ocfs2_expand_cache(struct ocfs2_inode_info *oi,
 
 /* Slow path function - memory allocation is necessary. See the
  * comment above ocfs2_set_buffer_uptodate for more information. */
-static void __ocfs2_set_buffer_uptodate(struct ocfs2_inode_info *oi,
+static void __ocfs2_set_buffer_uptodate(struct ocfs2_caching_info *ci,
 					sector_t block,
 					int expand_tree)
 {
 	int i;
-	struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
 	struct ocfs2_meta_cache_item *new = NULL;
 	struct ocfs2_meta_cache_item *tree[OCFS2_CACHE_INFO_MAX_ARRAY] =
 		{ NULL, };
@@ -420,7 +419,7 @@ static void __ocfs2_set_buffer_uptodate(struct ocfs2_inode_info *oi,
 	}
 
 	ocfs2_metadata_cache_lock(ci);
-	if (ocfs2_insert_can_use_array(oi, ci)) {
+	if (ocfs2_insert_can_use_array(ci)) {
 		mlog(0, "Someone cleared the tree underneath us\n");
 		/* Ok, items were removed from the cache in between
 		 * locks. Detect this and revert back to the fast path */
@@ -430,7 +429,7 @@ static void __ocfs2_set_buffer_uptodate(struct ocfs2_inode_info *oi,
 	}
 
 	if (expand_tree)
-		ocfs2_expand_cache(oi, tree);
+		ocfs2_expand_cache(ci, tree);
 
 	__ocfs2_insert_cache_tree(ci, new);
 	ocfs2_metadata_cache_unlock(ci);
@@ -468,16 +467,14 @@ out_free:
  * Readahead buffers can be passed in here before the I/O request is
  * completed.
  */
-void ocfs2_set_buffer_uptodate(struct inode *inode,
+void ocfs2_set_buffer_uptodate(struct ocfs2_caching_info *ci,
 			       struct buffer_head *bh)
 {
 	int expand;
-	struct ocfs2_inode_info *oi = OCFS2_I(inode);
-	struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
 
 	/* The block may very well exist in our cache already, so avoid
 	 * doing any more work in that case. */
-	if (ocfs2_buffer_cached(oi, bh))
+	if (ocfs2_buffer_cached(ci, bh))
 		return;
 
 	mlog(0, "Owner %llu, inserting block %llu\n",
@@ -487,7 +484,7 @@ void ocfs2_set_buffer_uptodate(struct inode *inode,
 	/* No need to recheck under spinlock - insertion is guarded by
 	 * co_io_lock() */
 	ocfs2_metadata_cache_lock(ci);
-	if (ocfs2_insert_can_use_array(oi, ci)) {
+	if (ocfs2_insert_can_use_array(ci)) {
 		/* Fast case - it's an array and there's a free
 		 * spot. */
 		ocfs2_append_cache_array(ci, bh->b_blocknr);
@@ -502,25 +499,22 @@ void ocfs2_set_buffer_uptodate(struct inode *inode,
 	}
 	ocfs2_metadata_cache_unlock(ci);
 
-	__ocfs2_set_buffer_uptodate(oi, bh->b_blocknr, expand);
+	__ocfs2_set_buffer_uptodate(ci, bh->b_blocknr, expand);
 }
 
 /* Called against a newly allocated buffer. Most likely nobody should
  * be able to read this sort of metadata while it's still being
  * allocated, but this is careful to take co_io_lock() anyway. */
-void ocfs2_set_new_buffer_uptodate(struct inode *inode,
+void ocfs2_set_new_buffer_uptodate(struct ocfs2_caching_info *ci,
 				   struct buffer_head *bh)
 {
-	struct ocfs2_inode_info *oi = OCFS2_I(inode);
-	struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
-
 	/* This should definitely *not* exist in our cache */
-	BUG_ON(ocfs2_buffer_cached(oi, bh));
+	BUG_ON(ocfs2_buffer_cached(ci, bh));
 
 	set_buffer_uptodate(bh);
 
 	ocfs2_metadata_cache_io_lock(ci);
-	ocfs2_set_buffer_uptodate(inode, bh);
+	ocfs2_set_buffer_uptodate(ci, bh);
 	ocfs2_metadata_cache_io_unlock(ci);
 }
 
@@ -559,13 +553,11 @@ static void ocfs2_remove_metadata_tree(struct ocfs2_caching_info *ci,
 	ci->ci_num_cached--;
 }
 
-static void ocfs2_remove_block_from_cache(struct inode *inode,
+static void ocfs2_remove_block_from_cache(struct ocfs2_caching_info *ci,
 					  sector_t block)
 {
 	int index;
 	struct ocfs2_meta_cache_item *item = NULL;
-	struct ocfs2_inode_info *oi = OCFS2_I(inode);
-	struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
 
 	ocfs2_metadata_cache_lock(ci);
 	mlog(0, "Owner %llu, remove %llu, items = %u, array = %u\n",
@@ -593,23 +585,24 @@ static void ocfs2_remove_block_from_cache(struct inode *inode,
  * bother reverting things to an inlined array in the case of a remove
  * which moves us back under the limit.
  */
-void ocfs2_remove_from_cache(struct inode *inode,
+void ocfs2_remove_from_cache(struct ocfs2_caching_info *ci,
 			     struct buffer_head *bh)
 {
 	sector_t block = bh->b_blocknr;
 
-	ocfs2_remove_block_from_cache(inode, block);
+	ocfs2_remove_block_from_cache(ci, block);
 }
 
 /* Called when we remove xattr clusters from an inode. */
-void ocfs2_remove_xattr_clusters_from_cache(struct inode *inode,
+void ocfs2_remove_xattr_clusters_from_cache(struct ocfs2_caching_info *ci,
 					    sector_t block,
 					    u32 c_len)
 {
-	unsigned int i, b_len = ocfs2_clusters_to_blocks(inode->i_sb, 1) * c_len;
+	struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
+	unsigned int i, b_len = ocfs2_clusters_to_blocks(sb, 1) * c_len;
 
 	for (i = 0; i < b_len; i++, block++)
-		ocfs2_remove_block_from_cache(inode, block);
+		ocfs2_remove_block_from_cache(ci, block);
 }
 
 int __init init_ocfs2_uptodate_cache(void)
diff --git a/fs/ocfs2/uptodate.h b/fs/ocfs2/uptodate.h
index 3b33eb88d320..f268273d6516 100644
--- a/fs/ocfs2/uptodate.h
+++ b/fs/ocfs2/uptodate.h
@@ -38,6 +38,8 @@ struct ocfs2_caching_operations {
 	 */
 	u64	(*co_owner)(struct ocfs2_caching_info *ci);
 
+	/* The superblock is needed during I/O. */
+	struct super_block *(*co_get_super)(struct ocfs2_caching_info *ci);
 	/*
 	 * Lock and unlock the caching data.  These will not sleep, and
 	 * should probably be spinlocks.
@@ -58,20 +60,25 @@ void exit_ocfs2_uptodate_cache(void);
 
 void ocfs2_metadata_cache_init(struct ocfs2_caching_info *ci,
 			       const struct ocfs2_caching_operations *ops);
-void ocfs2_metadata_cache_purge(struct inode *inode);
+void ocfs2_metadata_cache_purge(struct ocfs2_caching_info *ci);
 
-int ocfs2_buffer_uptodate(struct inode *inode,
+u64 ocfs2_metadata_cache_owner(struct ocfs2_caching_info *ci);
+struct super_block *ocfs2_metadata_cache_get_super(struct ocfs2_caching_info *ci);
+void ocfs2_metadata_cache_io_lock(struct ocfs2_caching_info *ci);
+void ocfs2_metadata_cache_io_unlock(struct ocfs2_caching_info *ci);
+
+int ocfs2_buffer_uptodate(struct ocfs2_caching_info *ci,
 			  struct buffer_head *bh);
-void ocfs2_set_buffer_uptodate(struct inode *inode,
+void ocfs2_set_buffer_uptodate(struct ocfs2_caching_info *ci,
 			       struct buffer_head *bh);
-void ocfs2_set_new_buffer_uptodate(struct inode *inode,
+void ocfs2_set_new_buffer_uptodate(struct ocfs2_caching_info *ci,
 				   struct buffer_head *bh);
-void ocfs2_remove_from_cache(struct inode *inode,
+void ocfs2_remove_from_cache(struct ocfs2_caching_info *ci,
 			     struct buffer_head *bh);
-void ocfs2_remove_xattr_clusters_from_cache(struct inode *inode,
+void ocfs2_remove_xattr_clusters_from_cache(struct ocfs2_caching_info *ci,
 					    sector_t block,
 					    u32 c_len);
-int ocfs2_buffer_read_ahead(struct inode *inode,
+int ocfs2_buffer_read_ahead(struct ocfs2_caching_info *ci,
 			    struct buffer_head *bh);
 
 #endif /* OCFS2_UPTODATE_H */
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index d1a27cda984f..19de5c487242 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -254,9 +254,9 @@ static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
 			break;
 		}
 
-		if (!ocfs2_buffer_uptodate(bucket->bu_inode,
+		if (!ocfs2_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
 					   bucket->bu_bhs[i]))
-			ocfs2_set_new_buffer_uptodate(bucket->bu_inode,
+			ocfs2_set_new_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
 						      bucket->bu_bhs[i]);
 	}
 
@@ -271,7 +271,7 @@ static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
 {
 	int rc;
 
-	rc = ocfs2_read_blocks(bucket->bu_inode, xb_blkno,
+	rc = ocfs2_read_blocks(INODE_CACHE(bucket->bu_inode), xb_blkno,
 			       bucket->bu_blocks, bucket->bu_bhs, 0,
 			       NULL);
 	if (!rc) {
@@ -399,7 +399,7 @@ static int ocfs2_read_xattr_block(struct inode *inode, u64 xb_blkno,
 	int rc;
 	struct buffer_head *tmp = *bh;
 
-	rc = ocfs2_read_block(inode, xb_blkno, &tmp,
+	rc = ocfs2_read_block(INODE_CACHE(inode), xb_blkno, &tmp,
 			      ocfs2_validate_xattr_block);
 
 	/* If ocfs2_read_block() got us a new bh, pass it up. */
@@ -724,8 +724,8 @@ static int ocfs2_xattr_shrink_size(struct inode *inode,
 		}
 
 		block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
-		ocfs2_remove_xattr_clusters_from_cache(inode, block,
-						       alloc_size);
+		ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode),
+						       block, alloc_size);
 		cpos += alloc_size;
 		trunc_len -= alloc_size;
 	}
@@ -970,7 +970,8 @@ static int ocfs2_xattr_get_value_outside(struct inode *inode,
 		blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
 		/* Copy ocfs2_xattr_value */
 		for (i = 0; i < num_clusters * bpc; i++, blkno++) {
-			ret = ocfs2_read_block(inode, blkno, &bh, NULL);
+			ret = ocfs2_read_block(INODE_CACHE(inode), blkno,
+					       &bh, NULL);
 			if (ret) {
 				mlog_errno(ret);
 				goto out;
@@ -1208,7 +1209,8 @@ static int __ocfs2_xattr_set_value_outside(struct inode *inode,
 		blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
 
 		for (i = 0; i < num_clusters * bpc; i++, blkno++) {
-			ret = ocfs2_read_block(inode, blkno, &bh, NULL);
+			ret = ocfs2_read_block(INODE_CACHE(inode), blkno,
+					       &bh, NULL);
 			if (ret) {
 				mlog_errno(ret);
 				goto out;
@@ -2121,7 +2123,7 @@ static int ocfs2_xattr_block_set(struct inode *inode,
 		}
 
 		new_bh = sb_getblk(inode->i_sb, first_blkno);
-		ocfs2_set_new_buffer_uptodate(inode, new_bh);
+		ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh);
 
 		ret = ocfs2_journal_access_xb(handle, inode, new_bh,
 					      OCFS2_JOURNAL_ACCESS_CREATE);
@@ -4845,7 +4847,8 @@ static int ocfs2_rm_xattr_cluster(struct inode *inode,
 	mlog(0, "rm xattr extent rec at %u len = %u, start from %llu\n",
 	     cpos, len, (unsigned long long)blkno);
 
-	ocfs2_remove_xattr_clusters_from_cache(inode, blkno, len);
+	ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), blkno,
+					       len);
 
 	ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac);
 	if (ret) {
-- 
cgit v1.2.3


From 66fb345ddd2d343e36692da0ff66126d7a99dc1b Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Thu, 12 Feb 2009 15:24:40 -0800
Subject: ocfs2: move ip_last_trans to struct ocfs2_caching_info

We have the read side of metadata caching isolated to struct
ocfs2_caching_info; now we need the write side.  This means the journal
functions.  The journal only does a couple of things with struct inode.

This change moves the ip_last_trans field onto struct
ocfs2_caching_info as ci_last_trans.  This field tells the journal
whether a pending journal flush is required.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/inode.c    |  3 +--
 fs/ocfs2/inode.h    |  2 --
 fs/ocfs2/journal.h  |  5 +++--
 fs/ocfs2/ocfs2.h    |  4 ++++
 fs/ocfs2/super.c    |  1 -
 fs/ocfs2/uptodate.c | 22 +++++++++++++++++++---
 fs/ocfs2/uptodate.h |  1 +
 7 files changed, 28 insertions(+), 10 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 1c9713cceb39..a47750dea059 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -1112,7 +1112,7 @@ void ocfs2_clear_inode(struct inode *inode)
 	ocfs2_lock_res_free(&oi->ip_inode_lockres);
 	ocfs2_lock_res_free(&oi->ip_open_lockres);
 
-	ocfs2_metadata_cache_purge(INODE_CACHE(inode));
+	ocfs2_metadata_cache_exit(INODE_CACHE(inode));
 
 	mlog_bug_on_msg(INODE_CACHE(inode)->ci_num_cached,
 			"Clear inode of %llu, inode has %u cache items\n",
@@ -1148,7 +1148,6 @@ void ocfs2_clear_inode(struct inode *inode)
 	/* Clear all other flags. */
 	oi->ip_flags = 0;
 	oi->ip_created_trans = 0;
-	oi->ip_last_trans = 0;
 	oi->ip_dir_start_lookup = 0;
 	oi->ip_blkno = 0ULL;
 
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index b0a71b22712c..2cae2514e7fb 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -63,8 +63,6 @@ struct ocfs2_inode_info
 	/* next two are protected by trans_inc_lock */
 	/* which transaction were we created on? Zero if none. */
 	unsigned long			ip_created_trans;
-	/* last transaction we were a part of. */
-	unsigned long			ip_last_trans;
 
 	struct ocfs2_caching_info	ip_metadata_cache;
 
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 2c3222aec622..d4ac19739d7c 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -94,7 +94,7 @@ static inline void ocfs2_set_inode_lock_trans(struct ocfs2_journal *journal,
 					      struct inode *inode)
 {
 	spin_lock(&trans_inc_lock);
-	OCFS2_I(inode)->ip_last_trans = journal->j_trans_id;
+	INODE_CACHE(inode)->ci_last_trans = journal->j_trans_id;
 	spin_unlock(&trans_inc_lock);
 }
 
@@ -109,7 +109,8 @@ static inline int ocfs2_inode_fully_checkpointed(struct inode *inode)
 	struct ocfs2_journal *journal = OCFS2_SB(inode->i_sb)->journal;
 
 	spin_lock(&trans_inc_lock);
-	ret = time_after(journal->j_trans_id, OCFS2_I(inode)->ip_last_trans);
+	ret = time_after(journal->j_trans_id,
+			 INODE_CACHE(inode)->ci_last_trans);
 	spin_unlock(&trans_inc_lock);
 	return ret;
 }
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 6e54a496299e..c9bd7ce30ba6 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -77,6 +77,10 @@ struct ocfs2_caching_info {
 	 */
 	const struct ocfs2_caching_operations *ci_ops;
 
+	/* last transaction we were a part of. */
+	unsigned long		ci_last_trans;
+
+	/* Cache structures */
 	unsigned int		ci_flags;
 	unsigned int		ci_num_cached;
 	union {
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index af118ad98c58..4212547e78a2 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1669,7 +1669,6 @@ static void ocfs2_inode_init_once(void *data)
 	ocfs2_extent_map_init(&oi->vfs_inode);
 	INIT_LIST_HEAD(&oi->ip_io_markers);
 	oi->ip_created_trans = 0;
-	oi->ip_last_trans = 0;
 	oi->ip_dir_start_lookup = 0;
 
 	init_rwsem(&oi->ip_alloc_sem);
diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c
index 1c829e451019..81c82200b908 100644
--- a/fs/ocfs2/uptodate.c
+++ b/fs/ocfs2/uptodate.c
@@ -118,16 +118,32 @@ void ocfs2_metadata_cache_io_unlock(struct ocfs2_caching_info *ci)
 }
 
 
+static void ocfs2_metadata_cache_reset(struct ocfs2_caching_info *ci,
+				       int clear)
+{
+	ci->ci_flags |= OCFS2_CACHE_FL_INLINE;
+	ci->ci_num_cached = 0;
+
+	if (clear)
+		ci->ci_last_trans = 0;
+}
+
 void ocfs2_metadata_cache_init(struct ocfs2_caching_info *ci,
 			       const struct ocfs2_caching_operations *ops)
 {
 	BUG_ON(!ops);
 
 	ci->ci_ops = ops;
-	ci->ci_flags |= OCFS2_CACHE_FL_INLINE;
-	ci->ci_num_cached = 0;
+	ocfs2_metadata_cache_reset(ci, 1);
+}
+
+void ocfs2_metadata_cache_exit(struct ocfs2_caching_info *ci)
+{
+	ocfs2_metadata_cache_purge(ci);
+	ocfs2_metadata_cache_reset(ci, 1);
 }
 
+
 /* No lock taken here as 'root' is not expected to be visible to other
  * processes. */
 static unsigned int ocfs2_purge_copied_metadata_tree(struct rb_root *root)
@@ -177,7 +193,7 @@ void ocfs2_metadata_cache_purge(struct ocfs2_caching_info *ci)
 	if (tree)
 		root = ci->ci_cache.ci_tree;
 
-	ocfs2_metadata_cache_init(ci, ci->ci_ops);
+	ocfs2_metadata_cache_reset(ci, 0);
 	ocfs2_metadata_cache_unlock(ci);
 
 	purged = ocfs2_purge_copied_metadata_tree(&root);
diff --git a/fs/ocfs2/uptodate.h b/fs/ocfs2/uptodate.h
index f268273d6516..80dbb1db0a5a 100644
--- a/fs/ocfs2/uptodate.h
+++ b/fs/ocfs2/uptodate.h
@@ -61,6 +61,7 @@ void exit_ocfs2_uptodate_cache(void);
 void ocfs2_metadata_cache_init(struct ocfs2_caching_info *ci,
 			       const struct ocfs2_caching_operations *ops);
 void ocfs2_metadata_cache_purge(struct ocfs2_caching_info *ci);
+void ocfs2_metadata_cache_exit(struct ocfs2_caching_info *ci);
 
 u64 ocfs2_metadata_cache_owner(struct ocfs2_caching_info *ci);
 struct super_block *ocfs2_metadata_cache_get_super(struct ocfs2_caching_info *ci);
-- 
cgit v1.2.3


From 292dd27ec76b96cebcef576f330ab121f59ccf05 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Thu, 12 Feb 2009 15:41:59 -0800
Subject: ocfs2: move ip_created_trans to struct ocfs2_caching_info

Similar to ip_last_trans, ip_created_trans tracks the creation of a journal
managed inode.  This specifically tracks what transaction created the
inode.  This is so the code can know if the inode has ever been written
to disk.

This behavior is desirable for any journal managed object.  We move it
to struct ocfs2_caching_info as ci_created_trans so that any object
using ocfs2_caching_info can rely on this behavior.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/inode.c    | 1 -
 fs/ocfs2/inode.h    | 4 ----
 fs/ocfs2/journal.h  | 6 +++---
 fs/ocfs2/ocfs2.h    | 3 +++
 fs/ocfs2/super.c    | 1 -
 fs/ocfs2/uptodate.c | 4 +++-
 6 files changed, 9 insertions(+), 10 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index a47750dea059..8a9e7085e99f 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -1147,7 +1147,6 @@ void ocfs2_clear_inode(struct inode *inode)
 
 	/* Clear all other flags. */
 	oi->ip_flags = 0;
-	oi->ip_created_trans = 0;
 	oi->ip_dir_start_lookup = 0;
 	oi->ip_blkno = 0ULL;
 
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index 2cae2514e7fb..67392f60629d 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -60,10 +60,6 @@ struct ocfs2_inode_info
 
 	u32				ip_dir_start_lookup;
 
-	/* next two are protected by trans_inc_lock */
-	/* which transaction were we created on? Zero if none. */
-	unsigned long			ip_created_trans;
-
 	struct ocfs2_caching_info	ip_metadata_cache;
 
 	struct ocfs2_extent_map		ip_extent_map;
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index d4ac19739d7c..0bb6754c73f4 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -129,9 +129,9 @@ static inline int ocfs2_inode_is_new(struct inode *inode)
 		return 0;
 	spin_lock(&trans_inc_lock);
 	ret = !(time_after(OCFS2_SB(inode->i_sb)->journal->j_trans_id,
-			   OCFS2_I(inode)->ip_created_trans));
+			   INODE_CACHE(inode)->ci_created_trans));
 	if (!ret)
-		OCFS2_I(inode)->ip_created_trans = 0;
+		INODE_CACHE(inode)->ci_created_trans = 0;
 	spin_unlock(&trans_inc_lock);
 	return ret;
 }
@@ -140,7 +140,7 @@ static inline void ocfs2_inode_set_new(struct ocfs2_super *osb,
 				       struct inode *inode)
 {
 	spin_lock(&trans_inc_lock);
-	OCFS2_I(inode)->ip_created_trans = osb->journal->j_trans_id;
+	INODE_CACHE(inode)->ci_created_trans = osb->journal->j_trans_id;
 	spin_unlock(&trans_inc_lock);
 }
 
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index c9bd7ce30ba6..18b5fea98c91 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -77,6 +77,9 @@ struct ocfs2_caching_info {
 	 */
 	const struct ocfs2_caching_operations *ci_ops;
 
+	/* next two are protected by trans_inc_lock */
+	/* which transaction were we created on? Zero if none. */
+	unsigned long		ci_created_trans;
 	/* last transaction we were a part of. */
 	unsigned long		ci_last_trans;
 
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 4212547e78a2..e35a5052ce3a 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1668,7 +1668,6 @@ static void ocfs2_inode_init_once(void *data)
 	spin_lock_init(&oi->ip_lock);
 	ocfs2_extent_map_init(&oi->vfs_inode);
 	INIT_LIST_HEAD(&oi->ip_io_markers);
-	oi->ip_created_trans = 0;
 	oi->ip_dir_start_lookup = 0;
 
 	init_rwsem(&oi->ip_alloc_sem);
diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c
index 81c82200b908..b6284f235d2f 100644
--- a/fs/ocfs2/uptodate.c
+++ b/fs/ocfs2/uptodate.c
@@ -124,8 +124,10 @@ static void ocfs2_metadata_cache_reset(struct ocfs2_caching_info *ci,
 	ci->ci_flags |= OCFS2_CACHE_FL_INLINE;
 	ci->ci_num_cached = 0;
 
-	if (clear)
+	if (clear) {
+		ci->ci_created_trans = 0;
 		ci->ci_last_trans = 0;
+	}
 }
 
 void ocfs2_metadata_cache_init(struct ocfs2_caching_info *ci,
-- 
cgit v1.2.3


From 0cf2f7632b1789b811ab20b611c4156e6de2b055 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Thu, 12 Feb 2009 16:41:25 -0800
Subject: ocfs2: Pass struct ocfs2_caching_info to the journal functions.

The next step in divorcing metadata I/O management from struct inode is
to pass struct ocfs2_caching_info to the journal functions.  Thus the
journal locks a metadata cache with the cache io_lock function.  It also
can compare ci_last_trans and ci_created_trans directly.

This is a large patch because of all the places we change
ocfs2_journal_access..(handle, inode, ...) to
ocfs2_journal_access..(handle, INODE_CACHE(inode), ...).

Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/alloc.c        | 103 ++++++++++++++++++++++++------------------------
 fs/ocfs2/aops.c         |   4 +-
 fs/ocfs2/dir.c          |  52 +++++++++++++-----------
 fs/ocfs2/dlmglue.c      |   2 +-
 fs/ocfs2/file.c         |   8 ++--
 fs/ocfs2/inode.c        |   7 ++--
 fs/ocfs2/journal.c      |  65 ++++++++++++++----------------
 fs/ocfs2/journal.h      |  75 +++++++++++++++++++----------------
 fs/ocfs2/localalloc.c   |  10 +++--
 fs/ocfs2/namei.c        |  36 +++++++++++------
 fs/ocfs2/ocfs2.h        |   8 +++-
 fs/ocfs2/quota_global.c |   3 +-
 fs/ocfs2/quota_local.c  |  20 ++++++----
 fs/ocfs2/resize.c       |  14 +++----
 fs/ocfs2/suballoc.c     |  29 +++++++-------
 fs/ocfs2/uptodate.h     |   1 -
 fs/ocfs2/xattr.c        |  33 +++++++++-------
 17 files changed, 254 insertions(+), 216 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index d5dffcfa192a..616afa9f7bd1 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -415,11 +415,11 @@ static inline void ocfs2_et_update_clusters(struct inode *inode,
 }
 
 static inline int ocfs2_et_root_journal_access(handle_t *handle,
-					       struct inode *inode,
+					       struct ocfs2_caching_info *ci,
 					       struct ocfs2_extent_tree *et,
 					       int type)
 {
-	return et->et_root_journal_access(handle, inode, et->et_root_bh,
+	return et->et_root_journal_access(handle, ci, et->et_root_bh,
 					  type);
 }
 
@@ -633,7 +633,7 @@ static struct ocfs2_path *ocfs2_new_path_from_et(struct ocfs2_extent_tree *et)
  * ocfs2_journal_access_path(), but I don't have a better one.
  */
 static int ocfs2_path_bh_journal_access(handle_t *handle,
-					struct inode *inode,
+					struct ocfs2_caching_info *ci,
 					struct ocfs2_path *path,
 					int idx)
 {
@@ -645,14 +645,15 @@ static int ocfs2_path_bh_journal_access(handle_t *handle,
 	if (idx)
 		access = ocfs2_journal_access_eb;
 
-	return access(handle, inode, path->p_node[idx].bh,
+	return access(handle, ci, path->p_node[idx].bh,
 		      OCFS2_JOURNAL_ACCESS_WRITE);
 }
 
 /*
  * Convenience function to journal all components in a path.
  */
-static int ocfs2_journal_access_path(struct inode *inode, handle_t *handle,
+static int ocfs2_journal_access_path(struct ocfs2_caching_info *ci,
+				     handle_t *handle,
 				     struct ocfs2_path *path)
 {
 	int i, ret = 0;
@@ -661,7 +662,7 @@ static int ocfs2_journal_access_path(struct inode *inode, handle_t *handle,
 		goto out;
 
 	for(i = 0; i < path_num_items(path); i++) {
-		ret = ocfs2_path_bh_journal_access(handle, inode, path, i);
+		ret = ocfs2_path_bh_journal_access(handle, ci, path, i);
 		if (ret < 0) {
 			mlog_errno(ret);
 			goto out;
@@ -952,7 +953,7 @@ static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb,
 			ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode),
 						      bhs[i]);
 
-			status = ocfs2_journal_access_eb(handle, inode, bhs[i],
+			status = ocfs2_journal_access_eb(handle, INODE_CACHE(inode), bhs[i],
 							 OCFS2_JOURNAL_ACCESS_CREATE);
 			if (status < 0) {
 				mlog_errno(status);
@@ -1051,7 +1052,7 @@ static int ocfs2_adjust_rightmost_branch(handle_t *handle,
 		goto out;
 	}
 
-	status = ocfs2_journal_access_path(inode, handle, path);
+	status = ocfs2_journal_access_path(INODE_CACHE(inode), handle, path);
 	if (status < 0) {
 		mlog_errno(status);
 		goto out;
@@ -1162,7 +1163,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
 		BUG_ON(!OCFS2_IS_VALID_EXTENT_BLOCK(eb));
 		eb_el = &eb->h_list;
 
-		status = ocfs2_journal_access_eb(handle, inode, bh,
+		status = ocfs2_journal_access_eb(handle, INODE_CACHE(inode), bh,
 						 OCFS2_JOURNAL_ACCESS_CREATE);
 		if (status < 0) {
 			mlog_errno(status);
@@ -1202,20 +1203,20 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
 	 * journal_dirty erroring as it won't unless we've aborted the
 	 * handle (in which case we would never be here) so reserving
 	 * the write with journal_access is all we need to do. */
-	status = ocfs2_journal_access_eb(handle, inode, *last_eb_bh,
+	status = ocfs2_journal_access_eb(handle, INODE_CACHE(inode), *last_eb_bh,
 					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
 	}
-	status = ocfs2_et_root_journal_access(handle, inode, et,
+	status = ocfs2_et_root_journal_access(handle, INODE_CACHE(inode), et,
 					      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
 	}
 	if (eb_bh) {
-		status = ocfs2_journal_access_eb(handle, inode, eb_bh,
+		status = ocfs2_journal_access_eb(handle, INODE_CACHE(inode), eb_bh,
 						 OCFS2_JOURNAL_ACCESS_WRITE);
 		if (status < 0) {
 			mlog_errno(status);
@@ -1305,7 +1306,7 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
 	eb_el = &eb->h_list;
 	root_el = et->et_root_el;
 
-	status = ocfs2_journal_access_eb(handle, inode, new_eb_bh,
+	status = ocfs2_journal_access_eb(handle, INODE_CACHE(inode), new_eb_bh,
 					 OCFS2_JOURNAL_ACCESS_CREATE);
 	if (status < 0) {
 		mlog_errno(status);
@@ -1324,7 +1325,7 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
 		goto bail;
 	}
 
-	status = ocfs2_et_root_journal_access(handle, inode, et,
+	status = ocfs2_et_root_journal_access(handle, INODE_CACHE(inode), et,
 					      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
@@ -2095,7 +2096,7 @@ static int ocfs2_rotate_subtree_right(struct inode *inode,
 	root_bh = left_path->p_node[subtree_index].bh;
 	BUG_ON(root_bh != right_path->p_node[subtree_index].bh);
 
-	ret = ocfs2_path_bh_journal_access(handle, inode, right_path,
+	ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode), right_path,
 					   subtree_index);
 	if (ret) {
 		mlog_errno(ret);
@@ -2103,14 +2104,14 @@ static int ocfs2_rotate_subtree_right(struct inode *inode,
 	}
 
 	for(i = subtree_index + 1; i < path_num_items(right_path); i++) {
-		ret = ocfs2_path_bh_journal_access(handle, inode,
+		ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode),
 						   right_path, i);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
 		}
 
-		ret = ocfs2_path_bh_journal_access(handle, inode,
+		ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode),
 						   left_path, i);
 		if (ret) {
 			mlog_errno(ret);
@@ -2503,7 +2504,7 @@ static int ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle,
 		goto out;
 	}
 
-	ret = ocfs2_journal_access_path(inode, handle, path);
+	ret = ocfs2_journal_access_path(INODE_CACHE(inode), handle, path);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -2654,7 +2655,7 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
 			return -EAGAIN;
 
 		if (le16_to_cpu(right_leaf_el->l_next_free_rec) > 1) {
-			ret = ocfs2_journal_access_eb(handle, inode,
+			ret = ocfs2_journal_access_eb(handle, INODE_CACHE(inode),
 						      path_leaf_bh(right_path),
 						      OCFS2_JOURNAL_ACCESS_WRITE);
 			if (ret) {
@@ -2673,7 +2674,7 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
 		 * We have to update i_last_eb_blk during the meta
 		 * data delete.
 		 */
-		ret = ocfs2_et_root_journal_access(handle, inode, et,
+		ret = ocfs2_et_root_journal_access(handle, INODE_CACHE(inode), et,
 						   OCFS2_JOURNAL_ACCESS_WRITE);
 		if (ret) {
 			mlog_errno(ret);
@@ -2689,7 +2690,7 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
 	 */
 	BUG_ON(right_has_empty && !del_right_subtree);
 
-	ret = ocfs2_path_bh_journal_access(handle, inode, right_path,
+	ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode), right_path,
 					   subtree_index);
 	if (ret) {
 		mlog_errno(ret);
@@ -2697,14 +2698,14 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
 	}
 
 	for(i = subtree_index + 1; i < path_num_items(right_path); i++) {
-		ret = ocfs2_path_bh_journal_access(handle, inode,
+		ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode),
 						   right_path, i);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
 		}
 
-		ret = ocfs2_path_bh_journal_access(handle, inode,
+		ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode),
 						   left_path, i);
 		if (ret) {
 			mlog_errno(ret);
@@ -2864,7 +2865,7 @@ static int ocfs2_rotate_rightmost_leaf_left(struct inode *inode,
 	if (!ocfs2_is_empty_extent(&el->l_recs[0]))
 		return 0;
 
-	ret = ocfs2_path_bh_journal_access(handle, inode, path,
+	ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode), path,
 					   path_num_items(path) - 1);
 	if (ret) {
 		mlog_errno(ret);
@@ -2947,7 +2948,7 @@ static int __ocfs2_rotate_tree_left(struct inode *inode,
 		 * Caller might still want to make changes to the
 		 * tree root, so re-add it to the journal here.
 		 */
-		ret = ocfs2_path_bh_journal_access(handle, inode,
+		ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode),
 						   left_path, 0);
 		if (ret) {
 			mlog_errno(ret);
@@ -3025,7 +3026,7 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
 		goto out;
 	}
 
-	ret = ocfs2_journal_access_path(inode, handle, path);
+	ret = ocfs2_journal_access_path(INODE_CACHE(inode), handle, path);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -3055,7 +3056,7 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
 			goto out;
 		}
 
-		ret = ocfs2_journal_access_path(inode, handle, left_path);
+		ret = ocfs2_journal_access_path(INODE_CACHE(inode), handle, left_path);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -3370,7 +3371,7 @@ static int ocfs2_merge_rec_right(struct inode *inode,
 		root_bh = left_path->p_node[subtree_index].bh;
 		BUG_ON(root_bh != right_path->p_node[subtree_index].bh);
 
-		ret = ocfs2_path_bh_journal_access(handle, inode, right_path,
+		ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode), right_path,
 						   subtree_index);
 		if (ret) {
 			mlog_errno(ret);
@@ -3379,14 +3380,14 @@ static int ocfs2_merge_rec_right(struct inode *inode,
 
 		for (i = subtree_index + 1;
 		     i < path_num_items(right_path); i++) {
-			ret = ocfs2_path_bh_journal_access(handle, inode,
+			ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode),
 							   right_path, i);
 			if (ret) {
 				mlog_errno(ret);
 				goto out;
 			}
 
-			ret = ocfs2_path_bh_journal_access(handle, inode,
+			ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode),
 							   left_path, i);
 			if (ret) {
 				mlog_errno(ret);
@@ -3399,7 +3400,7 @@ static int ocfs2_merge_rec_right(struct inode *inode,
 		right_rec = &el->l_recs[index + 1];
 	}
 
-	ret = ocfs2_path_bh_journal_access(handle, inode, left_path,
+	ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode), left_path,
 					   path_num_items(left_path) - 1);
 	if (ret) {
 		mlog_errno(ret);
@@ -3539,7 +3540,7 @@ static int ocfs2_merge_rec_left(struct inode *inode,
 		root_bh = left_path->p_node[subtree_index].bh;
 		BUG_ON(root_bh != right_path->p_node[subtree_index].bh);
 
-		ret = ocfs2_path_bh_journal_access(handle, inode, right_path,
+		ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode), right_path,
 						   subtree_index);
 		if (ret) {
 			mlog_errno(ret);
@@ -3548,14 +3549,14 @@ static int ocfs2_merge_rec_left(struct inode *inode,
 
 		for (i = subtree_index + 1;
 		     i < path_num_items(right_path); i++) {
-			ret = ocfs2_path_bh_journal_access(handle, inode,
+			ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode),
 							   right_path, i);
 			if (ret) {
 				mlog_errno(ret);
 				goto out;
 			}
 
-			ret = ocfs2_path_bh_journal_access(handle, inode,
+			ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode),
 							   left_path, i);
 			if (ret) {
 				mlog_errno(ret);
@@ -3568,7 +3569,7 @@ static int ocfs2_merge_rec_left(struct inode *inode,
 			has_empty_extent = 1;
 	}
 
-	ret = ocfs2_path_bh_journal_access(handle, inode, right_path,
+	ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode), right_path,
 					   path_num_items(right_path) - 1);
 	if (ret) {
 		mlog_errno(ret);
@@ -4006,7 +4007,7 @@ static int ocfs2_append_rec_to_path(struct inode *inode, handle_t *handle,
 		}
 	}
 
-	ret = ocfs2_journal_access_path(inode, handle, right_path);
+	ret = ocfs2_journal_access_path(INODE_CACHE(inode), handle, right_path);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -4135,7 +4136,7 @@ static int ocfs2_insert_path(struct inode *inode,
 			goto out;
 		}
 
-		ret = ocfs2_journal_access_path(inode, handle, left_path);
+		ret = ocfs2_journal_access_path(INODE_CACHE(inode), handle, left_path);
 		if (ret < 0) {
 			mlog_errno(ret);
 			goto out;
@@ -4146,7 +4147,7 @@ static int ocfs2_insert_path(struct inode *inode,
 	 * Pass both paths to the journal. The majority of inserts
 	 * will be touching all components anyway.
 	 */
-	ret = ocfs2_journal_access_path(inode, handle, right_path);
+	ret = ocfs2_journal_access_path(INODE_CACHE(inode), handle, right_path);
 	if (ret < 0) {
 		mlog_errno(ret);
 		goto out;
@@ -4211,7 +4212,7 @@ static int ocfs2_do_insert_extent(struct inode *inode,
 
 	el = et->et_root_el;
 
-	ret = ocfs2_et_root_journal_access(handle, inode, et,
+	ret = ocfs2_et_root_journal_access(handle, INODE_CACHE(inode), et,
 					   OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
@@ -4273,7 +4274,7 @@ static int ocfs2_do_insert_extent(struct inode *inode,
 		 * ocfs2_rotate_tree_right() might have extended the
 		 * transaction without re-journaling our tree root.
 		 */
-		ret = ocfs2_et_root_journal_access(handle, inode, et,
+		ret = ocfs2_et_root_journal_access(handle, INODE_CACHE(inode), et,
 						   OCFS2_JOURNAL_ACCESS_WRITE);
 		if (ret) {
 			mlog_errno(ret);
@@ -4796,7 +4797,7 @@ int ocfs2_add_clusters_in_btree(struct ocfs2_super *osb,
 	BUG_ON(num_bits > clusters_to_add);
 
 	/* reserve our write early -- insert_extent may update the tree root */
-	status = ocfs2_et_root_journal_access(handle, inode, et,
+	status = ocfs2_et_root_journal_access(handle, INODE_CACHE(inode), et,
 					      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
@@ -4971,7 +4972,7 @@ static int ocfs2_replace_extent_rec(struct inode *inode,
 {
 	int ret;
 
-	ret = ocfs2_path_bh_journal_access(handle, inode, path,
+	ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode), path,
 					   path_num_items(path) - 1);
 	if (ret) {
 		mlog_errno(ret);
@@ -5333,13 +5334,13 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
 		goto out;
 	}
 
-	ret = ocfs2_journal_access_path(inode, handle, path);
+	ret = ocfs2_journal_access_path(INODE_CACHE(inode), handle, path);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
 	}
 
-	ret = ocfs2_journal_access_path(inode, handle, left_path);
+	ret = ocfs2_journal_access_path(INODE_CACHE(inode), handle, left_path);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -5574,7 +5575,7 @@ int ocfs2_remove_btree_range(struct inode *inode,
 		goto out;
 	}
 
-	ret = ocfs2_et_root_journal_access(handle, inode, et,
+	ret = ocfs2_et_root_journal_access(handle, INODE_CACHE(inode), et,
 					   OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
@@ -5691,7 +5692,7 @@ int ocfs2_truncate_log_append(struct ocfs2_super *osb,
 		goto bail;
 	}
 
-	status = ocfs2_journal_access_di(handle, tl_inode, tl_bh,
+	status = ocfs2_journal_access_di(handle, INODE_CACHE(tl_inode), tl_bh,
 					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
@@ -5753,7 +5754,7 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
 	while (i >= 0) {
 		/* Caller has given us at least enough credits to
 		 * update the truncate log dinode */
-		status = ocfs2_journal_access_di(handle, tl_inode, tl_bh,
+		status = ocfs2_journal_access_di(handle, INODE_CACHE(tl_inode), tl_bh,
 						 OCFS2_JOURNAL_ACCESS_WRITE);
 		if (status < 0) {
 			mlog_errno(status);
@@ -6770,14 +6771,14 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb,
 	 * Each component will be touched, so we might as well journal
 	 * here to avoid having to handle errors later.
 	 */
-	status = ocfs2_journal_access_path(inode, handle, path);
+	status = ocfs2_journal_access_path(INODE_CACHE(inode), handle, path);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
 	}
 
 	if (last_eb_bh) {
-		status = ocfs2_journal_access_eb(handle, inode, last_eb_bh,
+		status = ocfs2_journal_access_eb(handle, INODE_CACHE(inode), last_eb_bh,
 						 OCFS2_JOURNAL_ACCESS_WRITE);
 		if (status < 0) {
 			mlog_errno(status);
@@ -7139,7 +7140,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
 		goto out_unlock;
 	}
 
-	ret = ocfs2_journal_access_di(handle, inode, di_bh,
+	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
 				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
@@ -7508,7 +7509,7 @@ int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh,
 		goto out;
 	}
 
-	ret = ocfs2_journal_access_di(handle, inode, di_bh,
+	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
 				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 8a1e61545f41..49eef2c6f4aa 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1528,7 +1528,7 @@ static int ocfs2_write_begin_inline(struct address_space *mapping,
 		goto out;
 	}
 
-	ret = ocfs2_journal_access_di(handle, inode, wc->w_di_bh,
+	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), wc->w_di_bh,
 				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		ocfs2_commit_trans(osb, handle);
@@ -1773,7 +1773,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
 	 * We don't want this to fail in ocfs2_write_end(), so do it
 	 * here.
 	 */
-	ret = ocfs2_journal_access_di(handle, inode, wc->w_di_bh,
+	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), wc->w_di_bh,
 				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 273fb7648fce..073ab34b8c2a 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -176,7 +176,7 @@ static int ocfs2_dx_dir_link_trailer(struct inode *dir, handle_t *handle,
 	struct ocfs2_dx_root_block *dx_root;
 	struct ocfs2_dir_block_trailer *trailer;
 
-	ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh,
+	ret = ocfs2_journal_access_dr(handle, INODE_CACHE(dir), dx_root_bh,
 				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
@@ -1136,7 +1136,8 @@ int ocfs2_update_entry(struct inode *dir, handle_t *handle,
 	if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
 		access = ocfs2_journal_access_di;
 
-	ret = access(handle, dir, de_bh, OCFS2_JOURNAL_ACCESS_WRITE);
+	ret = access(handle, INODE_CACHE(dir), de_bh,
+		     OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -1179,7 +1180,7 @@ static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir,
 			goto bail;
 		}
 		if (de == de_del)  {
-			status = access(handle, dir, bh,
+			status = access(handle, INODE_CACHE(dir), bh,
 					OCFS2_JOURNAL_ACCESS_WRITE);
 			if (status < 0) {
 				status = -EIO;
@@ -1329,7 +1330,7 @@ static int ocfs2_delete_entry_dx(handle_t *handle, struct inode *dir,
 	 * the entry count needs to be updated. Also, we might be
 	 * adding to the start of the free list.
 	 */
-	ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh,
+	ret = ocfs2_journal_access_dr(handle, INODE_CACHE(dir), dx_root_bh,
 				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
@@ -1337,7 +1338,7 @@ static int ocfs2_delete_entry_dx(handle_t *handle, struct inode *dir,
 	}
 
 	if (!ocfs2_dx_root_inline(dx_root)) {
-		ret = ocfs2_journal_access_dl(handle, dir,
+		ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir),
 					      lookup->dl_dx_leaf_bh,
 					      OCFS2_JOURNAL_ACCESS_WRITE);
 		if (ret) {
@@ -1496,7 +1497,7 @@ static int __ocfs2_dx_dir_leaf_insert(struct inode *dir, handle_t *handle,
 	int ret;
 	struct ocfs2_dx_leaf *dx_leaf;
 
-	ret = ocfs2_journal_access_dl(handle, dir, dx_leaf_bh,
+	ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir), dx_leaf_bh,
 				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
@@ -1526,7 +1527,7 @@ static int ocfs2_dx_dir_insert(struct inode *dir, handle_t *handle,
 	struct ocfs2_dx_root_block *dx_root;
 	struct buffer_head *dx_root_bh = lookup->dl_dx_root_bh;
 
-	ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh,
+	ret = ocfs2_journal_access_dr(handle, INODE_CACHE(dir), dx_root_bh,
 				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
@@ -1648,11 +1649,13 @@ int __ocfs2_add_entry(handle_t *handle,
 		 */
 		if (ocfs2_free_list_at_root(lookup)) {
 			bh = lookup->dl_dx_root_bh;
-			retval = ocfs2_journal_access_dr(handle, dir, bh,
+			retval = ocfs2_journal_access_dr(handle,
+						 INODE_CACHE(dir), bh,
 						 OCFS2_JOURNAL_ACCESS_WRITE);
 		} else {
 			bh = lookup->dl_prev_leaf_bh;
-			retval = ocfs2_journal_access_db(handle, dir, bh,
+			retval = ocfs2_journal_access_db(handle,
+						 INODE_CACHE(dir), bh,
 						 OCFS2_JOURNAL_ACCESS_WRITE);
 		}
 		if (retval) {
@@ -1703,11 +1706,13 @@ int __ocfs2_add_entry(handle_t *handle,
 			}
 
 			if (insert_bh == parent_fe_bh)
-				status = ocfs2_journal_access_di(handle, dir,
+				status = ocfs2_journal_access_di(handle,
+								 INODE_CACHE(dir),
 								 insert_bh,
 								 OCFS2_JOURNAL_ACCESS_WRITE);
 			else {
-				status = ocfs2_journal_access_db(handle, dir,
+				status = ocfs2_journal_access_db(handle,
+								 INODE_CACHE(dir),
 								 insert_bh,
 					      OCFS2_JOURNAL_ACCESS_WRITE);
 
@@ -2283,7 +2288,7 @@ static int ocfs2_fill_new_dir_id(struct ocfs2_super *osb,
 	struct ocfs2_inline_data *data = &di->id2.i_data;
 	unsigned int size = le16_to_cpu(data->id_count);
 
-	ret = ocfs2_journal_access_di(handle, inode, di_bh,
+	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
 				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
@@ -2337,7 +2342,7 @@ static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb,
 
 	ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh);
 
-	status = ocfs2_journal_access_db(handle, inode, new_bh,
+	status = ocfs2_journal_access_db(handle, INODE_CACHE(inode), new_bh,
 					 OCFS2_JOURNAL_ACCESS_CREATE);
 	if (status < 0) {
 		mlog_errno(status);
@@ -2423,7 +2428,7 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
 	}
 	ocfs2_set_new_buffer_uptodate(INODE_CACHE(dir), dx_root_bh);
 
-	ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh,
+	ret = ocfs2_journal_access_dr(handle, INODE_CACHE(dir), dx_root_bh,
 				      OCFS2_JOURNAL_ACCESS_CREATE);
 	if (ret < 0) {
 		mlog_errno(ret);
@@ -2457,7 +2462,7 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
 	if (ret)
 		mlog_errno(ret);
 
-	ret = ocfs2_journal_access_di(handle, dir, di_bh,
+	ret = ocfs2_journal_access_di(handle, INODE_CACHE(dir), di_bh,
 				      OCFS2_JOURNAL_ACCESS_CREATE);
 	if (ret) {
 		mlog_errno(ret);
@@ -2500,7 +2505,7 @@ static int ocfs2_dx_dir_format_cluster(struct ocfs2_super *osb,
 
 		ocfs2_set_new_buffer_uptodate(INODE_CACHE(dir), bh);
 
-		ret = ocfs2_journal_access_dl(handle, dir, bh,
+		ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir), bh,
 					      OCFS2_JOURNAL_ACCESS_CREATE);
 		if (ret < 0) {
 			mlog_errno(ret);
@@ -3010,7 +3015,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
 
 	ocfs2_set_new_buffer_uptodate(INODE_CACHE(dir), dirdata_bh);
 
-	ret = ocfs2_journal_access_db(handle, dir, dirdata_bh,
+	ret = ocfs2_journal_access_db(handle, INODE_CACHE(dir), dirdata_bh,
 				      OCFS2_JOURNAL_ACCESS_CREATE);
 	if (ret) {
 		mlog_errno(ret);
@@ -3063,7 +3068,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
 	 * We let the later dirent insert modify c/mtime - to the user
 	 * the data hasn't changed.
 	 */
-	ret = ocfs2_journal_access_di(handle, dir, di_bh,
+	ret = ocfs2_journal_access_di(handle, INODE_CACHE(dir), di_bh,
 				      OCFS2_JOURNAL_ACCESS_CREATE);
 	if (ret) {
 		mlog_errno(ret);
@@ -3392,7 +3397,7 @@ do_extend:
 
 	ocfs2_set_new_buffer_uptodate(INODE_CACHE(dir), new_bh);
 
-	status = ocfs2_journal_access_db(handle, dir, new_bh,
+	status = ocfs2_journal_access_db(handle, INODE_CACHE(dir), new_bh,
 					 OCFS2_JOURNAL_ACCESS_CREATE);
 	if (status < 0) {
 		mlog_errno(status);
@@ -3888,7 +3893,7 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir,
 	}
 	did_quota = 1;
 
-	ret = ocfs2_journal_access_dl(handle, dir, dx_leaf_bh,
+	ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir), dx_leaf_bh,
 				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
@@ -3952,7 +3957,8 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir,
 	}
 
 	for (i = 0; i < num_dx_leaves; i++) {
-		ret = ocfs2_journal_access_dl(handle, dir, orig_dx_leaves[i],
+		ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir),
+					      orig_dx_leaves[i],
 					      OCFS2_JOURNAL_ACCESS_WRITE);
 		if (ret) {
 			mlog_errno(ret);
@@ -4168,7 +4174,7 @@ static int ocfs2_expand_inline_dx_root(struct inode *dir,
 	 * failure to add the dx_root_bh to the journal won't result
 	 * us losing clusters.
 	 */
-	ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh,
+	ret = ocfs2_journal_access_dr(handle, INODE_CACHE(dir), dx_root_bh,
 				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
@@ -4472,7 +4478,7 @@ static int ocfs2_dx_dir_remove_index(struct inode *dir,
 		goto out_unlock;
 	}
 
-	ret = ocfs2_journal_access_di(handle, dir, di_bh,
+	ret = ocfs2_journal_access_di(handle, INODE_CACHE(dir), di_bh,
 				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index fe15cee0322a..f518d1bee30a 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -3499,7 +3499,7 @@ static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
 					int new_level)
 {
 	struct inode *inode = ocfs2_lock_res_inode(lockres);
-	int checkpointed = ocfs2_inode_fully_checkpointed(inode);
+	int checkpointed = ocfs2_ci_fully_checkpointed(INODE_CACHE(inode));
 
 	BUG_ON(new_level != DLM_LOCK_NL && new_level != DLM_LOCK_PR);
 	BUG_ON(lockres->l_level != DLM_LOCK_EX && !checkpointed);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index aa501d3f93f1..3ddbc5e917e2 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -259,7 +259,7 @@ int ocfs2_update_inode_atime(struct inode *inode,
 		goto out;
 	}
 
-	ret = ocfs2_journal_access_di(handle, inode, bh,
+	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), bh,
 				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
@@ -356,7 +356,7 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
 		goto out;
 	}
 
-	status = ocfs2_journal_access_di(handle, inode, fe_bh,
+	status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), fe_bh,
 					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
@@ -593,7 +593,7 @@ restarted_transaction:
 	/* reserve a write to the file entry early on - that we if we
 	 * run out of credits in the allocation path, we can still
 	 * update i_size. */
-	status = ocfs2_journal_access_di(handle, inode, bh,
+	status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), bh,
 					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
@@ -1131,7 +1131,7 @@ static int __ocfs2_write_remove_suid(struct inode *inode,
 		goto out;
 	}
 
-	ret = ocfs2_journal_access_di(handle, inode, bh,
+	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), bh,
 				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret < 0) {
 		mlog_errno(ret);
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 8a9e7085e99f..179c819e52ec 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -562,7 +562,8 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
 			goto out;
 		}
 
-		status = ocfs2_journal_access_di(handle, inode, fe_bh,
+		status = ocfs2_journal_access_di(handle, INODE_CACHE(inode),
+						 fe_bh,
 						 OCFS2_JOURNAL_ACCESS_WRITE);
 		if (status < 0) {
 			mlog_errno(status);
@@ -646,7 +647,7 @@ static int ocfs2_remove_inode(struct inode *inode,
 	}
 
 	/* set the inodes dtime */
-	status = ocfs2_journal_access_di(handle, inode, di_bh,
+	status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
 					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
@@ -1238,7 +1239,7 @@ int ocfs2_mark_inode_dirty(handle_t *handle,
 	mlog_entry("(inode %llu)\n",
 		   (unsigned long long)OCFS2_I(inode)->ip_blkno);
 
-	status = ocfs2_journal_access_di(handle, inode, bh,
+	status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), bh,
 					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index ddf08d384ba1..5b6c0e441445 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -48,6 +48,7 @@
 #include "slot_map.h"
 #include "super.h"
 #include "sysfile.h"
+#include "uptodate.h"
 #include "quota.h"
 
 #include "buffer_head_io.h"
@@ -601,14 +602,16 @@ static struct ocfs2_triggers dl_triggers = {
 };
 
 static int __ocfs2_journal_access(handle_t *handle,
-				  struct inode *inode,
+				  struct ocfs2_caching_info *ci,
 				  struct buffer_head *bh,
 				  struct ocfs2_triggers *triggers,
 				  int type)
 {
 	int status;
+	struct ocfs2_super *osb =
+		OCFS2_SB(ocfs2_metadata_cache_get_super(ci));
 
-	BUG_ON(!inode);
+	BUG_ON(!ci || !ci->ci_ops);
 	BUG_ON(!handle);
 	BUG_ON(!bh);
 
@@ -627,15 +630,15 @@ static int __ocfs2_journal_access(handle_t *handle,
 		BUG();
 	}
 
-	/* Set the current transaction information on the inode so
+	/* Set the current transaction information on the ci so
 	 * that the locking code knows whether it can drop it's locks
-	 * on this inode or not. We're protected from the commit
+	 * on this ci or not. We're protected from the commit
 	 * thread updating the current transaction id until
 	 * ocfs2_commit_trans() because ocfs2_start_trans() took
 	 * j_trans_barrier for us. */
-	ocfs2_set_inode_lock_trans(OCFS2_SB(inode->i_sb)->journal, inode);
+	ocfs2_set_ci_lock_trans(osb->journal, ci);
 
-	mutex_lock(&OCFS2_I(inode)->ip_io_mutex);
+	ocfs2_metadata_cache_io_lock(ci);
 	switch (type) {
 	case OCFS2_JOURNAL_ACCESS_CREATE:
 	case OCFS2_JOURNAL_ACCESS_WRITE:
@@ -650,9 +653,9 @@ static int __ocfs2_journal_access(handle_t *handle,
 		status = -EINVAL;
 		mlog(ML_ERROR, "Uknown access type!\n");
 	}
-	if (!status && ocfs2_meta_ecc(OCFS2_SB(inode->i_sb)) && triggers)
+	if (!status && ocfs2_meta_ecc(osb) && triggers)
 		jbd2_journal_set_triggers(bh, &triggers->ot_triggers);
-	mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
+	ocfs2_metadata_cache_io_unlock(ci);
 
 	if (status < 0)
 		mlog(ML_ERROR, "Error %d getting %d access to buffer!\n",
@@ -662,66 +665,58 @@ static int __ocfs2_journal_access(handle_t *handle,
 	return status;
 }
 
-int ocfs2_journal_access_di(handle_t *handle, struct inode *inode,
-			       struct buffer_head *bh, int type)
+int ocfs2_journal_access_di(handle_t *handle, struct ocfs2_caching_info *ci,
+			    struct buffer_head *bh, int type)
 {
-	return __ocfs2_journal_access(handle, inode, bh, &di_triggers,
-				      type);
+	return __ocfs2_journal_access(handle, ci, bh, &di_triggers, type);
 }
 
-int ocfs2_journal_access_eb(handle_t *handle, struct inode *inode,
+int ocfs2_journal_access_eb(handle_t *handle, struct ocfs2_caching_info *ci,
 			    struct buffer_head *bh, int type)
 {
-	return __ocfs2_journal_access(handle, inode, bh, &eb_triggers,
-				      type);
+	return __ocfs2_journal_access(handle, ci, bh, &eb_triggers, type);
 }
 
-int ocfs2_journal_access_gd(handle_t *handle, struct inode *inode,
+int ocfs2_journal_access_gd(handle_t *handle, struct ocfs2_caching_info *ci,
 			    struct buffer_head *bh, int type)
 {
-	return __ocfs2_journal_access(handle, inode, bh, &gd_triggers,
-				      type);
+	return __ocfs2_journal_access(handle, ci, bh, &gd_triggers, type);
 }
 
-int ocfs2_journal_access_db(handle_t *handle, struct inode *inode,
+int ocfs2_journal_access_db(handle_t *handle, struct ocfs2_caching_info *ci,
 			    struct buffer_head *bh, int type)
 {
-	return __ocfs2_journal_access(handle, inode, bh, &db_triggers,
-				      type);
+	return __ocfs2_journal_access(handle, ci, bh, &db_triggers, type);
 }
 
-int ocfs2_journal_access_xb(handle_t *handle, struct inode *inode,
+int ocfs2_journal_access_xb(handle_t *handle, struct ocfs2_caching_info *ci,
 			    struct buffer_head *bh, int type)
 {
-	return __ocfs2_journal_access(handle, inode, bh, &xb_triggers,
-				      type);
+	return __ocfs2_journal_access(handle, ci, bh, &xb_triggers, type);
 }
 
-int ocfs2_journal_access_dq(handle_t *handle, struct inode *inode,
+int ocfs2_journal_access_dq(handle_t *handle, struct ocfs2_caching_info *ci,
 			    struct buffer_head *bh, int type)
 {
-	return __ocfs2_journal_access(handle, inode, bh, &dq_triggers,
-				      type);
+	return __ocfs2_journal_access(handle, ci, bh, &dq_triggers, type);
 }
 
-int ocfs2_journal_access_dr(handle_t *handle, struct inode *inode,
+int ocfs2_journal_access_dr(handle_t *handle, struct ocfs2_caching_info *ci,
 			    struct buffer_head *bh, int type)
 {
-	return __ocfs2_journal_access(handle, inode, bh, &dr_triggers,
-				      type);
+	return __ocfs2_journal_access(handle, ci, bh, &dr_triggers, type);
 }
 
-int ocfs2_journal_access_dl(handle_t *handle, struct inode *inode,
+int ocfs2_journal_access_dl(handle_t *handle, struct ocfs2_caching_info *ci,
 			    struct buffer_head *bh, int type)
 {
-	return __ocfs2_journal_access(handle, inode, bh, &dl_triggers,
-				      type);
+	return __ocfs2_journal_access(handle, ci, bh, &dl_triggers, type);
 }
 
-int ocfs2_journal_access(handle_t *handle, struct inode *inode,
+int ocfs2_journal_access(handle_t *handle, struct ocfs2_caching_info *ci,
 			 struct buffer_head *bh, int type)
 {
-	return __ocfs2_journal_access(handle, inode, bh, NULL, type);
+	return __ocfs2_journal_access(handle, ci, bh, NULL, type);
 }
 
 int ocfs2_journal_dirty(handle_t *handle,
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 0bb6754c73f4..6163f28badda 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -90,57 +90,66 @@ static inline unsigned long ocfs2_inc_trans_id(struct ocfs2_journal *j)
 	return old_id;
 }
 
-static inline void ocfs2_set_inode_lock_trans(struct ocfs2_journal *journal,
-					      struct inode *inode)
+static inline void ocfs2_set_ci_lock_trans(struct ocfs2_journal *journal,
+					   struct ocfs2_caching_info *ci)
 {
 	spin_lock(&trans_inc_lock);
-	INODE_CACHE(inode)->ci_last_trans = journal->j_trans_id;
+	ci->ci_last_trans = journal->j_trans_id;
 	spin_unlock(&trans_inc_lock);
 }
 
 /* Used to figure out whether it's safe to drop a metadata lock on an
- * inode. Returns true if all the inodes changes have been
+ * object with a metadata cache. Returns true if all its changes have been
  * checkpointed to disk. You should be holding the spinlock on the
  * metadata lock while calling this to be sure that nobody can take
  * the lock and put it on another transaction. */
-static inline int ocfs2_inode_fully_checkpointed(struct inode *inode)
+static inline int ocfs2_ci_fully_checkpointed(struct ocfs2_caching_info *ci)
 {
 	int ret;
-	struct ocfs2_journal *journal = OCFS2_SB(inode->i_sb)->journal;
+	struct ocfs2_journal *journal =
+		OCFS2_SB(ocfs2_metadata_cache_get_super(ci))->journal;
 
 	spin_lock(&trans_inc_lock);
-	ret = time_after(journal->j_trans_id,
-			 INODE_CACHE(inode)->ci_last_trans);
+	ret = time_after(journal->j_trans_id, ci->ci_last_trans);
 	spin_unlock(&trans_inc_lock);
 	return ret;
 }
 
-/* convenience function to check if an inode is still new (has never
- * hit disk) Will do you a favor and set created_trans = 0 when you've
- * been checkpointed.  returns '1' if the inode is still new. */
-static inline int ocfs2_inode_is_new(struct inode *inode)
+/* Convenience function to check if an object backed by struct
+ * ocfs2_caching_info is still new (has never hit disk). Will do you a
+ * favor and set ci_created_trans = 0 when you've
+ * been checkpointed.  Returns '1' if the ci is still new. */
+static inline int ocfs2_ci_is_new(struct ocfs2_caching_info *ci)
 {
 	int ret;
+	struct ocfs2_journal *journal =
+		OCFS2_SB(ocfs2_metadata_cache_get_super(ci))->journal;
+
+	spin_lock(&trans_inc_lock);
+	ret = !(time_after(journal->j_trans_id, ci->ci_created_trans));
+	if (!ret)
+		ci->ci_created_trans = 0;
+	spin_unlock(&trans_inc_lock);
+	return ret;
+}
 
+/* Wrapper for inodes so we can check system files */
+static inline int ocfs2_inode_is_new(struct inode *inode)
+{
 	/* System files are never "new" as they're written out by
 	 * mkfs. This helps us early during mount, before we have the
 	 * journal open and j_trans_id could be junk. */
 	if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SYSTEM_FILE)
 		return 0;
-	spin_lock(&trans_inc_lock);
-	ret = !(time_after(OCFS2_SB(inode->i_sb)->journal->j_trans_id,
-			   INODE_CACHE(inode)->ci_created_trans));
-	if (!ret)
-		INODE_CACHE(inode)->ci_created_trans = 0;
-	spin_unlock(&trans_inc_lock);
-	return ret;
+
+	return ocfs2_ci_is_new(INODE_CACHE(inode));
 }
 
-static inline void ocfs2_inode_set_new(struct ocfs2_super *osb,
-				       struct inode *inode)
+static inline void ocfs2_ci_set_new(struct ocfs2_super *osb,
+				    struct ocfs2_caching_info *ci)
 {
 	spin_lock(&trans_inc_lock);
-	INODE_CACHE(inode)->ci_created_trans = osb->journal->j_trans_id;
+	ci->ci_created_trans = osb->journal->j_trans_id;
 	spin_unlock(&trans_inc_lock);
 }
 
@@ -201,7 +210,7 @@ static inline void ocfs2_checkpoint_inode(struct inode *inode)
 	if (ocfs2_mount_local(osb))
 		return;
 
-	if (!ocfs2_inode_fully_checkpointed(inode)) {
+	if (!ocfs2_ci_fully_checkpointed(INODE_CACHE(inode))) {
 		/* WARNING: This only kicks off a single
 		 * checkpoint. If someone races you and adds more
 		 * metadata to the journal, you won't know, and will
@@ -211,7 +220,7 @@ static inline void ocfs2_checkpoint_inode(struct inode *inode)
 		ocfs2_start_checkpoint(osb);
 
 		wait_event(osb->journal->j_checkpointed,
-			   ocfs2_inode_fully_checkpointed(inode));
+			   ocfs2_ci_fully_checkpointed(INODE_CACHE(inode)));
 	}
 }
 
@@ -267,31 +276,31 @@ int			     ocfs2_extend_trans(handle_t *handle, int nblocks);
 
 
 /* ocfs2_inode */
-int ocfs2_journal_access_di(handle_t *handle, struct inode *inode,
+int ocfs2_journal_access_di(handle_t *handle, struct ocfs2_caching_info *ci,
 			    struct buffer_head *bh, int type);
 /* ocfs2_extent_block */
-int ocfs2_journal_access_eb(handle_t *handle, struct inode *inode,
+int ocfs2_journal_access_eb(handle_t *handle, struct ocfs2_caching_info *ci,
 			    struct buffer_head *bh, int type);
 /* ocfs2_group_desc */
-int ocfs2_journal_access_gd(handle_t *handle, struct inode *inode,
+int ocfs2_journal_access_gd(handle_t *handle, struct ocfs2_caching_info *ci,
 			    struct buffer_head *bh, int type);
 /* ocfs2_xattr_block */
-int ocfs2_journal_access_xb(handle_t *handle, struct inode *inode,
+int ocfs2_journal_access_xb(handle_t *handle, struct ocfs2_caching_info *ci,
 			    struct buffer_head *bh, int type);
 /* quota blocks */
-int ocfs2_journal_access_dq(handle_t *handle, struct inode *inode,
+int ocfs2_journal_access_dq(handle_t *handle, struct ocfs2_caching_info *ci,
 			    struct buffer_head *bh, int type);
 /* dirblock */
-int ocfs2_journal_access_db(handle_t *handle, struct inode *inode,
+int ocfs2_journal_access_db(handle_t *handle, struct ocfs2_caching_info *ci,
 			    struct buffer_head *bh, int type);
 /* ocfs2_dx_root_block */
-int ocfs2_journal_access_dr(handle_t *handle, struct inode *inode,
+int ocfs2_journal_access_dr(handle_t *handle, struct ocfs2_caching_info *ci,
 			    struct buffer_head *bh, int type);
 /* ocfs2_dx_leaf */
-int ocfs2_journal_access_dl(handle_t *handle, struct inode *inode,
+int ocfs2_journal_access_dl(handle_t *handle, struct ocfs2_caching_info *ci,
 			    struct buffer_head *bh, int type);
 /* Anything that has no ecc */
-int ocfs2_journal_access(handle_t *handle, struct inode *inode,
+int ocfs2_journal_access(handle_t *handle, struct ocfs2_caching_info *ci,
 			 struct buffer_head *bh, int type);
 
 /*
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index da5dd6a70e16..ac10f83edb95 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -297,8 +297,8 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
 	}
 	memcpy(alloc_copy, alloc, bh->b_size);
 
-	status = ocfs2_journal_access_di(handle, local_alloc_inode, bh,
-					 OCFS2_JOURNAL_ACCESS_WRITE);
+	status = ocfs2_journal_access_di(handle, INODE_CACHE(local_alloc_inode),
+					 bh, OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto out_commit;
@@ -678,7 +678,8 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
 	 * delete bits from it! */
 	*num_bits = bits_wanted;
 
-	status = ocfs2_journal_access_di(handle, local_alloc_inode,
+	status = ocfs2_journal_access_di(handle,
+					 INODE_CACHE(local_alloc_inode),
 					 osb->local_alloc_bh,
 					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
@@ -1156,7 +1157,8 @@ static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
 	}
 	memcpy(alloc_copy, alloc, osb->local_alloc_bh->b_size);
 
-	status = ocfs2_journal_access_di(handle, local_alloc_inode,
+	status = ocfs2_journal_access_di(handle,
+					 INODE_CACHE(local_alloc_inode),
 					 osb->local_alloc_bh,
 					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 689761b57a18..c07217ad8796 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -375,7 +375,8 @@ static int ocfs2_mknod(struct inode *dir,
 			goto leave;
 		}
 
-		status = ocfs2_journal_access_di(handle, dir, parent_fe_bh,
+		status = ocfs2_journal_access_di(handle, INODE_CACHE(dir),
+						 parent_fe_bh,
 						 OCFS2_JOURNAL_ACCESS_WRITE);
 		if (status < 0) {
 			mlog_errno(status);
@@ -509,7 +510,8 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
 	}
 	ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), *new_fe_bh);
 
-	status = ocfs2_journal_access_di(handle, inode, *new_fe_bh,
+	status = ocfs2_journal_access_di(handle, INODE_CACHE(inode),
+					 *new_fe_bh,
 					 OCFS2_JOURNAL_ACCESS_CREATE);
 	if (status < 0) {
 		mlog_errno(status);
@@ -565,7 +567,7 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
 	}
 
 	ocfs2_populate_inode(inode, fe, 1);
-	ocfs2_inode_set_new(osb, inode);
+	ocfs2_ci_set_new(osb, INODE_CACHE(inode));
 	if (!ocfs2_mount_local(osb)) {
 		status = ocfs2_create_new_inode_locks(inode);
 		if (status < 0)
@@ -682,7 +684,7 @@ static int ocfs2_link(struct dentry *old_dentry,
 		goto out_unlock_inode;
 	}
 
-	err = ocfs2_journal_access_di(handle, inode, fe_bh,
+	err = ocfs2_journal_access_di(handle, INODE_CACHE(inode), fe_bh,
 				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (err < 0) {
 		mlog_errno(err);
@@ -866,7 +868,7 @@ static int ocfs2_unlink(struct inode *dir,
 		goto leave;
 	}
 
-	status = ocfs2_journal_access_di(handle, inode, fe_bh,
+	status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), fe_bh,
 					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
@@ -1284,7 +1286,8 @@ static int ocfs2_rename(struct inode *old_dir,
 				goto bail;
 			}
 		}
-		status = ocfs2_journal_access_di(handle, new_inode, newfe_bh,
+		status = ocfs2_journal_access_di(handle, INODE_CACHE(new_inode),
+						 newfe_bh,
 						 OCFS2_JOURNAL_ACCESS_WRITE);
 		if (status < 0) {
 			mlog_errno(status);
@@ -1331,7 +1334,8 @@ static int ocfs2_rename(struct inode *old_dir,
 	old_inode->i_ctime = CURRENT_TIME;
 	mark_inode_dirty(old_inode);
 
-	status = ocfs2_journal_access_di(handle, old_inode, old_inode_bh,
+	status = ocfs2_journal_access_di(handle, INODE_CACHE(old_inode),
+					 old_inode_bh,
 					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status >= 0) {
 		old_di = (struct ocfs2_dinode *) old_inode_bh->b_data;
@@ -1407,9 +1411,10 @@ static int ocfs2_rename(struct inode *old_dir,
 			     (int)old_dir_nlink, old_dir->i_nlink);
 		} else {
 			struct ocfs2_dinode *fe;
-			status = ocfs2_journal_access_di(handle, old_dir,
-						      old_dir_bh,
-						      OCFS2_JOURNAL_ACCESS_WRITE);
+			status = ocfs2_journal_access_di(handle,
+							 INODE_CACHE(old_dir),
+							 old_dir_bh,
+							 OCFS2_JOURNAL_ACCESS_WRITE);
 			fe = (struct ocfs2_dinode *) old_dir_bh->b_data;
 			ocfs2_set_links_count(fe, old_dir->i_nlink);
 			status = ocfs2_journal_dirty(handle, old_dir_bh);
@@ -1530,7 +1535,8 @@ static int ocfs2_create_symlink_data(struct ocfs2_super *osb,
 		ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode),
 					      bhs[virtual]);
 
-		status = ocfs2_journal_access(handle, inode, bhs[virtual],
+		status = ocfs2_journal_access(handle, INODE_CACHE(inode),
+					      bhs[virtual],
 					      OCFS2_JOURNAL_ACCESS_CREATE);
 		if (status < 0) {
 			mlog_errno(status);
@@ -1918,7 +1924,9 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
 		goto leave;
 	}
 
-	status = ocfs2_journal_access_di(handle, orphan_dir_inode, orphan_dir_bh,
+	status = ocfs2_journal_access_di(handle,
+					 INODE_CACHE(orphan_dir_inode),
+					 orphan_dir_bh,
 					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
@@ -2003,7 +2011,9 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
 		goto leave;
 	}
 
-	status = ocfs2_journal_access_di(handle,orphan_dir_inode,  orphan_dir_bh,
+	status = ocfs2_journal_access_di(handle,
+					 INODE_CACHE(orphan_dir_inode),
+					 orphan_dir_bh,
 					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 18b5fea98c91..d370262b3621 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -91,6 +91,11 @@ struct ocfs2_caching_info {
 		struct rb_root	ci_tree;
 	} ci_cache;
 };
+/*
+ * Need this prototype here instead of in uptodate.h because journal.h
+ * uses it.
+ */
+struct super_block *ocfs2_metadata_cache_get_super(struct ocfs2_caching_info *ci);
 
 /* this limits us to 256 nodes
  * if we need more, we can do a kmalloc for the map */
@@ -408,7 +413,8 @@ struct ocfs2_super
 #define OCFS2_SB(sb)	    ((struct ocfs2_super *)(sb)->s_fs_info)
 
 /* Useful typedef for passing around journal access functions */
-typedef int (*ocfs2_journal_access_func)(handle_t *handle, struct inode *inode,
+typedef int (*ocfs2_journal_access_func)(handle_t *handle,
+					 struct ocfs2_caching_info *ci,
 					 struct buffer_head *bh, int type);
 
 static inline int ocfs2_should_order_data(struct inode *inode)
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 0d7125bb71d9..7eadf8bf1e1f 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -254,7 +254,8 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type,
 	set_buffer_uptodate(bh);
 	unlock_buffer(bh);
 	ocfs2_set_buffer_uptodate(INODE_CACHE(gqinode), bh);
-	err = ocfs2_journal_access_dq(handle, gqinode, bh, ja_type);
+	err = ocfs2_journal_access_dq(handle, INODE_CACHE(gqinode), bh,
+				      ja_type);
 	if (err < 0) {
 		brelse(bh);
 		goto out;
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index 3df2954ac83b..1a2c50a759fa 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -108,7 +108,7 @@ static int ocfs2_modify_bh(struct inode *inode, struct buffer_head *bh,
 		mlog_errno(status);
 		return status;
 	}
-	status = ocfs2_journal_access_dq(handle, inode, bh,
+	status = ocfs2_journal_access_dq(handle, INODE_CACHE(inode), bh,
 					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
@@ -510,7 +510,8 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode,
 				goto out_commit;
 			}
 			/* Release local quota file entry */
-			status = ocfs2_journal_access_dq(handle, lqinode,
+			status = ocfs2_journal_access_dq(handle,
+					INODE_CACHE(lqinode),
 					qbh, OCFS2_JOURNAL_ACCESS_WRITE);
 			if (status < 0) {
 				mlog_errno(status);
@@ -619,7 +620,8 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb,
 			mlog_errno(status);
 			goto out_bh;
 		}
-		status = ocfs2_journal_access_dq(handle, lqinode, bh,
+		status = ocfs2_journal_access_dq(handle, INODE_CACHE(lqinode),
+						 bh,
 						 OCFS2_JOURNAL_ACCESS_WRITE);
 		if (status < 0) {
 			mlog_errno(status);
@@ -994,7 +996,7 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
 	}
 	dchunk = (struct ocfs2_local_disk_chunk *)bh->b_data;
 	ocfs2_set_new_buffer_uptodate(INODE_CACHE(lqinode), bh);
-	status = ocfs2_journal_access_dq(handle, lqinode, bh,
+	status = ocfs2_journal_access_dq(handle, INODE_CACHE(lqinode), bh,
 					 OCFS2_JOURNAL_ACCESS_CREATE);
 	if (status < 0) {
 		mlog_errno(status);
@@ -1028,7 +1030,7 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
 		goto out_trans;
 	}
 	ocfs2_set_new_buffer_uptodate(INODE_CACHE(lqinode), dbh);
-	status = ocfs2_journal_access_dq(handle, lqinode, dbh,
+	status = ocfs2_journal_access_dq(handle, INODE_CACHE(lqinode), dbh,
 					 OCFS2_JOURNAL_ACCESS_CREATE);
 	if (status < 0) {
 		mlog_errno(status);
@@ -1143,7 +1145,7 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
 		goto out;
 	}
 	/* Zero created block */
-	status = ocfs2_journal_access_dq(handle, lqinode, bh,
+	status = ocfs2_journal_access_dq(handle, INODE_CACHE(lqinode), bh,
 				 OCFS2_JOURNAL_ACCESS_CREATE);
 	if (status < 0) {
 		mlog_errno(status);
@@ -1158,7 +1160,8 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
 		goto out_trans;
 	}
 	/* Update chunk header */
-	status = ocfs2_journal_access_dq(handle, lqinode, chunk->qc_headerbh,
+	status = ocfs2_journal_access_dq(handle, INODE_CACHE(lqinode),
+					 chunk->qc_headerbh,
 				 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
@@ -1292,7 +1295,8 @@ static int ocfs2_local_release_dquot(struct dquot *dquot)
 		goto out;
 	}
 
-	status = ocfs2_journal_access_dq(handle, sb_dqopt(sb)->files[type],
+	status = ocfs2_journal_access_dq(handle,
+			INODE_CACHE(sb_dqopt(sb)->files[type]),
 			od->dq_chunk->qc_headerbh, OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c
index 7465f0fded77..3c3d673a4d20 100644
--- a/fs/ocfs2/resize.c
+++ b/fs/ocfs2/resize.c
@@ -106,8 +106,8 @@ static int ocfs2_update_last_group_and_inode(handle_t *handle,
 	mlog_entry("(new_clusters=%d, first_new_cluster = %u)\n",
 		   new_clusters, first_new_cluster);
 
-	ret = ocfs2_journal_access_gd(handle, bm_inode, group_bh,
-				      OCFS2_JOURNAL_ACCESS_WRITE);
+	ret = ocfs2_journal_access_gd(handle, INODE_CACHE(bm_inode),
+				      group_bh, OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret < 0) {
 		mlog_errno(ret);
 		goto out;
@@ -141,7 +141,7 @@ static int ocfs2_update_last_group_and_inode(handle_t *handle,
 	}
 
 	/* update the inode accordingly. */
-	ret = ocfs2_journal_access_di(handle, bm_inode, bm_bh,
+	ret = ocfs2_journal_access_di(handle, INODE_CACHE(bm_inode), bm_bh,
 				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret < 0) {
 		mlog_errno(ret);
@@ -536,8 +536,8 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
 	cl = &fe->id2.i_chain;
 	cr = &cl->cl_recs[input->chain];
 
-	ret = ocfs2_journal_access_gd(handle, main_bm_inode, group_bh,
-				      OCFS2_JOURNAL_ACCESS_WRITE);
+	ret = ocfs2_journal_access_gd(handle, INODE_CACHE(main_bm_inode),
+				      group_bh, OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret < 0) {
 		mlog_errno(ret);
 		goto out_commit;
@@ -552,8 +552,8 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
 		goto out_commit;
 	}
 
-	ret = ocfs2_journal_access_di(handle, main_bm_inode, main_bm_bh,
-				      OCFS2_JOURNAL_ACCESS_WRITE);
+	ret = ocfs2_journal_access_di(handle, INODE_CACHE(main_bm_inode),
+				      main_bm_bh, OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret < 0) {
 		mlog_errno(ret);
 		goto out_commit;
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 21aaaaaaa2d3..a6c442c82e3d 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -352,7 +352,7 @@ static int ocfs2_block_group_fill(handle_t *handle,
 	}
 
 	status = ocfs2_journal_access_gd(handle,
-					 alloc_inode,
+					 INODE_CACHE(alloc_inode),
 					 bg_bh,
 					 OCFS2_JOURNAL_ACCESS_CREATE);
 	if (status < 0) {
@@ -491,7 +491,7 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
 
 	bg = (struct ocfs2_group_desc *) bg_bh->b_data;
 
-	status = ocfs2_journal_access_di(handle, alloc_inode,
+	status = ocfs2_journal_access_di(handle, INODE_CACHE(alloc_inode),
 					 bh, OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
@@ -1033,7 +1033,7 @@ static inline int ocfs2_block_group_set_bits(handle_t *handle,
 		journal_type = OCFS2_JOURNAL_ACCESS_UNDO;
 
 	status = ocfs2_journal_access_gd(handle,
-					 alloc_inode,
+					 INODE_CACHE(alloc_inode),
 					 group_bh,
 					 journal_type);
 	if (status < 0) {
@@ -1106,7 +1106,8 @@ static int ocfs2_relink_block_group(handle_t *handle,
 	bg_ptr = le64_to_cpu(bg->bg_next_group);
 	prev_bg_ptr = le64_to_cpu(prev_bg->bg_next_group);
 
-	status = ocfs2_journal_access_gd(handle, alloc_inode, prev_bg_bh,
+	status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode),
+					 prev_bg_bh,
 					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
@@ -1121,8 +1122,8 @@ static int ocfs2_relink_block_group(handle_t *handle,
 		goto out_rollback;
 	}
 
-	status = ocfs2_journal_access_gd(handle, alloc_inode, bg_bh,
-					 OCFS2_JOURNAL_ACCESS_WRITE);
+	status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode),
+					 bg_bh, OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto out_rollback;
@@ -1136,8 +1137,8 @@ static int ocfs2_relink_block_group(handle_t *handle,
 		goto out_rollback;
 	}
 
-	status = ocfs2_journal_access_di(handle, alloc_inode, fe_bh,
-					 OCFS2_JOURNAL_ACCESS_WRITE);
+	status = ocfs2_journal_access_di(handle, INODE_CACHE(alloc_inode),
+					 fe_bh, OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto out_rollback;
@@ -1288,7 +1289,7 @@ static int ocfs2_alloc_dinode_update_counts(struct inode *inode,
 	struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
 	struct ocfs2_chain_list *cl = (struct ocfs2_chain_list *) &di->id2.i_chain;
 
-	ret = ocfs2_journal_access_di(handle, inode, di_bh,
+	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
 				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret < 0) {
 		mlog_errno(ret);
@@ -1461,7 +1462,7 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
 	/* Ok, claim our bits now: set the info on dinode, chainlist
 	 * and then the group */
 	status = ocfs2_journal_access_di(handle,
-					 alloc_inode,
+					 INODE_CACHE(alloc_inode),
 					 ac->ac_bh,
 					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
@@ -1907,8 +1908,8 @@ static inline int ocfs2_block_group_clear_bits(handle_t *handle,
 	if (ocfs2_is_cluster_bitmap(alloc_inode))
 		journal_type = OCFS2_JOURNAL_ACCESS_UNDO;
 
-	status = ocfs2_journal_access_gd(handle, alloc_inode, group_bh,
-					 journal_type);
+	status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode),
+					 group_bh, journal_type);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
@@ -1993,8 +1994,8 @@ int ocfs2_free_suballoc_bits(handle_t *handle,
 		goto bail;
 	}
 
-	status = ocfs2_journal_access_di(handle, alloc_inode, alloc_bh,
-					 OCFS2_JOURNAL_ACCESS_WRITE);
+	status = ocfs2_journal_access_di(handle, INODE_CACHE(alloc_inode),
+					 alloc_bh, OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
diff --git a/fs/ocfs2/uptodate.h b/fs/ocfs2/uptodate.h
index 80dbb1db0a5a..0d826fe2da0d 100644
--- a/fs/ocfs2/uptodate.h
+++ b/fs/ocfs2/uptodate.h
@@ -64,7 +64,6 @@ void ocfs2_metadata_cache_purge(struct ocfs2_caching_info *ci);
 void ocfs2_metadata_cache_exit(struct ocfs2_caching_info *ci);
 
 u64 ocfs2_metadata_cache_owner(struct ocfs2_caching_info *ci);
-struct super_block *ocfs2_metadata_cache_get_super(struct ocfs2_caching_info *ci);
 void ocfs2_metadata_cache_io_lock(struct ocfs2_caching_info *ci);
 void ocfs2_metadata_cache_io_unlock(struct ocfs2_caching_info *ci);
 
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 19de5c487242..93aae7953c2e 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -297,7 +297,8 @@ static int ocfs2_xattr_bucket_journal_access(handle_t *handle,
 	int i, rc = 0;
 
 	for (i = 0; i < bucket->bu_blocks; i++) {
-		rc = ocfs2_journal_access(handle, bucket->bu_inode,
+		rc = ocfs2_journal_access(handle,
+					  INODE_CACHE(bucket->bu_inode),
 					  bucket->bu_bhs[i], type);
 		if (rc) {
 			mlog_errno(rc);
@@ -604,7 +605,7 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode,
 
 	ocfs2_init_xattr_value_extent_tree(&et, inode, vb);
 
-	status = vb->vb_access(handle, inode, vb->vb_bh,
+	status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
 			      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
@@ -658,7 +659,7 @@ static int __ocfs2_remove_xattr_range(struct inode *inode,
 
 	ocfs2_init_xattr_value_extent_tree(&et, inode, vb);
 
-	ret = vb->vb_access(handle, inode, vb->vb_bh,
+	ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
 			    OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
@@ -1217,7 +1218,7 @@ static int __ocfs2_xattr_set_value_outside(struct inode *inode,
 			}
 
 			ret = ocfs2_journal_access(handle,
-						   inode,
+						   INODE_CACHE(inode),
 						   bh,
 						   OCFS2_JOURNAL_ACCESS_WRITE);
 			if (ret < 0) {
@@ -1268,7 +1269,7 @@ static int ocfs2_xattr_cleanup(struct inode *inode,
 	void *val = xs->base + offs;
 	size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
 
-	ret = vb->vb_access(handle, inode, vb->vb_bh,
+	ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
 			    OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
@@ -1296,7 +1297,7 @@ static int ocfs2_xattr_update_entry(struct inode *inode,
 {
 	int ret;
 
-	ret = vb->vb_access(handle, inode, vb->vb_bh,
+	ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
 			    OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
@@ -1617,7 +1618,7 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
 		}
 	}
 
-	ret = ocfs2_journal_access_di(handle, inode, xs->inode_bh,
+	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), xs->inode_bh,
 				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
@@ -1625,7 +1626,7 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
 	}
 
 	if (!(flag & OCFS2_INLINE_XATTR_FL)) {
-		ret = vb.vb_access(handle, inode, vb.vb_bh,
+		ret = vb.vb_access(handle, INODE_CACHE(inode), vb.vb_bh,
 				   OCFS2_JOURNAL_ACCESS_WRITE);
 		if (ret) {
 			mlog_errno(ret);
@@ -1898,7 +1899,7 @@ int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
 		mlog_errno(ret);
 		goto out;
 	}
-	ret = ocfs2_journal_access_di(handle, inode, di_bh,
+	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
 				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
@@ -2107,7 +2108,8 @@ static int ocfs2_xattr_block_set(struct inode *inode,
 	int ret;
 
 	if (!xs->xattr_bh) {
-		ret = ocfs2_journal_access_di(handle, inode, xs->inode_bh,
+		ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode),
+					      xs->inode_bh,
 					      OCFS2_JOURNAL_ACCESS_CREATE);
 		if (ret < 0) {
 			mlog_errno(ret);
@@ -2125,7 +2127,8 @@ static int ocfs2_xattr_block_set(struct inode *inode,
 		new_bh = sb_getblk(inode->i_sb, first_blkno);
 		ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh);
 
-		ret = ocfs2_journal_access_xb(handle, inode, new_bh,
+		ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode),
+					      new_bh,
 					      OCFS2_JOURNAL_ACCESS_CREATE);
 		if (ret < 0) {
 			mlog_errno(ret);
@@ -2600,7 +2603,7 @@ static int __ocfs2_xattr_set_handle(struct inode *inode,
 
 	if (!ret) {
 		/* Update inode ctime. */
-		ret = ocfs2_journal_access_di(ctxt->handle, inode,
+		ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode),
 					      xis->inode_bh,
 					      OCFS2_JOURNAL_ACCESS_WRITE);
 		if (ret) {
@@ -3428,7 +3431,7 @@ static int ocfs2_xattr_create_index_block(struct inode *inode,
 	 */
 	down_write(&oi->ip_alloc_sem);
 
-	ret = ocfs2_journal_access_xb(handle, inode, xb_bh,
+	ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), xb_bh,
 				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
@@ -4267,7 +4270,7 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
 
 	ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh);
 
-	ret = ocfs2_journal_access_xb(handle, inode, root_bh,
+	ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh,
 				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret < 0) {
 		mlog_errno(ret);
@@ -4873,7 +4876,7 @@ static int ocfs2_rm_xattr_cluster(struct inode *inode,
 		goto out;
 	}
 
-	ret = ocfs2_journal_access_xb(handle, inode, root_bh,
+	ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh,
 				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
-- 
cgit v1.2.3


From d9a0a1f83bf083b55b3c1f16efddecc31abace61 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Thu, 12 Feb 2009 17:32:34 -0800
Subject: ocfs2: Store the ocfs2_caching_info on ocfs2_extent_tree.

What do we cache?  Metadata blocks.  What are most of our non-inode metadata
blocks?  Extent blocks for our btrees.  struct ocfs2_extent_tree is the
main structure for managing those.  So let's store the associated
ocfs2_caching_info there.

This means that ocfs2_et_root_journal_access() doesn't need struct inode
anymore, and any place that has an et can refer to et->et_ci instead of
INODE_CACHE(inode).

Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/alloc.c | 26 +++++++++++++-------------
 fs/ocfs2/alloc.h |  4 +++-
 2 files changed, 16 insertions(+), 14 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 616afa9f7bd1..a26294caf1cf 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -352,6 +352,7 @@ static void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et,
 {
 	et->et_ops = ops;
 	et->et_root_bh = bh;
+	et->et_ci = INODE_CACHE(inode);
 	et->et_root_journal_access = access;
 	if (!obj)
 		obj = (void *)bh->b_data;
@@ -415,11 +416,10 @@ static inline void ocfs2_et_update_clusters(struct inode *inode,
 }
 
 static inline int ocfs2_et_root_journal_access(handle_t *handle,
-					       struct ocfs2_caching_info *ci,
 					       struct ocfs2_extent_tree *et,
 					       int type)
 {
-	return et->et_root_journal_access(handle, ci, et->et_root_bh,
+	return et->et_root_journal_access(handle, et->et_ci, et->et_root_bh,
 					  type);
 }
 
@@ -1209,7 +1209,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
 		mlog_errno(status);
 		goto bail;
 	}
-	status = ocfs2_et_root_journal_access(handle, INODE_CACHE(inode), et,
+	status = ocfs2_et_root_journal_access(handle, et,
 					      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
@@ -1325,7 +1325,7 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
 		goto bail;
 	}
 
-	status = ocfs2_et_root_journal_access(handle, INODE_CACHE(inode), et,
+	status = ocfs2_et_root_journal_access(handle, et,
 					      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
@@ -2674,7 +2674,7 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
 		 * We have to update i_last_eb_blk during the meta
 		 * data delete.
 		 */
-		ret = ocfs2_et_root_journal_access(handle, INODE_CACHE(inode), et,
+		ret = ocfs2_et_root_journal_access(handle, et,
 						   OCFS2_JOURNAL_ACCESS_WRITE);
 		if (ret) {
 			mlog_errno(ret);
@@ -3026,7 +3026,7 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
 		goto out;
 	}
 
-	ret = ocfs2_journal_access_path(INODE_CACHE(inode), handle, path);
+	ret = ocfs2_journal_access_path(et->et_ci, handle, path);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -3056,7 +3056,7 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
 			goto out;
 		}
 
-		ret = ocfs2_journal_access_path(INODE_CACHE(inode), handle, left_path);
+		ret = ocfs2_journal_access_path(et->et_ci, handle, left_path);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -4212,7 +4212,7 @@ static int ocfs2_do_insert_extent(struct inode *inode,
 
 	el = et->et_root_el;
 
-	ret = ocfs2_et_root_journal_access(handle, INODE_CACHE(inode), et,
+	ret = ocfs2_et_root_journal_access(handle, et,
 					   OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
@@ -4274,7 +4274,7 @@ static int ocfs2_do_insert_extent(struct inode *inode,
 		 * ocfs2_rotate_tree_right() might have extended the
 		 * transaction without re-journaling our tree root.
 		 */
-		ret = ocfs2_et_root_journal_access(handle, INODE_CACHE(inode), et,
+		ret = ocfs2_et_root_journal_access(handle, et,
 						   OCFS2_JOURNAL_ACCESS_WRITE);
 		if (ret) {
 			mlog_errno(ret);
@@ -4797,7 +4797,7 @@ int ocfs2_add_clusters_in_btree(struct ocfs2_super *osb,
 	BUG_ON(num_bits > clusters_to_add);
 
 	/* reserve our write early -- insert_extent may update the tree root */
-	status = ocfs2_et_root_journal_access(handle, INODE_CACHE(inode), et,
+	status = ocfs2_et_root_journal_access(handle, et,
 					      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
@@ -5334,13 +5334,13 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
 		goto out;
 	}
 
-	ret = ocfs2_journal_access_path(INODE_CACHE(inode), handle, path);
+	ret = ocfs2_journal_access_path(et->et_ci, handle, path);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
 	}
 
-	ret = ocfs2_journal_access_path(INODE_CACHE(inode), handle, left_path);
+	ret = ocfs2_journal_access_path(et->et_ci, handle, left_path);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -5575,7 +5575,7 @@ int ocfs2_remove_btree_range(struct inode *inode,
 		goto out;
 	}
 
-	ret = ocfs2_et_root_journal_access(handle, INODE_CACHE(inode), et,
+	ret = ocfs2_et_root_journal_access(handle, et,
 					   OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret) {
 		mlog_errno(ret);
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index 353254ba29e1..285d40b4b0fb 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -45,7 +45,8 @@
  *
  * ocfs2_extent_tree contains info for the root of the b-tree, it must have a
  * root ocfs2_extent_list and a root_bh so that they can be used in the b-tree
- * functions.  With metadata ecc, we now call different journal_access
+ * functions.  It needs the ocfs2_caching_info structure associated with
+ * I/O on the tree.  With metadata ecc, we now call different journal_access
  * functions for each type of metadata, so it must have the
  * root_journal_access function.
  * ocfs2_extent_tree_operations abstract the normal operations we do for
@@ -56,6 +57,7 @@ struct ocfs2_extent_tree {
 	struct ocfs2_extent_tree_operations	*et_ops;
 	struct buffer_head			*et_root_bh;
 	struct ocfs2_extent_list		*et_root_el;
+	struct ocfs2_caching_info		*et_ci;
 	ocfs2_journal_access_func		et_root_journal_access;
 	void					*et_object;
 	unsigned int				et_max_leaf_clusters;
-- 
cgit v1.2.3


From 3d03a305ded8057155bd3c801e64ffef9f534827 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Thu, 12 Feb 2009 17:49:26 -0800
Subject: ocfs2: Pass ocfs2_caching_info to ocfs2_read_extent_block().

Extent blocks belong to btrees on more than just inodes, so we want to
pass the ocfs2_caching_info structure directly to
ocfs2_read_extent_block().  A number of places in alloc.c can now drop
struct inode from their argument list.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/alloc.c      | 38 ++++++++++++++++++++------------------
 fs/ocfs2/alloc.h      |  3 +--
 fs/ocfs2/dir.c        |  2 +-
 fs/ocfs2/extent_map.c |  4 ++--
 fs/ocfs2/suballoc.c   |  2 +-
 5 files changed, 25 insertions(+), 24 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index a26294caf1cf..1ff13d3958dd 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -854,13 +854,13 @@ static int ocfs2_validate_extent_block(struct super_block *sb,
 	return 0;
 }
 
-int ocfs2_read_extent_block(struct inode *inode, u64 eb_blkno,
+int ocfs2_read_extent_block(struct ocfs2_caching_info *ci, u64 eb_blkno,
 			    struct buffer_head **bh)
 {
 	int rc;
 	struct buffer_head *tmp = *bh;
 
-	rc = ocfs2_read_block(INODE_CACHE(inode), eb_blkno, &tmp,
+	rc = ocfs2_read_block(ci, eb_blkno, &tmp,
 			      ocfs2_validate_extent_block);
 
 	/* If ocfs2_read_block() got us a new bh, pass it up. */
@@ -875,7 +875,6 @@ int ocfs2_read_extent_block(struct inode *inode, u64 eb_blkno,
  * How many free extents have we got before we need more meta data?
  */
 int ocfs2_num_free_extents(struct ocfs2_super *osb,
-			   struct inode *inode,
 			   struct ocfs2_extent_tree *et)
 {
 	int retval;
@@ -890,7 +889,8 @@ int ocfs2_num_free_extents(struct ocfs2_super *osb,
 	last_eb_blk = ocfs2_et_get_last_eb_blk(et);
 
 	if (last_eb_blk) {
-		retval = ocfs2_read_extent_block(inode, last_eb_blk, &eb_bh);
+		retval = ocfs2_read_extent_block(et->et_ci, last_eb_blk,
+						 &eb_bh);
 		if (retval < 0) {
 			mlog_errno(retval);
 			goto bail;
@@ -1382,7 +1382,6 @@ bail:
  * return status < 0 indicates an error.
  */
 static int ocfs2_find_branch_target(struct ocfs2_super *osb,
-				    struct inode *inode,
 				    struct ocfs2_extent_tree *et,
 				    struct buffer_head **target_bh)
 {
@@ -1401,19 +1400,21 @@ static int ocfs2_find_branch_target(struct ocfs2_super *osb,
 
 	while(le16_to_cpu(el->l_tree_depth) > 1) {
 		if (le16_to_cpu(el->l_next_free_rec) == 0) {
-			ocfs2_error(inode->i_sb, "Dinode %llu has empty "
+			ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
+				    "Owner %llu has empty "
 				    "extent list (next_free_rec == 0)",
-				    (unsigned long long)OCFS2_I(inode)->ip_blkno);
+				    (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci));
 			status = -EIO;
 			goto bail;
 		}
 		i = le16_to_cpu(el->l_next_free_rec) - 1;
 		blkno = le64_to_cpu(el->l_recs[i].e_blkno);
 		if (!blkno) {
-			ocfs2_error(inode->i_sb, "Dinode %llu has extent "
+			ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
+				    "Owner %llu has extent "
 				    "list where extent # %d has no physical "
 				    "block start",
-				    (unsigned long long)OCFS2_I(inode)->ip_blkno, i);
+				    (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci), i);
 			status = -EIO;
 			goto bail;
 		}
@@ -1421,7 +1422,7 @@ static int ocfs2_find_branch_target(struct ocfs2_super *osb,
 		brelse(bh);
 		bh = NULL;
 
-		status = ocfs2_read_extent_block(inode, blkno, &bh);
+		status = ocfs2_read_extent_block(et->et_ci, blkno, &bh);
 		if (status < 0) {
 			mlog_errno(status);
 			goto bail;
@@ -1475,7 +1476,7 @@ static int ocfs2_grow_tree(struct inode *inode, handle_t *handle,
 
 	BUG_ON(meta_ac == NULL);
 
-	shift = ocfs2_find_branch_target(osb, inode, et, &bh);
+	shift = ocfs2_find_branch_target(osb, et, &bh);
 	if (shift < 0) {
 		ret = shift;
 		mlog_errno(ret);
@@ -1780,7 +1781,7 @@ static int __ocfs2_find_path(struct inode *inode,
 
 		brelse(bh);
 		bh = NULL;
-		ret = ocfs2_read_extent_block(inode, blkno, &bh);
+		ret = ocfs2_read_extent_block(INODE_CACHE(inode), blkno, &bh);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -3032,7 +3033,8 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
 		goto out;
 	}
 
-	ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, path, &cpos);
+	ret = ocfs2_find_cpos_for_left_leaf(ocfs2_metadata_cache_get_super(et->et_ci),
+					    path, &cpos);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -4557,7 +4559,7 @@ static int ocfs2_figure_insert_type(struct inode *inode,
 		 * ocfs2_figure_insert_type() and ocfs2_add_branch()
 		 * may want it later.
 		 */
-		ret = ocfs2_read_extent_block(inode,
+		ret = ocfs2_read_extent_block(et->et_ci,
 					      ocfs2_et_get_last_eb_blk(et),
 					      &bh);
 		if (ret) {
@@ -4760,7 +4762,7 @@ int ocfs2_add_clusters_in_btree(struct ocfs2_super *osb,
 	if (mark_unwritten)
 		flags = OCFS2_EXT_UNWRITTEN;
 
-	free_extents = ocfs2_num_free_extents(osb, inode, et);
+	free_extents = ocfs2_num_free_extents(osb, et);
 	if (free_extents < 0) {
 		status = free_extents;
 		mlog_errno(status);
@@ -5048,7 +5050,7 @@ static int __ocfs2_mark_extent_written(struct inode *inode,
 	if (path->p_tree_depth) {
 		struct ocfs2_extent_block *eb;
 
-		ret = ocfs2_read_extent_block(inode,
+		ret = ocfs2_read_extent_block(et->et_ci,
 					      ocfs2_et_get_last_eb_blk(et),
 					      &last_eb_bh);
 		if (ret) {
@@ -5203,7 +5205,7 @@ static int ocfs2_split_tree(struct inode *inode, struct ocfs2_extent_tree *et,
 
 	depth = path->p_tree_depth;
 	if (depth > 0) {
-		ret = ocfs2_read_extent_block(inode,
+		ret = ocfs2_read_extent_block(et->et_ci,
 					      ocfs2_et_get_last_eb_blk(et),
 					      &last_eb_bh);
 		if (ret < 0) {
@@ -7447,7 +7449,7 @@ int ocfs2_prepare_truncate(struct ocfs2_super *osb,
 	ocfs2_init_dealloc_ctxt(&(*tc)->tc_dealloc);
 
 	if (fe->id2.i_list.l_tree_depth) {
-		status = ocfs2_read_extent_block(inode,
+		status = ocfs2_read_extent_block(INODE_CACHE(inode),
 						 le64_to_cpu(fe->i_last_eb_blk),
 						 &last_eb_bh);
 		if (status < 0) {
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index 285d40b4b0fb..ed78ee5139bc 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -86,7 +86,7 @@ void ocfs2_init_dx_root_extent_tree(struct ocfs2_extent_tree *et,
  * allocated.  This is a cached read.  The extent block will be validated
  * with ocfs2_validate_extent_block().
  */
-int ocfs2_read_extent_block(struct inode *inode, u64 eb_blkno,
+int ocfs2_read_extent_block(struct ocfs2_caching_info *ci, u64 eb_blkno,
 			    struct buffer_head **bh);
 
 struct ocfs2_alloc_context;
@@ -132,7 +132,6 @@ int ocfs2_remove_btree_range(struct inode *inode,
 			     struct ocfs2_cached_dealloc_ctxt *dealloc);
 
 int ocfs2_num_free_extents(struct ocfs2_super *osb,
-			   struct inode *inode,
 			   struct ocfs2_extent_tree *et);
 
 /*
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 073ab34b8c2a..00e43281b9a4 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -3346,7 +3346,7 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
 	if (dir_i_size == ocfs2_clusters_to_bytes(sb, OCFS2_I(dir)->ip_clusters)) {
 		spin_unlock(&OCFS2_I(dir)->ip_lock);
 		ocfs2_init_dinode_extent_tree(&et, dir, parent_fe_bh);
-		num_free_extents = ocfs2_num_free_extents(osb, dir, &et);
+		num_free_extents = ocfs2_num_free_extents(osb, &et);
 		if (num_free_extents < 0) {
 			status = num_free_extents;
 			mlog_errno(status);
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index dbd8a16d5125..a5dc13e6fe76 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -293,7 +293,7 @@ static int ocfs2_last_eb_is_empty(struct inode *inode,
 	struct ocfs2_extent_block *eb;
 	struct ocfs2_extent_list *el;
 
-	ret = ocfs2_read_extent_block(inode, last_eb_blk, &eb_bh);
+	ret = ocfs2_read_extent_block(INODE_CACHE(inode), last_eb_blk, &eb_bh);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -375,7 +375,7 @@ static int ocfs2_figure_hole_clusters(struct inode *inode,
 		if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL)
 			goto no_more_extents;
 
-		ret = ocfs2_read_extent_block(inode,
+		ret = ocfs2_read_extent_block(INODE_CACHE(inode),
 					      le64_to_cpu(eb->h_next_leaf_blk),
 					      &next_eb_bh);
 		if (ret) {
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index a6c442c82e3d..c30b644d9572 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -2152,7 +2152,7 @@ int ocfs2_lock_allocators(struct inode *inode,
 
 	BUG_ON(clusters_to_add != 0 && data_ac == NULL);
 
-	num_free_extents = ocfs2_num_free_extents(osb, inode, et);
+	num_free_extents = ocfs2_num_free_extents(osb, et);
 	if (num_free_extents < 0) {
 		ret = num_free_extents;
 		mlog_errno(ret);
-- 
cgit v1.2.3


From facdb77f54f09a33baf6b649496f5dd1d7922a7e Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Thu, 12 Feb 2009 18:08:48 -0800
Subject: ocfs2: ocfs2_find_path() only needs the caching info

ocfs2_find_path() and ocfs2_find_leaf() walk our btrees, reading extent
blocks.  They need struct ocfs2_caching_info for that, but not struct
inode.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/alloc.c      | 79 ++++++++++++++++++++++++++-------------------------
 fs/ocfs2/alloc.h      |  5 ++--
 fs/ocfs2/dir.c        |  3 +-
 fs/ocfs2/extent_map.c |  6 ++--
 fs/ocfs2/xattr.c      |  3 +-
 5 files changed, 52 insertions(+), 44 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 1ff13d3958dd..ecd97309493f 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -475,8 +475,8 @@ struct ocfs2_path {
 #define path_leaf_el(_path) ((_path)->p_node[(_path)->p_tree_depth].el)
 #define path_num_items(_path) ((_path)->p_tree_depth + 1)
 
-static int ocfs2_find_path(struct inode *inode, struct ocfs2_path *path,
-			   u32 cpos);
+static int ocfs2_find_path(struct ocfs2_caching_info *ci,
+			   struct ocfs2_path *path, u32 cpos);
 static void ocfs2_adjust_rightmost_records(struct inode *inode,
 					   handle_t *handle,
 					   struct ocfs2_path *path,
@@ -1039,7 +1039,7 @@ static int ocfs2_adjust_rightmost_branch(handle_t *handle,
 		return status;
 	}
 
-	status = ocfs2_find_path(inode, path, UINT_MAX);
+	status = ocfs2_find_path(et->et_ci, path, UINT_MAX);
 	if (status < 0) {
 		mlog_errno(status);
 		goto out;
@@ -1728,7 +1728,7 @@ typedef void (path_insert_t)(void *, struct buffer_head *);
  * This code can be called with a cpos larger than the tree, in which
  * case it will return the rightmost path.
  */
-static int __ocfs2_find_path(struct inode *inode,
+static int __ocfs2_find_path(struct ocfs2_caching_info *ci,
 			     struct ocfs2_extent_list *root_el, u32 cpos,
 			     path_insert_t *func, void *data)
 {
@@ -1739,15 +1739,14 @@ static int __ocfs2_find_path(struct inode *inode,
 	struct ocfs2_extent_block *eb;
 	struct ocfs2_extent_list *el;
 	struct ocfs2_extent_rec *rec;
-	struct ocfs2_inode_info *oi = OCFS2_I(inode);
 
 	el = root_el;
 	while (el->l_tree_depth) {
 		if (le16_to_cpu(el->l_next_free_rec) == 0) {
-			ocfs2_error(inode->i_sb,
-				    "Inode %llu has empty extent list at "
+			ocfs2_error(ocfs2_metadata_cache_get_super(ci),
+				    "Owner %llu has empty extent list at "
 				    "depth %u\n",
-				    (unsigned long long)oi->ip_blkno,
+				    (unsigned long long)ocfs2_metadata_cache_owner(ci),
 				    le16_to_cpu(el->l_tree_depth));
 			ret = -EROFS;
 			goto out;
@@ -1770,10 +1769,10 @@ static int __ocfs2_find_path(struct inode *inode,
 
 		blkno = le64_to_cpu(el->l_recs[i].e_blkno);
 		if (blkno == 0) {
-			ocfs2_error(inode->i_sb,
-				    "Inode %llu has bad blkno in extent list "
+			ocfs2_error(ocfs2_metadata_cache_get_super(ci),
+				    "Owner %llu has bad blkno in extent list "
 				    "at depth %u (index %d)\n",
-				    (unsigned long long)oi->ip_blkno,
+				    (unsigned long long)ocfs2_metadata_cache_owner(ci),
 				    le16_to_cpu(el->l_tree_depth), i);
 			ret = -EROFS;
 			goto out;
@@ -1781,7 +1780,7 @@ static int __ocfs2_find_path(struct inode *inode,
 
 		brelse(bh);
 		bh = NULL;
-		ret = ocfs2_read_extent_block(INODE_CACHE(inode), blkno, &bh);
+		ret = ocfs2_read_extent_block(ci, blkno, &bh);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -1792,10 +1791,10 @@ static int __ocfs2_find_path(struct inode *inode,
 
 		if (le16_to_cpu(el->l_next_free_rec) >
 		    le16_to_cpu(el->l_count)) {
-			ocfs2_error(inode->i_sb,
-				    "Inode %llu has bad count in extent list "
+			ocfs2_error(ocfs2_metadata_cache_get_super(ci),
+				    "Owner %llu has bad count in extent list "
 				    "at block %llu (next free=%u, count=%u)\n",
-				    (unsigned long long)oi->ip_blkno,
+				    (unsigned long long)ocfs2_metadata_cache_owner(ci),
 				    (unsigned long long)bh->b_blocknr,
 				    le16_to_cpu(el->l_next_free_rec),
 				    le16_to_cpu(el->l_count));
@@ -1839,14 +1838,14 @@ static void find_path_ins(void *data, struct buffer_head *bh)
 	ocfs2_path_insert_eb(fp->path, fp->index, bh);
 	fp->index++;
 }
-static int ocfs2_find_path(struct inode *inode, struct ocfs2_path *path,
-			   u32 cpos)
+static int ocfs2_find_path(struct ocfs2_caching_info *ci,
+			   struct ocfs2_path *path, u32 cpos)
 {
 	struct find_path_data data;
 
 	data.index = 1;
 	data.path = path;
-	return __ocfs2_find_path(inode, path_root_el(path), cpos,
+	return __ocfs2_find_path(ci, path_root_el(path), cpos,
 				 find_path_ins, &data);
 }
 
@@ -1871,13 +1870,14 @@ static void find_leaf_ins(void *data, struct buffer_head *bh)
  *
  * This function doesn't handle non btree extent lists.
  */
-int ocfs2_find_leaf(struct inode *inode, struct ocfs2_extent_list *root_el,
-		    u32 cpos, struct buffer_head **leaf_bh)
+int ocfs2_find_leaf(struct ocfs2_caching_info *ci,
+		    struct ocfs2_extent_list *root_el, u32 cpos,
+		    struct buffer_head **leaf_bh)
 {
 	int ret;
 	struct buffer_head *bh = NULL;
 
-	ret = __ocfs2_find_path(inode, root_el, cpos, find_leaf_ins, &bh);
+	ret = __ocfs2_find_path(ci, root_el, cpos, find_leaf_ins, &bh);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -2382,7 +2382,7 @@ static int ocfs2_rotate_tree_right(struct inode *inode,
 		mlog(0, "Rotating a tree: ins. cpos: %u, left path cpos: %u\n",
 		     insert_cpos, cpos);
 
-		ret = ocfs2_find_path(inode, left_path, cpos);
+		ret = ocfs2_find_path(INODE_CACHE(inode), left_path, cpos);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -2923,7 +2923,7 @@ static int __ocfs2_rotate_tree_left(struct inode *inode,
 	}
 
 	while (right_cpos) {
-		ret = ocfs2_find_path(inode, right_path, right_cpos);
+		ret = ocfs2_find_path(et->et_ci, right_path, right_cpos);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -3052,7 +3052,7 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
 			goto out;
 		}
 
-		ret = ocfs2_find_path(inode, left_path, cpos);
+		ret = ocfs2_find_path(et->et_ci, left_path, cpos);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -3297,7 +3297,7 @@ static int ocfs2_get_right_path(struct inode *inode,
 		goto out;
 	}
 
-	ret = ocfs2_find_path(inode, right_path, right_cpos);
+	ret = ocfs2_find_path(INODE_CACHE(inode), right_path, right_cpos);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -3466,7 +3466,7 @@ static int ocfs2_get_left_path(struct inode *inode,
 		goto out;
 	}
 
-	ret = ocfs2_find_path(inode, left_path, left_cpos);
+	ret = ocfs2_find_path(INODE_CACHE(inode), left_path, left_cpos);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -3996,7 +3996,8 @@ static int ocfs2_append_rec_to_path(struct inode *inode, handle_t *handle,
 				goto out;
 			}
 
-			ret = ocfs2_find_path(inode, left_path, left_cpos);
+			ret = ocfs2_find_path(INODE_CACHE(inode), left_path,
+					      left_cpos);
 			if (ret) {
 				mlog_errno(ret);
 				goto out;
@@ -4245,7 +4246,7 @@ static int ocfs2_do_insert_extent(struct inode *inode,
 		cpos = UINT_MAX;
 	}
 
-	ret = ocfs2_find_path(inode, right_path, cpos);
+	ret = ocfs2_find_path(et->et_ci, right_path, cpos);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -4342,7 +4343,8 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
 			if (!left_path)
 				goto out;
 
-			status = ocfs2_find_path(inode, left_path, left_cpos);
+			status = ocfs2_find_path(INODE_CACHE(inode),
+						 left_path, left_cpos);
 			if (status)
 				goto out;
 
@@ -4398,7 +4400,7 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
 		if (!right_path)
 			goto out;
 
-		status = ocfs2_find_path(inode, right_path, right_cpos);
+		status = ocfs2_find_path(INODE_CACHE(inode), right_path, right_cpos);
 		if (status)
 			goto out;
 
@@ -4600,7 +4602,7 @@ static int ocfs2_figure_insert_type(struct inode *inode,
 	 * us the rightmost tree path. This is accounted for below in
 	 * the appending code.
 	 */
-	ret = ocfs2_find_path(inode, path, le32_to_cpu(insert_rec->e_cpos));
+	ret = ocfs2_find_path(et->et_ci, path, le32_to_cpu(insert_rec->e_cpos));
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -4950,7 +4952,7 @@ leftright:
 		ocfs2_reinit_path(path, 1);
 
 		cpos = le32_to_cpu(split_rec.e_cpos);
-		ret = ocfs2_find_path(inode, path, cpos);
+		ret = ocfs2_find_path(et->et_ci, path, cpos);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -5148,7 +5150,7 @@ int ocfs2_mark_extent_written(struct inode *inode,
 		goto out;
 	}
 
-	ret = ocfs2_find_path(inode, left_path, cpos);
+	ret = ocfs2_find_path(et->et_ci, left_path, cpos);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -5320,7 +5322,8 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
 				goto out;
 			}
 
-			ret = ocfs2_find_path(inode, left_path, left_cpos);
+			ret = ocfs2_find_path(et->et_ci, left_path,
+					      left_cpos);
 			if (ret) {
 				mlog_errno(ret);
 				goto out;
@@ -5429,7 +5432,7 @@ int ocfs2_remove_extent(struct inode *inode,
 		goto out;
 	}
 
-	ret = ocfs2_find_path(inode, path, cpos);
+	ret = ocfs2_find_path(et->et_ci, path, cpos);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -5494,7 +5497,7 @@ int ocfs2_remove_extent(struct inode *inode,
 		 */
 		ocfs2_reinit_path(path, 1);
 
-		ret = ocfs2_find_path(inode, path, cpos);
+		ret = ocfs2_find_path(et->et_ci, path, cpos);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -6522,7 +6525,7 @@ static int ocfs2_find_new_last_ext_blk(struct inode *inode,
 		goto out;
 	}
 
-	ret = ocfs2_find_leaf(inode, path_root_el(path), cpos, &bh);
+	ret = ocfs2_find_leaf(INODE_CACHE(inode), path_root_el(path), cpos, &bh);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -7299,7 +7302,7 @@ start:
 	/*
 	 * Truncate always works against the rightmost tree branch.
 	 */
-	status = ocfs2_find_path(inode, path, UINT_MAX);
+	status = ocfs2_find_path(INODE_CACHE(inode), path, UINT_MAX);
 	if (status) {
 		mlog_errno(status);
 		goto bail;
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index ed78ee5139bc..8718e57e70a1 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -223,8 +223,9 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb,
 int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh,
 			  unsigned int start, unsigned int end, int trunc);
 
-int ocfs2_find_leaf(struct inode *inode, struct ocfs2_extent_list *root_el,
-		    u32 cpos, struct buffer_head **leaf_bh);
+int ocfs2_find_leaf(struct ocfs2_caching_info *ci,
+		    struct ocfs2_extent_list *root_el, u32 cpos,
+		    struct buffer_head **leaf_bh);
 int ocfs2_search_extent_list(struct ocfs2_extent_list *el, u32 v_cluster);
 
 /*
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 00e43281b9a4..088a1b5ce9ac 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -805,7 +805,8 @@ static int ocfs2_dx_dir_lookup_rec(struct inode *inode,
 	struct ocfs2_extent_rec *rec = NULL;
 
 	if (el->l_tree_depth) {
-		ret = ocfs2_find_leaf(inode, el, major_hash, &eb_bh);
+		ret = ocfs2_find_leaf(INODE_CACHE(inode), el, major_hash,
+				      &eb_bh);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index a5dc13e6fe76..dc9482cb463a 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -428,7 +428,8 @@ static int ocfs2_get_clusters_nocache(struct inode *inode,
 	tree_height = le16_to_cpu(el->l_tree_depth);
 
 	if (tree_height > 0) {
-		ret = ocfs2_find_leaf(inode, el, v_cluster, &eb_bh);
+		ret = ocfs2_find_leaf(INODE_CACHE(inode), el, v_cluster,
+				      &eb_bh);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -548,7 +549,8 @@ int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
 	u32 coff;
 
 	if (el->l_tree_depth) {
-		ret = ocfs2_find_leaf(inode, el, v_cluster, &eb_bh);
+		ret = ocfs2_find_leaf(INODE_CACHE(inode), el, v_cluster,
+				      &eb_bh);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 93aae7953c2e..61819b208315 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -2854,7 +2854,8 @@ static int ocfs2_xattr_get_rec(struct inode *inode,
 	u64 e_blkno = 0;
 
 	if (el->l_tree_depth) {
-		ret = ocfs2_find_leaf(inode, el, name_hash, &eb_bh);
+		ret = ocfs2_find_leaf(INODE_CACHE(inode), el, name_hash,
+				      &eb_bh);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
-- 
cgit v1.2.3


From 42a5a7a9a5abf9a566b91c51137921957b9a14e4 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Thu, 12 Feb 2009 18:49:19 -0800
Subject: ocfs2: ocfs2_create_new_meta_bhs() doesn't need struct inode.

Pass struct ocfs2_extent_tree into ocfs2_create_new_meta_bhs().  It no
longer needs struct inode or ocfs2_super.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/alloc.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index ecd97309493f..ad41eabd8b74 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -914,9 +914,8 @@ bail:
  * sets h_signature, h_blkno, h_suballoc_bit, h_suballoc_slot, and
  * l_count for you
  */
-static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb,
-				     handle_t *handle,
-				     struct inode *inode,
+static int ocfs2_create_new_meta_bhs(handle_t *handle,
+				     struct ocfs2_extent_tree *et,
 				     int wanted,
 				     struct ocfs2_alloc_context *meta_ac,
 				     struct buffer_head *bhs[])
@@ -925,6 +924,8 @@ static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb,
 	u16 suballoc_bit_start;
 	u32 num_got;
 	u64 first_blkno;
+	struct ocfs2_super *osb =
+		OCFS2_SB(ocfs2_metadata_cache_get_super(et->et_ci));
 	struct ocfs2_extent_block *eb;
 
 	mlog_entry_void();
@@ -950,10 +951,10 @@ static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb,
 				mlog_errno(status);
 				goto bail;
 			}
-			ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode),
-						      bhs[i]);
+			ocfs2_set_new_buffer_uptodate(et->et_ci, bhs[i]);
 
-			status = ocfs2_journal_access_eb(handle, INODE_CACHE(inode), bhs[i],
+			status = ocfs2_journal_access_eb(handle, et->et_ci,
+							 bhs[i],
 							 OCFS2_JOURNAL_ACCESS_CREATE);
 			if (status < 0) {
 				mlog_errno(status);
@@ -1141,7 +1142,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
 		goto bail;
 	}
 
-	status = ocfs2_create_new_meta_bhs(osb, handle, inode, new_blocks,
+	status = ocfs2_create_new_meta_bhs(handle, et, new_blocks,
 					   meta_ac, new_eb_bhs);
 	if (status < 0) {
 		mlog_errno(status);
@@ -1292,7 +1293,7 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
 
 	mlog_entry_void();
 
-	status = ocfs2_create_new_meta_bhs(osb, handle, inode, 1, meta_ac,
+	status = ocfs2_create_new_meta_bhs(handle, et, 1, meta_ac,
 					   &new_eb_bh);
 	if (status < 0) {
 		mlog_errno(status);
-- 
cgit v1.2.3


From 6641b0ce3274d979338cb67b2f562189dcbc1c28 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Thu, 12 Feb 2009 18:57:52 -0800
Subject: ocfs2: Pass ocfs2_extent_tree to ocfs2_unlink_path()

ocfs2_unlink_path() doesn't need struct inode, so let's pass it struct
ocfs2_extent_tree.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/alloc.c | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index ad41eabd8b74..18762f5ebda8 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -2536,7 +2536,8 @@ out:
 	return ret;
 }
 
-static void ocfs2_unlink_path(struct inode *inode, handle_t *handle,
+static void ocfs2_unlink_path(handle_t *handle,
+			      struct ocfs2_extent_tree *et,
 			      struct ocfs2_cached_dealloc_ctxt *dealloc,
 			      struct ocfs2_path *path, int unlink_start)
 {
@@ -2558,12 +2559,12 @@ static void ocfs2_unlink_path(struct inode *inode, handle_t *handle,
 			mlog(ML_ERROR,
 			     "Inode %llu, attempted to remove extent block "
 			     "%llu with %u records\n",
-			     (unsigned long long)OCFS2_I(inode)->ip_blkno,
+			     (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
 			     (unsigned long long)le64_to_cpu(eb->h_blkno),
 			     le16_to_cpu(el->l_next_free_rec));
 
 			ocfs2_journal_dirty(handle, bh);
-			ocfs2_remove_from_cache(INODE_CACHE(inode), bh);
+			ocfs2_remove_from_cache(et->et_ci, bh);
 			continue;
 		}
 
@@ -2576,11 +2577,12 @@ static void ocfs2_unlink_path(struct inode *inode, handle_t *handle,
 		if (ret)
 			mlog_errno(ret);
 
-		ocfs2_remove_from_cache(INODE_CACHE(inode), bh);
+		ocfs2_remove_from_cache(et->et_ci, bh);
 	}
 }
 
-static void ocfs2_unlink_subtree(struct inode *inode, handle_t *handle,
+static void ocfs2_unlink_subtree(handle_t *handle,
+				 struct ocfs2_extent_tree *et,
 				 struct ocfs2_path *left_path,
 				 struct ocfs2_path *right_path,
 				 int subtree_index,
@@ -2611,7 +2613,7 @@ static void ocfs2_unlink_subtree(struct inode *inode, handle_t *handle,
 	ocfs2_journal_dirty(handle, root_bh);
 	ocfs2_journal_dirty(handle, path_leaf_bh(left_path));
 
-	ocfs2_unlink_path(inode, handle, dealloc, right_path,
+	ocfs2_unlink_path(handle, et, dealloc, right_path,
 			  subtree_index + 1);
 }
 
@@ -2744,7 +2746,7 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
 		mlog_errno(ret);
 
 	if (del_right_subtree) {
-		ocfs2_unlink_subtree(inode, handle, left_path, right_path,
+		ocfs2_unlink_subtree(handle, et, left_path, right_path,
 				     subtree_index, dealloc);
 		ret = ocfs2_update_edge_lengths(inode, handle, subtree_index,
 						left_path);
@@ -3067,7 +3069,7 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
 
 		subtree_index = ocfs2_find_subtree_root(inode, left_path, path);
 
-		ocfs2_unlink_subtree(inode, handle, left_path, path,
+		ocfs2_unlink_subtree(handle, et, left_path, path,
 				     subtree_index, dealloc);
 		ret = ocfs2_update_edge_lengths(inode, handle, subtree_index,
 						left_path);
@@ -3086,7 +3088,7 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
 		 * revert the inode back to having extents
 		 * in-line.
 		 */
-		ocfs2_unlink_path(inode, handle, dealloc, path, 1);
+		ocfs2_unlink_path(handle, et, dealloc, path, 1);
 
 		el = et->et_root_el;
 		el->l_tree_depth = 0;
-- 
cgit v1.2.3


From 4619c73e7c9bd10bac6b60925fa28d5a2eeaf6ed Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Thu, 12 Feb 2009 19:02:36 -0800
Subject: ocfs2: ocfs2_complete_edge_insert() doesn't need struct inode at all.

Completely unused argument.  Get rid of it.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/alloc.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 18762f5ebda8..4a8e9717f961 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -1984,7 +1984,7 @@ static void ocfs2_adjust_root_records(struct ocfs2_extent_list *root_el,
  *   - When we've adjusted the last extent record in the left path leaf and the
  *     1st extent record in the right path leaf during cross extent block merge.
  */
-static void ocfs2_complete_edge_insert(struct inode *inode, handle_t *handle,
+static void ocfs2_complete_edge_insert(handle_t *handle,
 				       struct ocfs2_path *left_path,
 				       struct ocfs2_path *right_path,
 				       int subtree_index)
@@ -2161,8 +2161,8 @@ static int ocfs2_rotate_subtree_right(struct inode *inode,
 		goto out;
 	}
 
-	ocfs2_complete_edge_insert(inode, handle, left_path, right_path,
-				subtree_index);
+	ocfs2_complete_edge_insert(handle, left_path, right_path,
+				   subtree_index);
 
 out:
 	return ret;
@@ -2772,7 +2772,7 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
 
 		*deleted = 1;
 	} else
-		ocfs2_complete_edge_insert(inode, handle, left_path, right_path,
+		ocfs2_complete_edge_insert(handle, left_path, right_path,
 					   subtree_index);
 
 out:
@@ -3430,8 +3430,8 @@ static int ocfs2_merge_rec_right(struct inode *inode,
 		if (ret)
 			mlog_errno(ret);
 
-		ocfs2_complete_edge_insert(inode, handle, left_path,
-					   right_path, subtree_index);
+		ocfs2_complete_edge_insert(handle, left_path, right_path,
+					   subtree_index);
 	}
 out:
 	if (right_path)
@@ -3629,7 +3629,7 @@ static int ocfs2_merge_rec_left(struct inode *inode,
 			ocfs2_mv_path(right_path, left_path);
 			left_path = NULL;
 		} else
-			ocfs2_complete_edge_insert(inode, handle, left_path,
+			ocfs2_complete_edge_insert(handle, left_path,
 						   right_path, subtree_index);
 	}
 out:
@@ -4195,8 +4195,8 @@ static int ocfs2_insert_path(struct inode *inode,
 		 */
 		subtree_index = ocfs2_find_subtree_root(inode, left_path,
 							right_path);
-		ocfs2_complete_edge_insert(inode, handle, left_path,
-					   right_path, subtree_index);
+		ocfs2_complete_edge_insert(handle, left_path, right_path,
+					   subtree_index);
 	}
 
 	ret = 0;
@@ -5397,7 +5397,7 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
 		int subtree_index;
 
 		subtree_index = ocfs2_find_subtree_root(inode, left_path, path);
-		ocfs2_complete_edge_insert(inode, handle, left_path, path,
+		ocfs2_complete_edge_insert(handle, left_path, path,
 					   subtree_index);
 	}
 
-- 
cgit v1.2.3


From 5c601aba8c5d9d5f944cf02b59e3288dd72ae6cf Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Thu, 12 Feb 2009 19:10:13 -0800
Subject: ocfs2: Get inode out of ocfs2_rotate_subtree_right().

Pass the ocfs2_extent_tree down through ocfs2_rotate_tree_right() and
get rid of struct inode in ocfs2_rotate_subtree_right().

Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/alloc.c | 37 +++++++++++++++++++------------------
 1 file changed, 19 insertions(+), 18 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 4a8e9717f961..7a04e1791d1a 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -2062,8 +2062,8 @@ static void ocfs2_complete_edge_insert(handle_t *handle,
 		mlog_errno(ret);
 }
 
-static int ocfs2_rotate_subtree_right(struct inode *inode,
-				      handle_t *handle,
+static int ocfs2_rotate_subtree_right(handle_t *handle,
+				      struct ocfs2_extent_tree *et,
 				      struct ocfs2_path *left_path,
 				      struct ocfs2_path *right_path,
 				      int subtree_index)
@@ -2079,10 +2079,10 @@ static int ocfs2_rotate_subtree_right(struct inode *inode,
 	left_el = path_leaf_el(left_path);
 
 	if (left_el->l_next_free_rec != left_el->l_count) {
-		ocfs2_error(inode->i_sb,
+		ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
 			    "Inode %llu has non-full interior leaf node %llu"
 			    "(next free = %u)",
-			    (unsigned long long)OCFS2_I(inode)->ip_blkno,
+			    (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
 			    (unsigned long long)left_leaf_bh->b_blocknr,
 			    le16_to_cpu(left_el->l_next_free_rec));
 		return -EROFS;
@@ -2098,7 +2098,7 @@ static int ocfs2_rotate_subtree_right(struct inode *inode,
 	root_bh = left_path->p_node[subtree_index].bh;
 	BUG_ON(root_bh != right_path->p_node[subtree_index].bh);
 
-	ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode), right_path,
+	ret = ocfs2_path_bh_journal_access(handle, et->et_ci, right_path,
 					   subtree_index);
 	if (ret) {
 		mlog_errno(ret);
@@ -2106,14 +2106,14 @@ static int ocfs2_rotate_subtree_right(struct inode *inode,
 	}
 
 	for(i = subtree_index + 1; i < path_num_items(right_path); i++) {
-		ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode),
+		ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
 						   right_path, i);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
 		}
 
-		ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode),
+		ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
 						   left_path, i);
 		if (ret) {
 			mlog_errno(ret);
@@ -2127,7 +2127,7 @@ static int ocfs2_rotate_subtree_right(struct inode *inode,
 	/* This is a code error, not a disk corruption. */
 	mlog_bug_on_msg(!right_el->l_next_free_rec, "Inode %llu: Rotate fails "
 			"because rightmost leaf block %llu is empty\n",
-			(unsigned long long)OCFS2_I(inode)->ip_blkno,
+			(unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
 			(unsigned long long)right_leaf_bh->b_blocknr);
 
 	ocfs2_create_empty_extent(right_el);
@@ -2325,8 +2325,8 @@ static int ocfs2_leftmost_rec_contains(struct ocfs2_extent_list *el, u32 cpos)
  *   *ret_left_path will contain a valid path which can be passed to
  *   ocfs2_insert_path().
  */
-static int ocfs2_rotate_tree_right(struct inode *inode,
-				   handle_t *handle,
+static int ocfs2_rotate_tree_right(struct inode *inode, handle_t *handle,
+				   struct ocfs2_extent_tree *et,
 				   enum ocfs2_split_type split,
 				   u32 insert_cpos,
 				   struct ocfs2_path *right_path,
@@ -2335,6 +2335,7 @@ static int ocfs2_rotate_tree_right(struct inode *inode,
 	int ret, start, orig_credits = handle->h_buffer_credits;
 	u32 cpos;
 	struct ocfs2_path *left_path = NULL;
+	struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);
 
 	*ret_left_path = NULL;
 
@@ -2345,7 +2346,7 @@ static int ocfs2_rotate_tree_right(struct inode *inode,
 		goto out;
 	}
 
-	ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, right_path, &cpos);
+	ret = ocfs2_find_cpos_for_left_leaf(sb, right_path, &cpos);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -2383,7 +2384,7 @@ static int ocfs2_rotate_tree_right(struct inode *inode,
 		mlog(0, "Rotating a tree: ins. cpos: %u, left path cpos: %u\n",
 		     insert_cpos, cpos);
 
-		ret = ocfs2_find_path(INODE_CACHE(inode), left_path, cpos);
+		ret = ocfs2_find_path(et->et_ci, left_path, cpos);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -2391,10 +2392,11 @@ static int ocfs2_rotate_tree_right(struct inode *inode,
 
 		mlog_bug_on_msg(path_leaf_bh(left_path) ==
 				path_leaf_bh(right_path),
-				"Inode %lu: error during insert of %u "
+				"Owner %llu: error during insert of %u "
 				"(left path cpos %u) results in two identical "
 				"paths ending at %llu\n",
-				inode->i_ino, insert_cpos, cpos,
+				(unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
+				insert_cpos, cpos,
 				(unsigned long long)
 				path_leaf_bh(left_path)->b_blocknr);
 
@@ -2434,7 +2436,7 @@ static int ocfs2_rotate_tree_right(struct inode *inode,
 			goto out;
 		}
 
-		ret = ocfs2_rotate_subtree_right(inode, handle, left_path,
+		ret = ocfs2_rotate_subtree_right(handle, et, left_path,
 						 right_path, start);
 		if (ret) {
 			mlog_errno(ret);
@@ -2466,8 +2468,7 @@ static int ocfs2_rotate_tree_right(struct inode *inode,
 		 */
 		ocfs2_mv_path(right_path, left_path);
 
-		ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, right_path,
-						    &cpos);
+		ret = ocfs2_find_cpos_for_left_leaf(sb, right_path, &cpos);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -4268,7 +4269,7 @@ static int ocfs2_do_insert_extent(struct inode *inode,
 	 * can wind up skipping both of these two special cases...
 	 */
 	if (rotate) {
-		ret = ocfs2_rotate_tree_right(inode, handle, type->ins_split,
+		ret = ocfs2_rotate_tree_right(inode, handle, et, type->ins_split,
 					      le32_to_cpu(insert_rec->e_cpos),
 					      right_path, &left_path);
 		if (ret) {
-- 
cgit v1.2.3


From 7dc028056750328e74ca807041c822068384fe16 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Thu, 12 Feb 2009 19:20:13 -0800
Subject: ocfs2: Pass ocfs2_extent_tree to ocfs2_find_subtree_root()

Get rid of the inode argument.  Use extent_tree instead.  This means a
few more functions have to pass an extent_tree around.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/alloc.c | 52 +++++++++++++++++++++++++++-------------------------
 1 file changed, 27 insertions(+), 25 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 7a04e1791d1a..c3edd02c74e8 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -1691,7 +1691,7 @@ set_and_inc:
  *
  * The array index of the subtree root is passed back.
  */
-static int ocfs2_find_subtree_root(struct inode *inode,
+static int ocfs2_find_subtree_root(struct ocfs2_extent_tree *et,
 				   struct ocfs2_path *left,
 				   struct ocfs2_path *right)
 {
@@ -1709,10 +1709,10 @@ static int ocfs2_find_subtree_root(struct inode *inode,
 		 * The caller didn't pass two adjacent paths.
 		 */
 		mlog_bug_on_msg(i > left->p_tree_depth,
-				"Inode %lu, left depth %u, right depth %u\n"
+				"Owner %llu, left depth %u, right depth %u\n"
 				"left leaf blk %llu, right leaf blk %llu\n",
-				inode->i_ino, left->p_tree_depth,
-				right->p_tree_depth,
+				(unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
+				left->p_tree_depth, right->p_tree_depth,
 				(unsigned long long)path_leaf_bh(left)->b_blocknr,
 				(unsigned long long)path_leaf_bh(right)->b_blocknr);
 	} while (left->p_node[i].bh->b_blocknr ==
@@ -2422,7 +2422,7 @@ static int ocfs2_rotate_tree_right(struct inode *inode, handle_t *handle,
 			goto out_ret_path;
 		}
 
-		start = ocfs2_find_subtree_root(inode, left_path, right_path);
+		start = ocfs2_find_subtree_root(et, left_path, right_path);
 
 		mlog(0, "Subtree root at index %d (blk %llu, depth %d)\n",
 		     start,
@@ -2933,7 +2933,7 @@ static int __ocfs2_rotate_tree_left(struct inode *inode,
 			goto out;
 		}
 
-		subtree_root = ocfs2_find_subtree_root(inode, left_path,
+		subtree_root = ocfs2_find_subtree_root(et, left_path,
 						       right_path);
 
 		mlog(0, "Subtree root at index %d (blk %llu, depth %d)\n",
@@ -3068,7 +3068,7 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
 			goto out;
 		}
 
-		subtree_index = ocfs2_find_subtree_root(inode, left_path, path);
+		subtree_index = ocfs2_find_subtree_root(et, left_path, path);
 
 		ocfs2_unlink_subtree(handle, et, left_path, path,
 				     subtree_index, dealloc);
@@ -3324,6 +3324,7 @@ out:
 static int ocfs2_merge_rec_right(struct inode *inode,
 				 struct ocfs2_path *left_path,
 				 handle_t *handle,
+				 struct ocfs2_extent_tree *et,
 				 struct ocfs2_extent_rec *split_rec,
 				 int index)
 {
@@ -3363,8 +3364,8 @@ static int ocfs2_merge_rec_right(struct inode *inode,
 		       le16_to_cpu(left_rec->e_leaf_clusters) !=
 		       le32_to_cpu(right_rec->e_cpos));
 
-		subtree_index = ocfs2_find_subtree_root(inode,
-							left_path, right_path);
+		subtree_index = ocfs2_find_subtree_root(et, left_path,
+							right_path);
 
 		ret = ocfs2_extend_rotate_transaction(handle, subtree_index,
 						      handle->h_buffer_credits,
@@ -3377,7 +3378,7 @@ static int ocfs2_merge_rec_right(struct inode *inode,
 		root_bh = left_path->p_node[subtree_index].bh;
 		BUG_ON(root_bh != right_path->p_node[subtree_index].bh);
 
-		ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode), right_path,
+		ret = ocfs2_path_bh_journal_access(handle, et->et_ci, right_path,
 						   subtree_index);
 		if (ret) {
 			mlog_errno(ret);
@@ -3386,14 +3387,14 @@ static int ocfs2_merge_rec_right(struct inode *inode,
 
 		for (i = subtree_index + 1;
 		     i < path_num_items(right_path); i++) {
-			ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode),
+			ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
 							   right_path, i);
 			if (ret) {
 				mlog_errno(ret);
 				goto out;
 			}
 
-			ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode),
+			ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
 							   left_path, i);
 			if (ret) {
 				mlog_errno(ret);
@@ -3406,7 +3407,7 @@ static int ocfs2_merge_rec_right(struct inode *inode,
 		right_rec = &el->l_recs[index + 1];
 	}
 
-	ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode), left_path,
+	ret = ocfs2_path_bh_journal_access(handle, et->et_ci, left_path,
 					   path_num_items(left_path) - 1);
 	if (ret) {
 		mlog_errno(ret);
@@ -3417,7 +3418,8 @@ static int ocfs2_merge_rec_right(struct inode *inode,
 
 	le32_add_cpu(&right_rec->e_cpos, -split_clusters);
 	le64_add_cpu(&right_rec->e_blkno,
-		     -ocfs2_clusters_to_blocks(inode->i_sb, split_clusters));
+		     -ocfs2_clusters_to_blocks(ocfs2_metadata_cache_get_super(et->et_ci),
+					       split_clusters));
 	le16_add_cpu(&right_rec->e_leaf_clusters, split_clusters);
 
 	ocfs2_cleanup_merge(el, index);
@@ -3532,8 +3534,8 @@ static int ocfs2_merge_rec_left(struct inode *inode,
 		       le16_to_cpu(left_rec->e_leaf_clusters) !=
 		       le32_to_cpu(split_rec->e_cpos));
 
-		subtree_index = ocfs2_find_subtree_root(inode,
-							left_path, right_path);
+		subtree_index = ocfs2_find_subtree_root(et, left_path,
+							right_path);
 
 		ret = ocfs2_extend_rotate_transaction(handle, subtree_index,
 						      handle->h_buffer_credits,
@@ -3694,7 +3696,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
 		 * if we do merge_right first and merge_left later.
 		 */
 		ret = ocfs2_merge_rec_right(inode, path,
-					    handle, split_rec,
+					    handle, et, split_rec,
 					    split_index);
 		if (ret) {
 			mlog_errno(ret);
@@ -3758,9 +3760,8 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
 				goto out;
 			}
 		} else {
-			ret = ocfs2_merge_rec_right(inode,
-						    path,
-						    handle, split_rec,
+			ret = ocfs2_merge_rec_right(inode, path, handle,
+						    et, split_rec,
 						    split_index);
 			if (ret) {
 				mlog_errno(ret);
@@ -4118,6 +4119,7 @@ static void ocfs2_split_record(struct inode *inode,
  */
 static int ocfs2_insert_path(struct inode *inode,
 			     handle_t *handle,
+			     struct ocfs2_extent_tree *et,
 			     struct ocfs2_path *left_path,
 			     struct ocfs2_path *right_path,
 			     struct ocfs2_extent_rec *insert_rec,
@@ -4143,7 +4145,7 @@ static int ocfs2_insert_path(struct inode *inode,
 			goto out;
 		}
 
-		ret = ocfs2_journal_access_path(INODE_CACHE(inode), handle, left_path);
+		ret = ocfs2_journal_access_path(et->et_ci, handle, left_path);
 		if (ret < 0) {
 			mlog_errno(ret);
 			goto out;
@@ -4154,7 +4156,7 @@ static int ocfs2_insert_path(struct inode *inode,
 	 * Pass both paths to the journal. The majority of inserts
 	 * will be touching all components anyway.
 	 */
-	ret = ocfs2_journal_access_path(INODE_CACHE(inode), handle, right_path);
+	ret = ocfs2_journal_access_path(et->et_ci, handle, right_path);
 	if (ret < 0) {
 		mlog_errno(ret);
 		goto out;
@@ -4194,7 +4196,7 @@ static int ocfs2_insert_path(struct inode *inode,
 		 *
 		 * XXX: Should we extend the transaction here?
 		 */
-		subtree_index = ocfs2_find_subtree_root(inode, left_path,
+		subtree_index = ocfs2_find_subtree_root(et, left_path,
 							right_path);
 		ocfs2_complete_edge_insert(handle, left_path, right_path,
 					   subtree_index);
@@ -4297,7 +4299,7 @@ static int ocfs2_do_insert_extent(struct inode *inode,
 		}
 	}
 
-	ret = ocfs2_insert_path(inode, handle, left_path, right_path,
+	ret = ocfs2_insert_path(inode, handle, et, left_path, right_path,
 				insert_rec, type);
 	if (ret) {
 		mlog_errno(ret);
@@ -5397,7 +5399,7 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
 	if (left_path) {
 		int subtree_index;
 
-		subtree_index = ocfs2_find_subtree_root(inode, left_path, path);
+		subtree_index = ocfs2_find_subtree_root(et, left_path, path);
 		ocfs2_complete_edge_insert(handle, left_path, path,
 					   subtree_index);
 	}
-- 
cgit v1.2.3


From 6136ca5f5f9fd38da399e9ff9380f537c1b3b901 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Thu, 12 Feb 2009 19:32:43 -0800
Subject: ocfs2: Drop struct inode from ocfs2_extent_tree_operations.

We can get to the inode from the caching information.  Other parent
types don't need it.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/alloc.c | 93 +++++++++++++++++++++++---------------------------------
 fs/ocfs2/inode.c |  4 ---
 fs/ocfs2/inode.h |  6 ++++
 3 files changed, 44 insertions(+), 59 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index c3edd02c74e8..072f7fe54073 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -79,18 +79,16 @@ struct ocfs2_extent_tree_operations {
 	 * that value.  new_clusters is the delta, and must be
 	 * added to the total.  Required.
 	 */
-	void (*eo_update_clusters)(struct inode *inode,
-				   struct ocfs2_extent_tree *et,
+	void (*eo_update_clusters)(struct ocfs2_extent_tree *et,
 				   u32 new_clusters);
 
 	/*
 	 * If ->eo_insert_check() exists, it is called before rec is
 	 * inserted into the extent tree.  It is optional.
 	 */
-	int (*eo_insert_check)(struct inode *inode,
-			       struct ocfs2_extent_tree *et,
+	int (*eo_insert_check)(struct ocfs2_extent_tree *et,
 			       struct ocfs2_extent_rec *rec);
-	int (*eo_sanity_check)(struct inode *inode, struct ocfs2_extent_tree *et);
+	int (*eo_sanity_check)(struct ocfs2_extent_tree *et);
 
 	/*
 	 * --------------------------------------------------------------
@@ -109,8 +107,7 @@ struct ocfs2_extent_tree_operations {
 	 * it exists.  If it does not, et->et_max_leaf_clusters is set
 	 * to 0 (unlimited).  Optional.
 	 */
-	void (*eo_fill_max_leaf_clusters)(struct inode *inode,
-					  struct ocfs2_extent_tree *et);
+	void (*eo_fill_max_leaf_clusters)(struct ocfs2_extent_tree *et);
 };
 
 
@@ -121,14 +118,11 @@ struct ocfs2_extent_tree_operations {
 static u64 ocfs2_dinode_get_last_eb_blk(struct ocfs2_extent_tree *et);
 static void ocfs2_dinode_set_last_eb_blk(struct ocfs2_extent_tree *et,
 					 u64 blkno);
-static void ocfs2_dinode_update_clusters(struct inode *inode,
-					 struct ocfs2_extent_tree *et,
+static void ocfs2_dinode_update_clusters(struct ocfs2_extent_tree *et,
 					 u32 clusters);
-static int ocfs2_dinode_insert_check(struct inode *inode,
-				     struct ocfs2_extent_tree *et,
+static int ocfs2_dinode_insert_check(struct ocfs2_extent_tree *et,
 				     struct ocfs2_extent_rec *rec);
-static int ocfs2_dinode_sanity_check(struct inode *inode,
-				     struct ocfs2_extent_tree *et);
+static int ocfs2_dinode_sanity_check(struct ocfs2_extent_tree *et);
 static void ocfs2_dinode_fill_root_el(struct ocfs2_extent_tree *et);
 static struct ocfs2_extent_tree_operations ocfs2_dinode_et_ops = {
 	.eo_set_last_eb_blk	= ocfs2_dinode_set_last_eb_blk,
@@ -156,40 +150,37 @@ static u64 ocfs2_dinode_get_last_eb_blk(struct ocfs2_extent_tree *et)
 	return le64_to_cpu(di->i_last_eb_blk);
 }
 
-static void ocfs2_dinode_update_clusters(struct inode *inode,
-					 struct ocfs2_extent_tree *et,
+static void ocfs2_dinode_update_clusters(struct ocfs2_extent_tree *et,
 					 u32 clusters)
 {
+	struct ocfs2_inode_info *oi = cache_info_to_inode(et->et_ci);
 	struct ocfs2_dinode *di = et->et_object;
 
 	le32_add_cpu(&di->i_clusters, clusters);
-	spin_lock(&OCFS2_I(inode)->ip_lock);
-	OCFS2_I(inode)->ip_clusters = le32_to_cpu(di->i_clusters);
-	spin_unlock(&OCFS2_I(inode)->ip_lock);
+	spin_lock(&oi->ip_lock);
+	oi->ip_clusters = le32_to_cpu(di->i_clusters);
+	spin_unlock(&oi->ip_lock);
 }
 
-static int ocfs2_dinode_insert_check(struct inode *inode,
-				     struct ocfs2_extent_tree *et,
+static int ocfs2_dinode_insert_check(struct ocfs2_extent_tree *et,
 				     struct ocfs2_extent_rec *rec)
 {
-	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct ocfs2_inode_info *oi = cache_info_to_inode(et->et_ci);
+	struct ocfs2_super *osb = OCFS2_SB(oi->vfs_inode.i_sb);
 
-	BUG_ON(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL);
+	BUG_ON(oi->ip_dyn_features & OCFS2_INLINE_DATA_FL);
 	mlog_bug_on_msg(!ocfs2_sparse_alloc(osb) &&
-			(OCFS2_I(inode)->ip_clusters !=
-			 le32_to_cpu(rec->e_cpos)),
+			(oi->ip_clusters != le32_to_cpu(rec->e_cpos)),
 			"Device %s, asking for sparse allocation: inode %llu, "
 			"cpos %u, clusters %u\n",
 			osb->dev_str,
-			(unsigned long long)OCFS2_I(inode)->ip_blkno,
-			rec->e_cpos,
-			OCFS2_I(inode)->ip_clusters);
+			(unsigned long long)oi->ip_blkno,
+			rec->e_cpos, oi->ip_clusters);
 
 	return 0;
 }
 
-static int ocfs2_dinode_sanity_check(struct inode *inode,
-				     struct ocfs2_extent_tree *et)
+static int ocfs2_dinode_sanity_check(struct ocfs2_extent_tree *et)
 {
 	struct ocfs2_dinode *di = et->et_object;
 
@@ -229,8 +220,7 @@ static u64 ocfs2_xattr_value_get_last_eb_blk(struct ocfs2_extent_tree *et)
 	return le64_to_cpu(vb->vb_xv->xr_last_eb_blk);
 }
 
-static void ocfs2_xattr_value_update_clusters(struct inode *inode,
-					      struct ocfs2_extent_tree *et,
+static void ocfs2_xattr_value_update_clusters(struct ocfs2_extent_tree *et,
 					      u32 clusters)
 {
 	struct ocfs2_xattr_value_buf *vb = et->et_object;
@@ -252,12 +242,11 @@ static void ocfs2_xattr_tree_fill_root_el(struct ocfs2_extent_tree *et)
 	et->et_root_el = &xb->xb_attrs.xb_root.xt_list;
 }
 
-static void ocfs2_xattr_tree_fill_max_leaf_clusters(struct inode *inode,
-						    struct ocfs2_extent_tree *et)
+static void ocfs2_xattr_tree_fill_max_leaf_clusters(struct ocfs2_extent_tree *et)
 {
+	struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);
 	et->et_max_leaf_clusters =
-		ocfs2_clusters_for_bytes(inode->i_sb,
-					 OCFS2_MAX_XATTR_TREE_LEAF_SIZE);
+		ocfs2_clusters_for_bytes(sb, OCFS2_MAX_XATTR_TREE_LEAF_SIZE);
 }
 
 static void ocfs2_xattr_tree_set_last_eb_blk(struct ocfs2_extent_tree *et,
@@ -277,8 +266,7 @@ static u64 ocfs2_xattr_tree_get_last_eb_blk(struct ocfs2_extent_tree *et)
 	return le64_to_cpu(xt->xt_last_eb_blk);
 }
 
-static void ocfs2_xattr_tree_update_clusters(struct inode *inode,
-					     struct ocfs2_extent_tree *et,
+static void ocfs2_xattr_tree_update_clusters(struct ocfs2_extent_tree *et,
 					     u32 clusters)
 {
 	struct ocfs2_xattr_block *xb = et->et_object;
@@ -309,8 +297,7 @@ static u64 ocfs2_dx_root_get_last_eb_blk(struct ocfs2_extent_tree *et)
 	return le64_to_cpu(dx_root->dr_last_eb_blk);
 }
 
-static void ocfs2_dx_root_update_clusters(struct inode *inode,
-					  struct ocfs2_extent_tree *et,
+static void ocfs2_dx_root_update_clusters(struct ocfs2_extent_tree *et,
 					  u32 clusters)
 {
 	struct ocfs2_dx_root_block *dx_root = et->et_object;
@@ -318,8 +305,7 @@ static void ocfs2_dx_root_update_clusters(struct inode *inode,
 	le32_add_cpu(&dx_root->dr_clusters, clusters);
 }
 
-static int ocfs2_dx_root_sanity_check(struct inode *inode,
-				      struct ocfs2_extent_tree *et)
+static int ocfs2_dx_root_sanity_check(struct ocfs2_extent_tree *et)
 {
 	struct ocfs2_dx_root_block *dx_root = et->et_object;
 
@@ -362,7 +348,7 @@ static void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et,
 	if (!et->et_ops->eo_fill_max_leaf_clusters)
 		et->et_max_leaf_clusters = 0;
 	else
-		et->et_ops->eo_fill_max_leaf_clusters(inode, et);
+		et->et_ops->eo_fill_max_leaf_clusters(et);
 }
 
 void ocfs2_init_dinode_extent_tree(struct ocfs2_extent_tree *et,
@@ -408,11 +394,10 @@ static inline u64 ocfs2_et_get_last_eb_blk(struct ocfs2_extent_tree *et)
 	return et->et_ops->eo_get_last_eb_blk(et);
 }
 
-static inline void ocfs2_et_update_clusters(struct inode *inode,
-					    struct ocfs2_extent_tree *et,
+static inline void ocfs2_et_update_clusters(struct ocfs2_extent_tree *et,
 					    u32 clusters)
 {
-	et->et_ops->eo_update_clusters(inode, et, clusters);
+	et->et_ops->eo_update_clusters(et, clusters);
 }
 
 static inline int ocfs2_et_root_journal_access(handle_t *handle,
@@ -423,24 +408,22 @@ static inline int ocfs2_et_root_journal_access(handle_t *handle,
 					  type);
 }
 
-static inline int ocfs2_et_insert_check(struct inode *inode,
-					struct ocfs2_extent_tree *et,
+static inline int ocfs2_et_insert_check(struct ocfs2_extent_tree *et,
 					struct ocfs2_extent_rec *rec)
 {
 	int ret = 0;
 
 	if (et->et_ops->eo_insert_check)
-		ret = et->et_ops->eo_insert_check(inode, et, rec);
+		ret = et->et_ops->eo_insert_check(et, rec);
 	return ret;
 }
 
-static inline int ocfs2_et_sanity_check(struct inode *inode,
-					struct ocfs2_extent_tree *et)
+static inline int ocfs2_et_sanity_check(struct ocfs2_extent_tree *et)
 {
 	int ret = 0;
 
 	if (et->et_ops->eo_sanity_check)
-		ret = et->et_ops->eo_sanity_check(inode, et);
+		ret = et->et_ops->eo_sanity_check(et);
 	return ret;
 }
 
@@ -3016,7 +2999,7 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
 	struct ocfs2_extent_list *el;
 
 
-	ret = ocfs2_et_sanity_check(inode, et);
+	ret = ocfs2_et_sanity_check(et);
 	if (ret)
 		goto out;
 	/*
@@ -4308,7 +4291,7 @@ static int ocfs2_do_insert_extent(struct inode *inode,
 
 out_update_clusters:
 	if (type->ins_split == SPLIT_NONE)
-		ocfs2_et_update_clusters(inode, et,
+		ocfs2_et_update_clusters(et,
 					 le16_to_cpu(insert_rec->e_leaf_clusters));
 
 	ret = ocfs2_journal_dirty(handle, et->et_root_bh);
@@ -4697,7 +4680,7 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
 	rec.e_blkno = cpu_to_le64(start_blk);
 	rec.e_leaf_clusters = cpu_to_le16(new_clusters);
 	rec.e_flags = flags;
-	status = ocfs2_et_insert_check(inode, et, &rec);
+	status = ocfs2_et_insert_check(et, &rec);
 	if (status) {
 		mlog_errno(status);
 		goto bail;
@@ -5603,7 +5586,7 @@ int ocfs2_remove_btree_range(struct inode *inode,
 		goto out_commit;
 	}
 
-	ocfs2_et_update_clusters(inode, et, -len);
+	ocfs2_et_update_clusters(et, -len);
 
 	ret = ocfs2_journal_dirty(handle, et->et_root_bh);
 	if (ret) {
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 179c819e52ec..e82ceb31cc83 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -1395,10 +1395,6 @@ int ocfs2_read_inode_block(struct inode *inode, struct buffer_head **bh)
 	return ocfs2_read_inode_block_full(inode, bh, 0);
 }
 
-static struct ocfs2_inode_info *cache_info_to_inode(struct ocfs2_caching_info *ci)
-{
-	return container_of(ci, struct ocfs2_inode_info, ip_metadata_cache);
-}
 
 static u64 ocfs2_inode_cache_owner(struct ocfs2_caching_info *ci)
 {
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index 67392f60629d..ba4fe07b293c 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -170,4 +170,10 @@ int ocfs2_read_inode_block(struct inode *inode, struct buffer_head **bh);
 /* The same, but can be passed OCFS2_BH_* flags */
 int ocfs2_read_inode_block_full(struct inode *inode, struct buffer_head **bh,
 				int flags);
+
+static inline struct ocfs2_inode_info *cache_info_to_inode(struct ocfs2_caching_info *ci)
+{
+	return container_of(ci, struct ocfs2_inode_info, ip_metadata_cache);
+}
+
 #endif /* OCFS2_INODE_H */
-- 
cgit v1.2.3


From 1bbf0b8d606645c7596ee641acfbf042765c9719 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Thu, 12 Feb 2009 19:42:08 -0800
Subject: ocfs2: ocfs2_rotate_tree_right() doesn't need struct inode.

We don't need struct inode in ocfs2_rotate_tree_right() anymore.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/alloc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 072f7fe54073..93f02a11302e 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -2308,7 +2308,7 @@ static int ocfs2_leftmost_rec_contains(struct ocfs2_extent_list *el, u32 cpos)
  *   *ret_left_path will contain a valid path which can be passed to
  *   ocfs2_insert_path().
  */
-static int ocfs2_rotate_tree_right(struct inode *inode, handle_t *handle,
+static int ocfs2_rotate_tree_right(handle_t *handle,
 				   struct ocfs2_extent_tree *et,
 				   enum ocfs2_split_type split,
 				   u32 insert_cpos,
@@ -4254,7 +4254,7 @@ static int ocfs2_do_insert_extent(struct inode *inode,
 	 * can wind up skipping both of these two special cases...
 	 */
 	if (rotate) {
-		ret = ocfs2_rotate_tree_right(inode, handle, et, type->ins_split,
+		ret = ocfs2_rotate_tree_right(handle, et, type->ins_split,
 					      le32_to_cpu(insert_rec->e_cpos),
 					      right_path, &left_path);
 		if (ret) {
-- 
cgit v1.2.3


From 09106bae05c3350e8d0ef0ede90b1c3da4bda2f8 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Thu, 12 Feb 2009 19:43:57 -0800
Subject: ocfs2: ocfs2_update_edge_lengths() doesn't need struct inode.

Pass in the extent tree, which is all we need.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/alloc.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 93f02a11302e..8efcface6869 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -2465,7 +2465,8 @@ out_ret_path:
 	return ret;
 }
 
-static int ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle,
+static int ocfs2_update_edge_lengths(handle_t *handle,
+				     struct ocfs2_extent_tree *et,
 				     int subtree_index, struct ocfs2_path *path)
 {
 	int i, idx, ret;
@@ -2490,7 +2491,7 @@ static int ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle,
 		goto out;
 	}
 
-	ret = ocfs2_journal_access_path(INODE_CACHE(inode), handle, path);
+	ret = ocfs2_journal_access_path(et->et_ci, handle, path);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -2732,7 +2733,7 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
 	if (del_right_subtree) {
 		ocfs2_unlink_subtree(handle, et, left_path, right_path,
 				     subtree_index, dealloc);
-		ret = ocfs2_update_edge_lengths(inode, handle, subtree_index,
+		ret = ocfs2_update_edge_lengths(handle, et, subtree_index,
 						left_path);
 		if (ret) {
 			mlog_errno(ret);
@@ -3055,7 +3056,7 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
 
 		ocfs2_unlink_subtree(handle, et, left_path, path,
 				     subtree_index, dealloc);
-		ret = ocfs2_update_edge_lengths(inode, handle, subtree_index,
+		ret = ocfs2_update_edge_lengths(handle, et, subtree_index,
 						left_path);
 		if (ret) {
 			mlog_errno(ret);
-- 
cgit v1.2.3


From 1e2dd63fe0b6e99b81904a61090db801978b9520 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Thu, 12 Feb 2009 19:45:28 -0800
Subject: ocfs2: ocfs2_rotate_subtree_left() doesn't need struct inode.

It already has struct ocfs2_extent_tree, which has the caching info.  So
we don't need to pass it struct inode.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/alloc.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 8efcface6869..b358d567313e 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -2602,13 +2602,13 @@ static void ocfs2_unlink_subtree(handle_t *handle,
 			  subtree_index + 1);
 }
 
-static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
+static int ocfs2_rotate_subtree_left(handle_t *handle,
+				     struct ocfs2_extent_tree *et,
 				     struct ocfs2_path *left_path,
 				     struct ocfs2_path *right_path,
 				     int subtree_index,
 				     struct ocfs2_cached_dealloc_ctxt *dealloc,
-				     int *deleted,
-				     struct ocfs2_extent_tree *et)
+				     int *deleted)
 {
 	int ret, i, del_right_subtree = 0, right_has_empty = 0;
 	struct buffer_head *root_bh, *et_root_bh = path_root_bh(right_path);
@@ -2644,7 +2644,7 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
 			return -EAGAIN;
 
 		if (le16_to_cpu(right_leaf_el->l_next_free_rec) > 1) {
-			ret = ocfs2_journal_access_eb(handle, INODE_CACHE(inode),
+			ret = ocfs2_journal_access_eb(handle, et->et_ci,
 						      path_leaf_bh(right_path),
 						      OCFS2_JOURNAL_ACCESS_WRITE);
 			if (ret) {
@@ -2679,7 +2679,7 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
 	 */
 	BUG_ON(right_has_empty && !del_right_subtree);
 
-	ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode), right_path,
+	ret = ocfs2_path_bh_journal_access(handle, et->et_ci, right_path,
 					   subtree_index);
 	if (ret) {
 		mlog_errno(ret);
@@ -2687,14 +2687,14 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
 	}
 
 	for(i = subtree_index + 1; i < path_num_items(right_path); i++) {
-		ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode),
+		ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
 						   right_path, i);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
 		}
 
-		ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode),
+		ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
 						   left_path, i);
 		if (ret) {
 			mlog_errno(ret);
@@ -2944,9 +2944,9 @@ static int __ocfs2_rotate_tree_left(struct inode *inode,
 			goto out;
 		}
 
-		ret = ocfs2_rotate_subtree_left(inode, handle, left_path,
+		ret = ocfs2_rotate_subtree_left(handle, et, left_path,
 						right_path, subtree_root,
-						dealloc, &deleted, et);
+						dealloc, &deleted);
 		if (ret == -EAGAIN) {
 			/*
 			 * The rotation has to temporarily stop due to
-- 
cgit v1.2.3


From e46f74dc357947e2aed9bdd63cf335c5fd23810b Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Thu, 12 Feb 2009 19:47:43 -0800
Subject: ocfs2: __ocfs2_rotate_tree_left() doesn't need struct inode.

It already has struct ocfs2_extent_tree, which has the caching info.  So
we don't need to pass it struct inode.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/alloc.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index b358d567313e..12dbd6ee6faf 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -2871,24 +2871,24 @@ out:
 	return ret;
 }
 
-static int __ocfs2_rotate_tree_left(struct inode *inode,
-				    handle_t *handle, int orig_credits,
+static int __ocfs2_rotate_tree_left(handle_t *handle,
+				    struct ocfs2_extent_tree *et,
+				    int orig_credits,
 				    struct ocfs2_path *path,
 				    struct ocfs2_cached_dealloc_ctxt *dealloc,
-				    struct ocfs2_path **empty_extent_path,
-				    struct ocfs2_extent_tree *et)
+				    struct ocfs2_path **empty_extent_path)
 {
 	int ret, subtree_root, deleted;
 	u32 right_cpos;
 	struct ocfs2_path *left_path = NULL;
 	struct ocfs2_path *right_path = NULL;
+	struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);
 
 	BUG_ON(!ocfs2_is_empty_extent(&(path_leaf_el(path)->l_recs[0])));
 
 	*empty_extent_path = NULL;
 
-	ret = ocfs2_find_cpos_for_right_leaf(inode->i_sb, path,
-					     &right_cpos);
+	ret = ocfs2_find_cpos_for_right_leaf(sb, path, &right_cpos);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -2937,7 +2937,7 @@ static int __ocfs2_rotate_tree_left(struct inode *inode,
 		 * Caller might still want to make changes to the
 		 * tree root, so re-add it to the journal here.
 		 */
-		ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode),
+		ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
 						   left_path, 0);
 		if (ret) {
 			mlog_errno(ret);
@@ -2973,7 +2973,7 @@ static int __ocfs2_rotate_tree_left(struct inode *inode,
 
 		ocfs2_mv_path(left_path, right_path);
 
-		ret = ocfs2_find_cpos_for_right_leaf(inode->i_sb, left_path,
+		ret = ocfs2_find_cpos_for_right_leaf(sb, left_path,
 						     &right_cpos);
 		if (ret) {
 			mlog_errno(ret);
@@ -3187,8 +3187,8 @@ rightmost_no_delete:
 	 * and restarting from there.
 	 */
 try_rotate:
-	ret = __ocfs2_rotate_tree_left(inode, handle, orig_credits, path,
-				       dealloc, &restart_path, et);
+	ret = __ocfs2_rotate_tree_left(handle, et, orig_credits, path,
+				       dealloc, &restart_path);
 	if (ret && ret != -EAGAIN) {
 		mlog_errno(ret);
 		goto out;
@@ -3198,9 +3198,9 @@ try_rotate:
 		tmp_path = restart_path;
 		restart_path = NULL;
 
-		ret = __ocfs2_rotate_tree_left(inode, handle, orig_credits,
+		ret = __ocfs2_rotate_tree_left(handle, et, orig_credits,
 					       tmp_path, dealloc,
-					       &restart_path, et);
+					       &restart_path);
 		if (ret && ret != -EAGAIN) {
 			mlog_errno(ret);
 			goto out;
-- 
cgit v1.2.3


From 70f18c08b476e315c8ee17ea34b55ea1957e7e7d Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 13 Feb 2009 02:09:31 -0800
Subject: ocfs2: ocfs2_rotate_tree_left() no longer needs struct inode.

It already gets ocfs2_extent_tree, so we can just use that.  This chains
to the same modification for ocfs2_remove_rightmost_path() and
ocfs2_rotate_rightmost_leaf_left().

Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/alloc.c | 56 ++++++++++++++++++++++++++------------------------------
 1 file changed, 26 insertions(+), 30 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 12dbd6ee6faf..d348cfb509e4 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -2843,8 +2843,8 @@ out:
 	return ret;
 }
 
-static int ocfs2_rotate_rightmost_leaf_left(struct inode *inode,
-					    handle_t *handle,
+static int ocfs2_rotate_rightmost_leaf_left(handle_t *handle,
+					    struct ocfs2_extent_tree *et,
 					    struct ocfs2_path *path)
 {
 	int ret;
@@ -2854,7 +2854,7 @@ static int ocfs2_rotate_rightmost_leaf_left(struct inode *inode,
 	if (!ocfs2_is_empty_extent(&el->l_recs[0]))
 		return 0;
 
-	ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode), path,
+	ret = ocfs2_path_bh_journal_access(handle, et->et_ci, path,
 					   path_num_items(path) - 1);
 	if (ret) {
 		mlog_errno(ret);
@@ -2988,10 +2988,10 @@ out:
 	return ret;
 }
 
-static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
+static int ocfs2_remove_rightmost_path(handle_t *handle,
+				struct ocfs2_extent_tree *et,
 				struct ocfs2_path *path,
-				struct ocfs2_cached_dealloc_ctxt *dealloc,
-				struct ocfs2_extent_tree *et)
+				struct ocfs2_cached_dealloc_ctxt *dealloc)
 {
 	int ret, subtree_index;
 	u32 cpos;
@@ -3070,7 +3070,7 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
 		 * 'path' is also the leftmost path which
 		 * means it must be the only one. This gets
 		 * handled differently because we want to
-		 * revert the inode back to having extents
+		 * revert the root back to having extents
 		 * in-line.
 		 */
 		ocfs2_unlink_path(handle, et, dealloc, path, 1);
@@ -3106,10 +3106,10 @@ out:
  * the rightmost tree leaf record is removed so the caller is
  * responsible for detecting and correcting that.
  */
-static int ocfs2_rotate_tree_left(struct inode *inode, handle_t *handle,
+static int ocfs2_rotate_tree_left(handle_t *handle,
+				  struct ocfs2_extent_tree *et,
 				  struct ocfs2_path *path,
-				  struct ocfs2_cached_dealloc_ctxt *dealloc,
-				  struct ocfs2_extent_tree *et)
+				  struct ocfs2_cached_dealloc_ctxt *dealloc)
 {
 	int ret, orig_credits = handle->h_buffer_credits;
 	struct ocfs2_path *tmp_path = NULL, *restart_path = NULL;
@@ -3126,8 +3126,7 @@ rightmost_no_delete:
 		 * Inline extents. This is trivially handled, so do
 		 * it up front.
 		 */
-		ret = ocfs2_rotate_rightmost_leaf_left(inode, handle,
-						       path);
+		ret = ocfs2_rotate_rightmost_leaf_left(handle, et, path);
 		if (ret)
 			mlog_errno(ret);
 		goto out;
@@ -3143,7 +3142,7 @@ rightmost_no_delete:
 	 *
 	 *  1) is handled via ocfs2_rotate_rightmost_leaf_left()
 	 *  2a) we need the left branch so that we can update it with the unlink
-	 *  2b) we need to bring the inode back to inline extents.
+	 *  2b) we need to bring the root back to inline extents.
 	 */
 
 	eb = (struct ocfs2_extent_block *)path_leaf_bh(path)->b_data;
@@ -3159,9 +3158,9 @@ rightmost_no_delete:
 
 		if (le16_to_cpu(el->l_next_free_rec) == 0) {
 			ret = -EIO;
-			ocfs2_error(inode->i_sb,
-				    "Inode %llu has empty extent block at %llu",
-				    (unsigned long long)OCFS2_I(inode)->ip_blkno,
+			ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
+				    "Owner %llu has empty extent block at %llu",
+				    (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
 				    (unsigned long long)le64_to_cpu(eb->h_blkno));
 			goto out;
 		}
@@ -3175,8 +3174,8 @@ rightmost_no_delete:
 		 * nonempty list.
 		 */
 
-		ret = ocfs2_remove_rightmost_path(inode, handle, path,
-						  dealloc, et);
+		ret = ocfs2_remove_rightmost_path(handle, et, path,
+						  dealloc);
 		if (ret)
 			mlog_errno(ret);
 		goto out;
@@ -3602,9 +3601,9 @@ static int ocfs2_merge_rec_left(struct inode *inode,
 		if (le16_to_cpu(right_rec->e_leaf_clusters) == 0 &&
 		    le16_to_cpu(el->l_next_free_rec) == 1) {
 
-			ret = ocfs2_remove_rightmost_path(inode, handle,
+			ret = ocfs2_remove_rightmost_path(handle, et,
 							  right_path,
-							  dealloc, et);
+							  dealloc);
 			if (ret) {
 				mlog_errno(ret);
 				goto out;
@@ -3649,8 +3648,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
 		 * extents - having more than one in a leaf is
 		 * illegal.
 		 */
-		ret = ocfs2_rotate_tree_left(inode, handle, path,
-					     dealloc, et);
+		ret = ocfs2_rotate_tree_left(handle, et, path, dealloc);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -3693,8 +3691,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
 		BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0]));
 
 		/* The merge left us with an empty extent, remove it. */
-		ret = ocfs2_rotate_tree_left(inode, handle, path,
-					     dealloc, et);
+		ret = ocfs2_rotate_tree_left(handle, et, path, dealloc);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -3716,8 +3713,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
 			goto out;
 		}
 
-		ret = ocfs2_rotate_tree_left(inode, handle, path,
-					     dealloc, et);
+		ret = ocfs2_rotate_tree_left(handle, et, path, dealloc);
 		/*
 		 * Error from this last rotate is not critical, so
 		 * print but don't bubble it up.
@@ -3758,8 +3754,8 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
 			 * The merge may have left an empty extent in
 			 * our leaf. Try to rotate it away.
 			 */
-			ret = ocfs2_rotate_tree_left(inode, handle, path,
-						     dealloc, et);
+			ret = ocfs2_rotate_tree_left(handle, et, path,
+						     dealloc);
 			if (ret)
 				mlog_errno(ret);
 			ret = 0;
@@ -5259,7 +5255,7 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
 	struct ocfs2_extent_block *eb;
 
 	if (ocfs2_is_empty_extent(&el->l_recs[0]) && index > 0) {
-		ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc, et);
+		ret = ocfs2_rotate_tree_left(handle, et, path, dealloc);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -5390,7 +5386,7 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
 
 	ocfs2_journal_dirty(handle, path_leaf_bh(path));
 
-	ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc, et);
+	ret = ocfs2_rotate_tree_left(handle, et, path, dealloc);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
-- 
cgit v1.2.3


From 4fe82c312a7d975a9d0f591dc9180c1197ee4270 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 13 Feb 2009 02:16:08 -0800
Subject: ocfs2: ocfs2_merge_rec_left/right() no longer need struct inode.

Drop it from the parameters - they already have ocfs2_extent_tree.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/alloc.c | 54 ++++++++++++++++++++++++------------------------------
 1 file changed, 24 insertions(+), 30 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index d348cfb509e4..bac6ca024768 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -3250,7 +3250,7 @@ static void ocfs2_cleanup_merge(struct ocfs2_extent_list *el,
 	}
 }
 
-static int ocfs2_get_right_path(struct inode *inode,
+static int ocfs2_get_right_path(struct ocfs2_extent_tree *et,
 				struct ocfs2_path *left_path,
 				struct ocfs2_path **ret_right_path)
 {
@@ -3267,8 +3267,8 @@ static int ocfs2_get_right_path(struct inode *inode,
 	left_el = path_leaf_el(left_path);
 	BUG_ON(left_el->l_next_free_rec != left_el->l_count);
 
-	ret = ocfs2_find_cpos_for_right_leaf(inode->i_sb, left_path,
-					     &right_cpos);
+	ret = ocfs2_find_cpos_for_right_leaf(ocfs2_metadata_cache_get_super(et->et_ci),
+					     left_path, &right_cpos);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -3284,7 +3284,7 @@ static int ocfs2_get_right_path(struct inode *inode,
 		goto out;
 	}
 
-	ret = ocfs2_find_path(INODE_CACHE(inode), right_path, right_cpos);
+	ret = ocfs2_find_path(et->et_ci, right_path, right_cpos);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -3304,8 +3304,7 @@ out:
  * For index == l_count - 1, the "next" means the 1st extent rec of the
  * next extent block.
  */
-static int ocfs2_merge_rec_right(struct inode *inode,
-				 struct ocfs2_path *left_path,
+static int ocfs2_merge_rec_right(struct ocfs2_path *left_path,
 				 handle_t *handle,
 				 struct ocfs2_extent_tree *et,
 				 struct ocfs2_extent_rec *split_rec,
@@ -3328,7 +3327,7 @@ static int ocfs2_merge_rec_right(struct inode *inode,
 	if (index == le16_to_cpu(el->l_next_free_rec) - 1 &&
 	    le16_to_cpu(el->l_next_free_rec) == le16_to_cpu(el->l_count)) {
 		/* we meet with a cross extent block merge. */
-		ret = ocfs2_get_right_path(inode, left_path, &right_path);
+		ret = ocfs2_get_right_path(et, left_path, &right_path);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -3425,7 +3424,7 @@ out:
 	return ret;
 }
 
-static int ocfs2_get_left_path(struct inode *inode,
+static int ocfs2_get_left_path(struct ocfs2_extent_tree *et,
 			       struct ocfs2_path *right_path,
 			       struct ocfs2_path **ret_left_path)
 {
@@ -3438,7 +3437,7 @@ static int ocfs2_get_left_path(struct inode *inode,
 	/* This function shouldn't be called for non-trees. */
 	BUG_ON(right_path->p_tree_depth == 0);
 
-	ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb,
+	ret = ocfs2_find_cpos_for_left_leaf(ocfs2_metadata_cache_get_super(et->et_ci),
 					    right_path, &left_cpos);
 	if (ret) {
 		mlog_errno(ret);
@@ -3455,7 +3454,7 @@ static int ocfs2_get_left_path(struct inode *inode,
 		goto out;
 	}
 
-	ret = ocfs2_find_path(INODE_CACHE(inode), left_path, left_cpos);
+	ret = ocfs2_find_path(et->et_ci, left_path, left_cpos);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -3478,12 +3477,11 @@ out:
  * remove the rightmost leaf extent block in the right_path and change
  * the right path to indicate the new rightmost path.
  */
-static int ocfs2_merge_rec_left(struct inode *inode,
-				struct ocfs2_path *right_path,
+static int ocfs2_merge_rec_left(struct ocfs2_path *right_path,
 				handle_t *handle,
+				struct ocfs2_extent_tree *et,
 				struct ocfs2_extent_rec *split_rec,
 				struct ocfs2_cached_dealloc_ctxt *dealloc,
-				struct ocfs2_extent_tree *et,
 				int index)
 {
 	int ret, i, subtree_index = 0, has_empty_extent = 0;
@@ -3501,7 +3499,7 @@ static int ocfs2_merge_rec_left(struct inode *inode,
 	right_rec = &el->l_recs[index];
 	if (index == 0) {
 		/* we meet with a cross extent block merge. */
-		ret = ocfs2_get_left_path(inode, right_path, &left_path);
+		ret = ocfs2_get_left_path(et, right_path, &left_path);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -3531,7 +3529,7 @@ static int ocfs2_merge_rec_left(struct inode *inode,
 		root_bh = left_path->p_node[subtree_index].bh;
 		BUG_ON(root_bh != right_path->p_node[subtree_index].bh);
 
-		ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode), right_path,
+		ret = ocfs2_path_bh_journal_access(handle, et->et_ci, right_path,
 						   subtree_index);
 		if (ret) {
 			mlog_errno(ret);
@@ -3540,14 +3538,14 @@ static int ocfs2_merge_rec_left(struct inode *inode,
 
 		for (i = subtree_index + 1;
 		     i < path_num_items(right_path); i++) {
-			ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode),
+			ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
 							   right_path, i);
 			if (ret) {
 				mlog_errno(ret);
 				goto out;
 			}
 
-			ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode),
+			ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
 							   left_path, i);
 			if (ret) {
 				mlog_errno(ret);
@@ -3560,7 +3558,7 @@ static int ocfs2_merge_rec_left(struct inode *inode,
 			has_empty_extent = 1;
 	}
 
-	ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode), right_path,
+	ret = ocfs2_path_bh_journal_access(handle, et->et_ci, right_path,
 					   path_num_items(right_path) - 1);
 	if (ret) {
 		mlog_errno(ret);
@@ -3579,7 +3577,8 @@ static int ocfs2_merge_rec_left(struct inode *inode,
 
 	le32_add_cpu(&right_rec->e_cpos, split_clusters);
 	le64_add_cpu(&right_rec->e_blkno,
-		     ocfs2_clusters_to_blocks(inode->i_sb, split_clusters));
+		     ocfs2_clusters_to_blocks(ocfs2_metadata_cache_get_super(et->et_ci),
+					      split_clusters));
 	le16_add_cpu(&right_rec->e_leaf_clusters, -split_clusters);
 
 	ocfs2_cleanup_merge(el, index);
@@ -3677,8 +3676,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
 		 * prevoius extent block. It is more efficient and easier
 		 * if we do merge_right first and merge_left later.
 		 */
-		ret = ocfs2_merge_rec_right(inode, path,
-					    handle, et, split_rec,
+		ret = ocfs2_merge_rec_right(path, handle, et, split_rec,
 					    split_index);
 		if (ret) {
 			mlog_errno(ret);
@@ -3703,10 +3701,8 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
 		 * Note that we don't pass split_rec here on purpose -
 		 * we've merged it into the rec already.
 		 */
-		ret = ocfs2_merge_rec_left(inode, path,
-					   handle, rec,
-					   dealloc, et,
-					   split_index);
+		ret = ocfs2_merge_rec_left(path, handle, et, rec,
+					   dealloc, split_index);
 
 		if (ret) {
 			mlog_errno(ret);
@@ -3730,17 +3726,15 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
 		 * the record on the left (hence the left merge).
 		 */
 		if (ctxt->c_contig_type == CONTIG_RIGHT) {
-			ret = ocfs2_merge_rec_left(inode,
-						   path,
-						   handle, split_rec,
-						   dealloc, et,
+			ret = ocfs2_merge_rec_left(path, handle, et,
+						   split_rec, dealloc,
 						   split_index);
 			if (ret) {
 				mlog_errno(ret);
 				goto out;
 			}
 		} else {
-			ret = ocfs2_merge_rec_right(inode, path, handle,
+			ret = ocfs2_merge_rec_right(path, handle,
 						    et, split_rec,
 						    split_index);
 			if (ret) {
-- 
cgit v1.2.3


From c495dd24ac00654f99540f533185e1fcc9534009 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 13 Feb 2009 02:19:11 -0800
Subject: ocfs2: ocfs2_try_to_merge_extent() doesn't need struct inode.

It's not using it, so remove it from the parameter list.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/alloc.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index bac6ca024768..2c4967f7b667 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -3623,15 +3623,13 @@ out:
 	return ret;
 }
 
-static int ocfs2_try_to_merge_extent(struct inode *inode,
-				     handle_t *handle,
+static int ocfs2_try_to_merge_extent(handle_t *handle,
+				     struct ocfs2_extent_tree *et,
 				     struct ocfs2_path *path,
 				     int split_index,
 				     struct ocfs2_extent_rec *split_rec,
 				     struct ocfs2_cached_dealloc_ctxt *dealloc,
-				     struct ocfs2_merge_ctxt *ctxt,
-				     struct ocfs2_extent_tree *et)
-
+				     struct ocfs2_merge_ctxt *ctxt)
 {
 	int ret = 0;
 	struct ocfs2_extent_list *el = path_leaf_el(path);
@@ -5069,9 +5067,9 @@ static int __ocfs2_mark_extent_written(struct inode *inode,
 		if (ret)
 			mlog_errno(ret);
 	} else {
-		ret = ocfs2_try_to_merge_extent(inode, handle, path,
+		ret = ocfs2_try_to_merge_extent(handle, et, path,
 						split_index, split_rec,
-						dealloc, &ctxt, et);
+						dealloc, &ctxt);
 		if (ret)
 			mlog_errno(ret);
 	}
-- 
cgit v1.2.3


From d401dc12fcced123909eba10334fb5d78866d1a9 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 13 Feb 2009 02:24:10 -0800
Subject: ocfs2: ocfs2_grow_branch() and ocfs2_append_rec_to_path() lose struct
 inode.

ocfs2_grow_tree() is not really using it other than to pass it to the
subfunctions ocfs2_shift_tree_depth(), ocfs2_find_branch_target(), and
ocfs2_add_branch().  The first two weren't using it either, so they drop
the argument.  ocfs2_add_branch() only passed it to
ocfs2_adjust_rightmost_branch(), which drops the inode argument and uses
the ocfs2_extent_tree as well.

ocfs2_append_rec_to_path() can take an ocfs2_extent_tree instead of
the inode.  The function ocfs2_adjust_rightmost_records() goes along for
the ride.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/alloc.c | 80 +++++++++++++++++++++++++-------------------------------
 1 file changed, 36 insertions(+), 44 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 2c4967f7b667..e1479faeb01c 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -460,8 +460,8 @@ struct ocfs2_path {
 
 static int ocfs2_find_path(struct ocfs2_caching_info *ci,
 			   struct ocfs2_path *path, u32 cpos);
-static void ocfs2_adjust_rightmost_records(struct inode *inode,
-					   handle_t *handle,
+static void ocfs2_adjust_rightmost_records(handle_t *handle,
+					   struct ocfs2_extent_tree *et,
 					   struct ocfs2_path *path,
 					   struct ocfs2_extent_rec *insert_rec);
 /*
@@ -1009,7 +1009,6 @@ static inline u32 ocfs2_sum_rightmost_rec(struct ocfs2_extent_list  *el)
  * extent block's rightmost record.
  */
 static int ocfs2_adjust_rightmost_branch(handle_t *handle,
-					 struct inode *inode,
 					 struct ocfs2_extent_tree *et)
 {
 	int status;
@@ -1036,7 +1035,7 @@ static int ocfs2_adjust_rightmost_branch(handle_t *handle,
 		goto out;
 	}
 
-	status = ocfs2_journal_access_path(INODE_CACHE(inode), handle, path);
+	status = ocfs2_journal_access_path(et->et_ci, handle, path);
 	if (status < 0) {
 		mlog_errno(status);
 		goto out;
@@ -1045,7 +1044,7 @@ static int ocfs2_adjust_rightmost_branch(handle_t *handle,
 	el = path_leaf_el(path);
 	rec = &el->l_recs[le32_to_cpu(el->l_next_free_rec) - 1];
 
-	ocfs2_adjust_rightmost_records(inode, handle, path, rec);
+	ocfs2_adjust_rightmost_records(handle, et, path, rec);
 
 out:
 	ocfs2_free_path(path);
@@ -1054,7 +1053,7 @@ out:
 
 /*
  * Add an entire tree branch to our inode. eb_bh is the extent block
- * to start at, if we don't want to start the branch at the dinode
+ * to start at, if we don't want to start the branch at the root
  * structure.
  *
  * last_eb_bh is required as we have to update it's next_leaf pointer
@@ -1063,9 +1062,7 @@ out:
  * the new branch will be 'empty' in the sense that every block will
  * contain a single record with cluster count == 0.
  */
-static int ocfs2_add_branch(struct ocfs2_super *osb,
-			    handle_t *handle,
-			    struct inode *inode,
+static int ocfs2_add_branch(handle_t *handle,
 			    struct ocfs2_extent_tree *et,
 			    struct buffer_head *eb_bh,
 			    struct buffer_head **last_eb_bh,
@@ -1109,7 +1106,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
 	if (root_end > new_cpos) {
 		mlog(0, "adjust the cluster end from %u to %u\n",
 		     root_end, new_cpos);
-		status = ocfs2_adjust_rightmost_branch(handle, inode, et);
+		status = ocfs2_adjust_rightmost_branch(handle, et);
 		if (status) {
 			mlog_errno(status);
 			goto bail;
@@ -1147,7 +1144,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
 		BUG_ON(!OCFS2_IS_VALID_EXTENT_BLOCK(eb));
 		eb_el = &eb->h_list;
 
-		status = ocfs2_journal_access_eb(handle, INODE_CACHE(inode), bh,
+		status = ocfs2_journal_access_eb(handle, et->et_ci, bh,
 						 OCFS2_JOURNAL_ACCESS_CREATE);
 		if (status < 0) {
 			mlog_errno(status);
@@ -1187,7 +1184,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
 	 * journal_dirty erroring as it won't unless we've aborted the
 	 * handle (in which case we would never be here) so reserving
 	 * the write with journal_access is all we need to do. */
-	status = ocfs2_journal_access_eb(handle, INODE_CACHE(inode), *last_eb_bh,
+	status = ocfs2_journal_access_eb(handle, et->et_ci, *last_eb_bh,
 					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
@@ -1200,7 +1197,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
 		goto bail;
 	}
 	if (eb_bh) {
-		status = ocfs2_journal_access_eb(handle, INODE_CACHE(inode), eb_bh,
+		status = ocfs2_journal_access_eb(handle, et->et_ci, eb_bh,
 						 OCFS2_JOURNAL_ACCESS_WRITE);
 		if (status < 0) {
 			mlog_errno(status);
@@ -1260,9 +1257,7 @@ bail:
  * returns back the new extent block so you can add a branch to it
  * after this call.
  */
-static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
-				  handle_t *handle,
-				  struct inode *inode,
+static int ocfs2_shift_tree_depth(handle_t *handle,
 				  struct ocfs2_extent_tree *et,
 				  struct ocfs2_alloc_context *meta_ac,
 				  struct buffer_head **ret_new_eb_bh)
@@ -1290,7 +1285,7 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
 	eb_el = &eb->h_list;
 	root_el = et->et_root_el;
 
-	status = ocfs2_journal_access_eb(handle, INODE_CACHE(inode), new_eb_bh,
+	status = ocfs2_journal_access_eb(handle, et->et_ci, new_eb_bh,
 					 OCFS2_JOURNAL_ACCESS_CREATE);
 	if (status < 0) {
 		mlog_errno(status);
@@ -1365,8 +1360,7 @@ bail:
  *
  * return status < 0 indicates an error.
  */
-static int ocfs2_find_branch_target(struct ocfs2_super *osb,
-				    struct ocfs2_extent_tree *et,
+static int ocfs2_find_branch_target(struct ocfs2_extent_tree *et,
 				    struct buffer_head **target_bh)
 {
 	int status = 0, i;
@@ -1447,20 +1441,18 @@ bail:
  *
  * *last_eb_bh will be updated by ocfs2_add_branch().
  */
-static int ocfs2_grow_tree(struct inode *inode, handle_t *handle,
-			   struct ocfs2_extent_tree *et, int *final_depth,
-			   struct buffer_head **last_eb_bh,
+static int ocfs2_grow_tree(handle_t *handle, struct ocfs2_extent_tree *et,
+			   int *final_depth, struct buffer_head **last_eb_bh,
 			   struct ocfs2_alloc_context *meta_ac)
 {
 	int ret, shift;
 	struct ocfs2_extent_list *el = et->et_root_el;
 	int depth = le16_to_cpu(el->l_tree_depth);
-	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	struct buffer_head *bh = NULL;
 
 	BUG_ON(meta_ac == NULL);
 
-	shift = ocfs2_find_branch_target(osb, et, &bh);
+	shift = ocfs2_find_branch_target(et, &bh);
 	if (shift < 0) {
 		ret = shift;
 		mlog_errno(ret);
@@ -1477,8 +1469,7 @@ static int ocfs2_grow_tree(struct inode *inode, handle_t *handle,
 		/* ocfs2_shift_tree_depth will return us a buffer with
 		 * the new extent block (so we can pass that to
 		 * ocfs2_add_branch). */
-		ret = ocfs2_shift_tree_depth(osb, handle, inode, et,
-					     meta_ac, &bh);
+		ret = ocfs2_shift_tree_depth(handle, et, meta_ac, &bh);
 		if (ret < 0) {
 			mlog_errno(ret);
 			goto out;
@@ -1504,7 +1495,7 @@ static int ocfs2_grow_tree(struct inode *inode, handle_t *handle,
 	/* call ocfs2_add_branch to add the final part of the tree with
 	 * the new data. */
 	mlog(0, "add branch. bh = %p\n", bh);
-	ret = ocfs2_add_branch(osb, handle, inode, et, bh, last_eb_bh,
+	ret = ocfs2_add_branch(handle, et, bh, last_eb_bh,
 			       meta_ac);
 	if (ret < 0) {
 		mlog_errno(ret);
@@ -3881,8 +3872,8 @@ rotate:
 	ocfs2_rotate_leaf(el, insert_rec);
 }
 
-static void ocfs2_adjust_rightmost_records(struct inode *inode,
-					   handle_t *handle,
+static void ocfs2_adjust_rightmost_records(handle_t *handle,
+					   struct ocfs2_extent_tree *et,
 					   struct ocfs2_path *path,
 					   struct ocfs2_extent_rec *insert_rec)
 {
@@ -3900,9 +3891,9 @@ static void ocfs2_adjust_rightmost_records(struct inode *inode,
 
 		next_free = le16_to_cpu(el->l_next_free_rec);
 		if (next_free == 0) {
-			ocfs2_error(inode->i_sb,
-				    "Dinode %llu has a bad extent list",
-				    (unsigned long long)OCFS2_I(inode)->ip_blkno);
+			ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
+				    "Owner %llu has a bad extent list",
+				    (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci));
 			ret = -EIO;
 			return;
 		}
@@ -3922,7 +3913,8 @@ static void ocfs2_adjust_rightmost_records(struct inode *inode,
 	}
 }
 
-static int ocfs2_append_rec_to_path(struct inode *inode, handle_t *handle,
+static int ocfs2_append_rec_to_path(handle_t *handle,
+				    struct ocfs2_extent_tree *et,
 				    struct ocfs2_extent_rec *insert_rec,
 				    struct ocfs2_path *right_path,
 				    struct ocfs2_path **ret_left_path)
@@ -3950,8 +3942,8 @@ static int ocfs2_append_rec_to_path(struct inode *inode, handle_t *handle,
 	    (next_free == 1 && ocfs2_is_empty_extent(&el->l_recs[0]))) {
 		u32 left_cpos;
 
-		ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, right_path,
-						    &left_cpos);
+		ret = ocfs2_find_cpos_for_left_leaf(ocfs2_metadata_cache_get_super(et->et_ci),
+						    right_path, &left_cpos);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -3973,7 +3965,7 @@ static int ocfs2_append_rec_to_path(struct inode *inode, handle_t *handle,
 				goto out;
 			}
 
-			ret = ocfs2_find_path(INODE_CACHE(inode), left_path,
+			ret = ocfs2_find_path(et->et_ci, left_path,
 					      left_cpos);
 			if (ret) {
 				mlog_errno(ret);
@@ -3987,13 +3979,13 @@ static int ocfs2_append_rec_to_path(struct inode *inode, handle_t *handle,
 		}
 	}
 
-	ret = ocfs2_journal_access_path(INODE_CACHE(inode), handle, right_path);
+	ret = ocfs2_journal_access_path(et->et_ci, handle, right_path);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
 	}
 
-	ocfs2_adjust_rightmost_records(inode, handle, right_path, insert_rec);
+	ocfs2_adjust_rightmost_records(handle, et, right_path, insert_rec);
 
 	*ret_left_path = left_path;
 	ret = 0;
@@ -4263,7 +4255,7 @@ static int ocfs2_do_insert_extent(struct inode *inode,
 		}
 	} else if (type->ins_appending == APPEND_TAIL
 		   && type->ins_contig != CONTIG_LEFT) {
-		ret = ocfs2_append_rec_to_path(inode, handle, insert_rec,
+		ret = ocfs2_append_rec_to_path(handle, et, insert_rec,
 					       right_path, &left_path);
 		if (ret) {
 			mlog_errno(ret);
@@ -4689,7 +4681,7 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
 	     free_records, insert.ins_tree_depth);
 
 	if (insert.ins_contig == CONTIG_NONE && free_records == 0) {
-		status = ocfs2_grow_tree(inode, handle, et,
+		status = ocfs2_grow_tree(handle, et,
 					 &insert.ins_tree_depth, &last_eb_bh,
 					 meta_ac);
 		if (status) {
@@ -4876,7 +4868,7 @@ leftright:
 
 	if (le16_to_cpu(rightmost_el->l_next_free_rec) ==
 	    le16_to_cpu(rightmost_el->l_count)) {
-		ret = ocfs2_grow_tree(inode, handle, et,
+		ret = ocfs2_grow_tree(handle, et,
 				      &depth, last_eb_bh, meta_ac);
 		if (ret) {
 			mlog_errno(ret);
@@ -5208,7 +5200,7 @@ static int ocfs2_split_tree(struct inode *inode, struct ocfs2_extent_tree *et,
 
 	if (le16_to_cpu(rightmost_el->l_next_free_rec) ==
 	    le16_to_cpu(rightmost_el->l_count)) {
-		ret = ocfs2_grow_tree(inode, handle, et, &depth, &last_eb_bh,
+		ret = ocfs2_grow_tree(handle, et, &depth, &last_eb_bh,
 				      meta_ac);
 		if (ret) {
 			mlog_errno(ret);
@@ -5346,7 +5338,7 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
 			 * be deleted by the rotate code.
 			 */
 			rec = &el->l_recs[next_free - 1];
-			ocfs2_adjust_rightmost_records(inode, handle, path,
+			ocfs2_adjust_rightmost_records(handle, et, path,
 						       rec);
 		}
 	} else if (le32_to_cpu(rec->e_cpos) == cpos) {
@@ -5358,7 +5350,7 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
 		/* Remove rightmost portion of the record */
 		le16_add_cpu(&rec->e_leaf_clusters, -len);
 		if (is_rightmost_tree_rec)
-			ocfs2_adjust_rightmost_records(inode, handle, path, rec);
+			ocfs2_adjust_rightmost_records(handle, et, path, rec);
 	} else {
 		/* Caller should have trapped this. */
 		mlog(ML_ERROR, "Inode %llu: Invalid record truncate: (%u, %u) "
-- 
cgit v1.2.3


From 043beebb6c467a07ccd7aa666095f87fade1c28e Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 13 Feb 2009 02:42:30 -0800
Subject: ocfs2: ocfs2_truncate_rec() doesn't need struct inode.

It's not using it anymore.  Remove it from the parameter list.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/alloc.c | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index e1479faeb01c..4022fa4dffb5 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -5223,16 +5223,16 @@ out:
 	return ret;
 }
 
-static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
+static int ocfs2_truncate_rec(handle_t *handle,
+			      struct ocfs2_extent_tree *et,
 			      struct ocfs2_path *path, int index,
 			      struct ocfs2_cached_dealloc_ctxt *dealloc,
-			      u32 cpos, u32 len,
-			      struct ocfs2_extent_tree *et)
+			      u32 cpos, u32 len)
 {
 	int ret;
 	u32 left_cpos, rec_range, trunc_range;
 	int wants_rotate = 0, is_rightmost_tree_rec = 0;
-	struct super_block *sb = inode->i_sb;
+	struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);
 	struct ocfs2_path *left_path = NULL;
 	struct ocfs2_extent_list *el = path_leaf_el(path);
 	struct ocfs2_extent_rec *rec;
@@ -5271,14 +5271,13 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
 		 * by this leaf and the one to it's left.
 		 *
 		 * There are two cases we can skip:
-		 *   1) Path is the leftmost one in our inode tree.
+		 *   1) Path is the leftmost one in our btree.
 		 *   2) The leaf is rightmost and will be empty after
 		 *      we remove the extent record - the rotate code
 		 *      knows how to update the newly formed edge.
 		 */
 
-		ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, path,
-						    &left_cpos);
+		ret = ocfs2_find_cpos_for_left_leaf(sb, path, &left_cpos);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -5353,8 +5352,9 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
 			ocfs2_adjust_rightmost_records(handle, et, path, rec);
 	} else {
 		/* Caller should have trapped this. */
-		mlog(ML_ERROR, "Inode %llu: Invalid record truncate: (%u, %u) "
-		     "(%u, %u)\n", (unsigned long long)OCFS2_I(inode)->ip_blkno,
+		mlog(ML_ERROR, "Owner %llu: Invalid record truncate: (%u, %u) "
+		     "(%u, %u)\n",
+		     (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
 		     le32_to_cpu(rec->e_cpos),
 		     le16_to_cpu(rec->e_leaf_clusters), cpos, len);
 		BUG();
@@ -5447,8 +5447,8 @@ int ocfs2_remove_extent(struct inode *inode,
 	     le32_to_cpu(rec->e_cpos), ocfs2_rec_clusters(el, rec));
 
 	if (le32_to_cpu(rec->e_cpos) == cpos || rec_range == trunc_range) {
-		ret = ocfs2_truncate_rec(inode, handle, path, index, dealloc,
-					 cpos, len, et);
+		ret = ocfs2_truncate_rec(handle, et, path, index, dealloc,
+					 cpos, len);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -5502,8 +5502,8 @@ int ocfs2_remove_extent(struct inode *inode,
 			goto out;
 		}
 
-		ret = ocfs2_truncate_rec(inode, handle, path, index, dealloc,
-					 cpos, len, et);
+		ret = ocfs2_truncate_rec(handle, et, path, index, dealloc,
+					 cpos, len);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
-- 
cgit v1.2.3


From 4c911eefca316f580f174940cd67d561b4b7e6e8 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 13 Feb 2009 02:50:12 -0800
Subject: ocfs2: Make truncating the extent map an extent_tree_operation.

ocfs2_remove_extent() wants to truncate the extent map if it's
truncating an inode data extent.  But since many btrees can call that
function, let's make it an op on ocfs2_extent_tree.  Other tree types
can leave it empty.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/alloc.c | 37 +++++++++++++++++++++++++++++++------
 1 file changed, 31 insertions(+), 6 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 4022fa4dffb5..cdf96974a6a2 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -82,6 +82,13 @@ struct ocfs2_extent_tree_operations {
 	void (*eo_update_clusters)(struct ocfs2_extent_tree *et,
 				   u32 new_clusters);
 
+	/*
+	 * If this extent tree is supported by an extent map, truncate the
+	 * map to clusters,
+	 */
+	void (*eo_extent_map_truncate)(struct ocfs2_extent_tree *et,
+				       u32 clusters);
+
 	/*
 	 * If ->eo_insert_check() exists, it is called before rec is
 	 * inserted into the extent tree.  It is optional.
@@ -120,6 +127,8 @@ static void ocfs2_dinode_set_last_eb_blk(struct ocfs2_extent_tree *et,
 					 u64 blkno);
 static void ocfs2_dinode_update_clusters(struct ocfs2_extent_tree *et,
 					 u32 clusters);
+static void ocfs2_dinode_extent_map_truncate(struct ocfs2_extent_tree *et,
+					     u32 clusters);
 static int ocfs2_dinode_insert_check(struct ocfs2_extent_tree *et,
 				     struct ocfs2_extent_rec *rec);
 static int ocfs2_dinode_sanity_check(struct ocfs2_extent_tree *et);
@@ -128,6 +137,7 @@ static struct ocfs2_extent_tree_operations ocfs2_dinode_et_ops = {
 	.eo_set_last_eb_blk	= ocfs2_dinode_set_last_eb_blk,
 	.eo_get_last_eb_blk	= ocfs2_dinode_get_last_eb_blk,
 	.eo_update_clusters	= ocfs2_dinode_update_clusters,
+	.eo_extent_map_truncate	= ocfs2_dinode_extent_map_truncate,
 	.eo_insert_check	= ocfs2_dinode_insert_check,
 	.eo_sanity_check	= ocfs2_dinode_sanity_check,
 	.eo_fill_root_el	= ocfs2_dinode_fill_root_el,
@@ -162,6 +172,14 @@ static void ocfs2_dinode_update_clusters(struct ocfs2_extent_tree *et,
 	spin_unlock(&oi->ip_lock);
 }
 
+static void ocfs2_dinode_extent_map_truncate(struct ocfs2_extent_tree *et,
+					     u32 clusters)
+{
+	struct inode *inode = &cache_info_to_inode(et->et_ci)->vfs_inode;
+
+	ocfs2_extent_map_trunc(inode, clusters);
+}
+
 static int ocfs2_dinode_insert_check(struct ocfs2_extent_tree *et,
 				     struct ocfs2_extent_rec *rec)
 {
@@ -400,6 +418,13 @@ static inline void ocfs2_et_update_clusters(struct ocfs2_extent_tree *et,
 	et->et_ops->eo_update_clusters(et, clusters);
 }
 
+static inline void ocfs2_et_extent_map_truncate(struct ocfs2_extent_tree *et,
+						u32 clusters)
+{
+	if (et->et_ops->eo_extent_map_truncate)
+		et->et_ops->eo_extent_map_truncate(et, clusters);
+}
+
 static inline int ocfs2_et_root_journal_access(handle_t *handle,
 					       struct ocfs2_extent_tree *et,
 					       int type)
@@ -5106,12 +5131,8 @@ int ocfs2_mark_extent_written(struct inode *inode,
 	/*
 	 * XXX: This should be fixed up so that we just re-insert the
 	 * next extent records.
-	 *
-	 * XXX: This is a hack on the extent tree, maybe it should be
-	 * an op?
 	 */
-	if (et->et_ops == &ocfs2_dinode_et_ops)
-		ocfs2_extent_map_trunc(inode, 0);
+	ocfs2_et_extent_map_truncate(et, 0);
 
 	left_path = ocfs2_new_path_from_et(et);
 	if (!left_path) {
@@ -5393,7 +5414,11 @@ int ocfs2_remove_extent(struct inode *inode,
 	struct ocfs2_extent_list *el;
 	struct ocfs2_path *path = NULL;
 
-	ocfs2_extent_map_trunc(inode, 0);
+	/*
+	 * XXX: Why are we truncating to 0 instead of wherever this
+	 * affects us?
+	 */
+	ocfs2_et_extent_map_truncate(et, 0);
 
 	path = ocfs2_new_path_from_et(et);
 	if (!path) {
-- 
cgit v1.2.3


From d562862314a7b131a630f7b912490312387542fb Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 13 Feb 2009 02:54:36 -0800
Subject: ocfs2: ocfs2_insert_at_leaf() doesn't need struct inode.

Give it an ocfs2_extent_tree and it is happy.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/alloc.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index cdf96974a6a2..ec9c2ce7bb0c 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -3809,10 +3809,10 @@ static void ocfs2_subtract_from_rec(struct super_block *sb,
  * list. If this leaf is part of an allocation tree, it is assumed
  * that the tree above has been prepared.
  */
-static void ocfs2_insert_at_leaf(struct ocfs2_extent_rec *insert_rec,
+static void ocfs2_insert_at_leaf(struct ocfs2_extent_tree *et,
+				 struct ocfs2_extent_rec *insert_rec,
 				 struct ocfs2_extent_list *el,
-				 struct ocfs2_insert_type *insert,
-				 struct inode *inode)
+				 struct ocfs2_insert_type *insert)
 {
 	int i = insert->ins_contig_index;
 	unsigned int range;
@@ -3824,7 +3824,8 @@ static void ocfs2_insert_at_leaf(struct ocfs2_extent_rec *insert_rec,
 		i = ocfs2_search_extent_list(el, le32_to_cpu(insert_rec->e_cpos));
 		BUG_ON(i == -1);
 		rec = &el->l_recs[i];
-		ocfs2_subtract_from_rec(inode->i_sb, insert->ins_split, rec,
+		ocfs2_subtract_from_rec(ocfs2_metadata_cache_get_super(et->et_ci),
+					insert->ins_split, rec,
 					insert_rec);
 		goto rotate;
 	}
@@ -3866,10 +3867,10 @@ static void ocfs2_insert_at_leaf(struct ocfs2_extent_rec *insert_rec,
 
 		mlog_bug_on_msg(le16_to_cpu(el->l_next_free_rec) >=
 				le16_to_cpu(el->l_count),
-				"inode %lu, depth %u, count %u, next free %u, "
+				"owner %llu, depth %u, count %u, next free %u, "
 				"rec.cpos %u, rec.clusters %u, "
 				"insert.cpos %u, insert.clusters %u\n",
-				inode->i_ino,
+				ocfs2_metadata_cache_owner(et->et_ci),
 				le16_to_cpu(el->l_tree_depth),
 				le16_to_cpu(el->l_count),
 				le16_to_cpu(el->l_next_free_rec),
@@ -4171,8 +4172,8 @@ static int ocfs2_insert_path(struct inode *inode,
 			if (ret)
 				mlog_errno(ret);
 	} else
-		ocfs2_insert_at_leaf(insert_rec, path_leaf_el(right_path),
-				     insert, inode);
+		ocfs2_insert_at_leaf(et, insert_rec, path_leaf_el(right_path),
+				     insert);
 
 	ret = ocfs2_journal_dirty(handle, leaf_bh);
 	if (ret)
@@ -4218,7 +4219,7 @@ static int ocfs2_do_insert_extent(struct inode *inode,
 	}
 
 	if (le16_to_cpu(el->l_tree_depth) == 0) {
-		ocfs2_insert_at_leaf(insert_rec, el, type, inode);
+		ocfs2_insert_at_leaf(et, insert_rec, el, type);
 		goto out_update_clusters;
 	}
 
-- 
cgit v1.2.3


From c38e52bb1c0187186bd3c4a2b318ffe69cd2fdf8 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 13 Feb 2009 02:56:23 -0800
Subject: ocfs2: Give ocfs2_split_record() an extent_tree instead of an inode.

Another step on the way to generic btree functions.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/alloc.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index ec9c2ce7bb0c..b57f976e62f9 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -4022,7 +4022,7 @@ out:
 	return ret;
 }
 
-static void ocfs2_split_record(struct inode *inode,
+static void ocfs2_split_record(struct ocfs2_extent_tree *et,
 			       struct ocfs2_path *left_path,
 			       struct ocfs2_path *right_path,
 			       struct ocfs2_extent_rec *split_rec,
@@ -4095,7 +4095,8 @@ static void ocfs2_split_record(struct inode *inode,
 	}
 
 	rec = &el->l_recs[index];
-	ocfs2_subtract_from_rec(inode->i_sb, split, rec, split_rec);
+	ocfs2_subtract_from_rec(ocfs2_metadata_cache_get_super(et->et_ci),
+				split, rec, split_rec);
 	ocfs2_rotate_leaf(insert_el, split_rec);
 }
 
@@ -4158,7 +4159,7 @@ static int ocfs2_insert_path(struct inode *inode,
 		 * of splits, but it's easier to just let one separate
 		 * function sort it all out.
 		 */
-		ocfs2_split_record(inode, left_path, right_path,
+		ocfs2_split_record(et, left_path, right_path,
 				   insert_rec, insert->ins_split);
 
 		/*
-- 
cgit v1.2.3


From 3505bec01829a8f690259517add55c7941a4d3d5 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 13 Feb 2009 02:57:58 -0800
Subject: ocfs2: ocfs2_do_insert_extent() and ocfs2_insert_path() no longer
 need an inode.

They aren't using it, so remove it from their parameter lists.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/alloc.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index b57f976e62f9..cced1766a9ea 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -4108,8 +4108,7 @@ static void ocfs2_split_record(struct ocfs2_extent_tree *et,
  * in. left_path should only be passed in if we need to update that
  * portion of the tree after an edge insert.
  */
-static int ocfs2_insert_path(struct inode *inode,
-			     handle_t *handle,
+static int ocfs2_insert_path(handle_t *handle,
 			     struct ocfs2_extent_tree *et,
 			     struct ocfs2_path *left_path,
 			     struct ocfs2_path *right_path,
@@ -4198,8 +4197,7 @@ out:
 	return ret;
 }
 
-static int ocfs2_do_insert_extent(struct inode *inode,
-				  handle_t *handle,
+static int ocfs2_do_insert_extent(handle_t *handle,
 				  struct ocfs2_extent_tree *et,
 				  struct ocfs2_extent_rec *insert_rec,
 				  struct ocfs2_insert_type *type)
@@ -4290,7 +4288,7 @@ static int ocfs2_do_insert_extent(struct inode *inode,
 		}
 	}
 
-	ret = ocfs2_insert_path(inode, handle, et, left_path, right_path,
+	ret = ocfs2_insert_path(handle, et, left_path, right_path,
 				insert_rec, type);
 	if (ret) {
 		mlog_errno(ret);
@@ -4718,7 +4716,7 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
 	}
 
 	/* Finally, we can add clusters. This might rotate the tree for us. */
-	status = ocfs2_do_insert_extent(inode, handle, et, &rec, &insert);
+	status = ocfs2_do_insert_extent(handle, et, &rec, &insert);
 	if (status < 0)
 		mlog_errno(status);
 	else if (et->et_ops == &ocfs2_dinode_et_ops)
@@ -4933,7 +4931,7 @@ leftright:
 		do_leftright = 1;
 	}
 
-	ret = ocfs2_do_insert_extent(inode, handle, et, &split_rec, &insert);
+	ret = ocfs2_do_insert_extent(handle, et, &split_rec, &insert);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -5237,7 +5235,7 @@ static int ocfs2_split_tree(struct inode *inode, struct ocfs2_extent_tree *et,
 	insert.ins_split = SPLIT_RIGHT;
 	insert.ins_tree_depth = depth;
 
-	ret = ocfs2_do_insert_extent(inode, handle, et, &split_rec, &insert);
+	ret = ocfs2_do_insert_extent(handle, et, &split_rec, &insert);
 	if (ret)
 		mlog_errno(ret);
 
-- 
cgit v1.2.3


From b4a176515c715f0c6db1759a39cd9c4175e5a23a Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 13 Feb 2009 03:07:09 -0800
Subject: ocfs2: ocfs2_extent_contig() only requires the superblock.

Don't pass the inode in.  We don't want it around for generic btree
operations.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/alloc.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index cced1766a9ea..2431bbba6cd8 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -747,7 +747,7 @@ static int ocfs2_extents_adjacent(struct ocfs2_extent_rec *left,
 }
 
 static enum ocfs2_contig_type
-	ocfs2_extent_contig(struct inode *inode,
+	ocfs2_extent_contig(struct super_block *sb,
 			    struct ocfs2_extent_rec *ext,
 			    struct ocfs2_extent_rec *insert_rec)
 {
@@ -762,12 +762,12 @@ static enum ocfs2_contig_type
 		return CONTIG_NONE;
 
 	if (ocfs2_extents_adjacent(ext, insert_rec) &&
-	    ocfs2_block_extent_contig(inode->i_sb, ext, blkno))
+	    ocfs2_block_extent_contig(sb, ext, blkno))
 			return CONTIG_RIGHT;
 
 	blkno = le64_to_cpu(ext->e_blkno);
 	if (ocfs2_extents_adjacent(insert_rec, ext) &&
-	    ocfs2_block_extent_contig(inode->i_sb, insert_rec, blkno))
+	    ocfs2_block_extent_contig(sb, insert_rec, blkno))
 		return CONTIG_LEFT;
 
 	return CONTIG_NONE;
@@ -4374,7 +4374,7 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
 			if (split_rec->e_cpos == el->l_recs[index].e_cpos)
 				ret = CONTIG_RIGHT;
 		} else {
-			ret = ocfs2_extent_contig(inode, rec, split_rec);
+			ret = ocfs2_extent_contig(inode->i_sb, rec, split_rec);
 		}
 	}
 
@@ -4420,7 +4420,7 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
 	if (rec) {
 		enum ocfs2_contig_type contig_type;
 
-		contig_type = ocfs2_extent_contig(inode, rec, split_rec);
+		contig_type = ocfs2_extent_contig(inode->i_sb, rec, split_rec);
 
 		if (contig_type == CONTIG_LEFT && ret == CONTIG_RIGHT)
 			ret = CONTIG_LEFTRIGHT;
@@ -4449,7 +4449,7 @@ static void ocfs2_figure_contig_type(struct inode *inode,
 	BUG_ON(le16_to_cpu(el->l_tree_depth) != 0);
 
 	for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
-		contig_type = ocfs2_extent_contig(inode, &el->l_recs[i],
+		contig_type = ocfs2_extent_contig(inode->i_sb, &el->l_recs[i],
 						  insert_rec);
 		if (contig_type != CONTIG_NONE) {
 			insert->ins_contig_index = i;
-- 
cgit v1.2.3


From a29702914ad36443d83b5250b3bfa1bf91e6b239 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 13 Feb 2009 03:09:54 -0800
Subject: ocfs2: Swap inode for extent_tree in
 ocfs2_figure_merge_contig_type().

We don't want struct inode in generic btree operations.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/alloc.c | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 2431bbba6cd8..9b79150e478f 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -4312,7 +4312,8 @@ out:
 }
 
 static enum ocfs2_contig_type
-ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
+ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et,
+			       struct ocfs2_path *path,
 			       struct ocfs2_extent_list *el, int index,
 			       struct ocfs2_extent_rec *split_rec)
 {
@@ -4324,12 +4325,12 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
 	struct ocfs2_path *left_path = NULL, *right_path = NULL;
 	struct buffer_head *bh;
 	struct ocfs2_extent_block *eb;
+	struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);
 
 	if (index > 0) {
 		rec = &el->l_recs[index - 1];
 	} else if (path->p_tree_depth > 0) {
-		status = ocfs2_find_cpos_for_left_leaf(inode->i_sb,
-						       path, &left_cpos);
+		status = ocfs2_find_cpos_for_left_leaf(sb, path, &left_cpos);
 		if (status)
 			goto out;
 
@@ -4338,8 +4339,8 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
 			if (!left_path)
 				goto out;
 
-			status = ocfs2_find_path(INODE_CACHE(inode),
-						 left_path, left_cpos);
+			status = ocfs2_find_path(et->et_ci, left_path,
+						 left_cpos);
 			if (status)
 				goto out;
 
@@ -4349,7 +4350,7 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
 			    le16_to_cpu(new_el->l_count)) {
 				bh = path_leaf_bh(left_path);
 				eb = (struct ocfs2_extent_block *)bh->b_data;
-				ocfs2_error(inode->i_sb,
+				ocfs2_error(sb,
 					    "Extent block #%llu has an "
 					    "invalid l_next_free_rec of "
 					    "%d.  It should have "
@@ -4374,7 +4375,7 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
 			if (split_rec->e_cpos == el->l_recs[index].e_cpos)
 				ret = CONTIG_RIGHT;
 		} else {
-			ret = ocfs2_extent_contig(inode->i_sb, rec, split_rec);
+			ret = ocfs2_extent_contig(sb, rec, split_rec);
 		}
 	}
 
@@ -4383,8 +4384,7 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
 		rec = &el->l_recs[index + 1];
 	else if (le16_to_cpu(el->l_next_free_rec) == le16_to_cpu(el->l_count) &&
 		 path->p_tree_depth > 0) {
-		status = ocfs2_find_cpos_for_right_leaf(inode->i_sb,
-							path, &right_cpos);
+		status = ocfs2_find_cpos_for_right_leaf(sb, path, &right_cpos);
 		if (status)
 			goto out;
 
@@ -4395,7 +4395,7 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
 		if (!right_path)
 			goto out;
 
-		status = ocfs2_find_path(INODE_CACHE(inode), right_path, right_cpos);
+		status = ocfs2_find_path(et->et_ci, right_path, right_cpos);
 		if (status)
 			goto out;
 
@@ -4405,7 +4405,7 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
 			if (le16_to_cpu(new_el->l_next_free_rec) <= 1) {
 				bh = path_leaf_bh(right_path);
 				eb = (struct ocfs2_extent_block *)bh->b_data;
-				ocfs2_error(inode->i_sb,
+				ocfs2_error(sb,
 					    "Extent block #%llu has an "
 					    "invalid l_next_free_rec of %d",
 					    (unsigned long long)le64_to_cpu(eb->h_blkno),
@@ -4420,7 +4420,7 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
 	if (rec) {
 		enum ocfs2_contig_type contig_type;
 
-		contig_type = ocfs2_extent_contig(inode->i_sb, rec, split_rec);
+		contig_type = ocfs2_extent_contig(sb, rec, split_rec);
 
 		if (contig_type == CONTIG_LEFT && ret == CONTIG_RIGHT)
 			ret = CONTIG_LEFTRIGHT;
@@ -5035,7 +5035,7 @@ static int __ocfs2_mark_extent_written(struct inode *inode,
 		goto out;
 	}
 
-	ctxt.c_contig_type = ocfs2_figure_merge_contig_type(inode, path, el,
+	ctxt.c_contig_type = ocfs2_figure_merge_contig_type(et, path, el,
 							    split_index,
 							    split_rec);
 
-- 
cgit v1.2.3


From 1ef61b33148a6b32b6d28383cd72ceeddfc7054d Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 13 Feb 2009 03:12:33 -0800
Subject: ocfs2: Remove inode from ocfs2_figure_contig_type().

It already has an ocfs2_extent_tree and doesn't need the inode.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/alloc.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 9b79150e478f..38b1fea9af67 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -4437,11 +4437,10 @@ out:
 	return ret;
 }
 
-static void ocfs2_figure_contig_type(struct inode *inode,
+static void ocfs2_figure_contig_type(struct ocfs2_extent_tree *et,
 				     struct ocfs2_insert_type *insert,
 				     struct ocfs2_extent_list *el,
-				     struct ocfs2_extent_rec *insert_rec,
-				     struct ocfs2_extent_tree *et)
+				     struct ocfs2_extent_rec *insert_rec)
 {
 	int i;
 	enum ocfs2_contig_type contig_type = CONTIG_NONE;
@@ -4449,8 +4448,8 @@ static void ocfs2_figure_contig_type(struct inode *inode,
 	BUG_ON(le16_to_cpu(el->l_tree_depth) != 0);
 
 	for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
-		contig_type = ocfs2_extent_contig(inode->i_sb, &el->l_recs[i],
-						  insert_rec);
+		contig_type = ocfs2_extent_contig(ocfs2_metadata_cache_get_super(et->et_ci),
+						  &el->l_recs[i], insert_rec);
 		if (contig_type != CONTIG_NONE) {
 			insert->ins_contig_index = i;
 			break;
@@ -4579,7 +4578,7 @@ static int ocfs2_figure_insert_type(struct inode *inode,
 		le16_to_cpu(el->l_next_free_rec);
 
 	if (!insert->ins_tree_depth) {
-		ocfs2_figure_contig_type(inode, insert, el, insert_rec, et);
+		ocfs2_figure_contig_type(et, insert, el, insert_rec);
 		ocfs2_figure_appending_type(insert, el, insert_rec);
 		return 0;
 	}
@@ -4613,7 +4612,7 @@ static int ocfs2_figure_insert_type(struct inode *inode,
          *     into two types of appends: simple record append, or a
          *     rotate inside the tail leaf.
 	 */
-	ocfs2_figure_contig_type(inode, insert, el, insert_rec, et);
+	ocfs2_figure_contig_type(et, insert, el, insert_rec);
 
 	/*
 	 * The insert code isn't quite ready to deal with all cases of
-- 
cgit v1.2.3


From 627961b77e68b725851cb227db10084bf15f6920 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 13 Feb 2009 03:14:38 -0800
Subject: ocfs2: ocfs2_figure_insert_type() no longer needs struct inode.

It's not using it, so remove it from the parameter list.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/alloc.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 38b1fea9af67..3d09f4ba39eb 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -4530,8 +4530,7 @@ set_tail_append:
  * All of the information is stored on the ocfs2_insert_type
  * structure.
  */
-static int ocfs2_figure_insert_type(struct inode *inode,
-				    struct ocfs2_extent_tree *et,
+static int ocfs2_figure_insert_type(struct ocfs2_extent_tree *et,
 				    struct buffer_head **last_eb_bh,
 				    struct ocfs2_extent_rec *insert_rec,
 				    int *free_records,
@@ -4691,7 +4690,7 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
 		goto bail;
 	}
 
-	status = ocfs2_figure_insert_type(inode, et, &last_eb_bh, &rec,
+	status = ocfs2_figure_insert_type(et, &last_eb_bh, &rec,
 					  &free_records, &insert);
 	if (status < 0) {
 		mlog_errno(status);
-- 
cgit v1.2.3


From 92ba470c44c1404ff18ca0f4ecce1e5b116bb933 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 13 Feb 2009 03:18:34 -0800
Subject: ocfs2: Make extent map insertion an extent_tree_operation.

ocfs2_insert_extent() wants to insert a record into the extent map if
it's an inode data extent.  But since many btrees can call that
function, let's make it an op on ocfs2_extent_tree.  Other tree types
can leave it empty.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/alloc.c | 29 +++++++++++++++++++++++++++--
 1 file changed, 27 insertions(+), 2 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 3d09f4ba39eb..ed869889c4fb 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -82,6 +82,13 @@ struct ocfs2_extent_tree_operations {
 	void (*eo_update_clusters)(struct ocfs2_extent_tree *et,
 				   u32 new_clusters);
 
+	/*
+	 * If this extent tree is supported by an extent map, insert
+	 * a record into the map.
+	 */
+	void (*eo_extent_map_insert)(struct ocfs2_extent_tree *et,
+				     struct ocfs2_extent_rec *rec);
+
 	/*
 	 * If this extent tree is supported by an extent map, truncate the
 	 * map to clusters,
@@ -127,6 +134,8 @@ static void ocfs2_dinode_set_last_eb_blk(struct ocfs2_extent_tree *et,
 					 u64 blkno);
 static void ocfs2_dinode_update_clusters(struct ocfs2_extent_tree *et,
 					 u32 clusters);
+static void ocfs2_dinode_extent_map_insert(struct ocfs2_extent_tree *et,
+					   struct ocfs2_extent_rec *rec);
 static void ocfs2_dinode_extent_map_truncate(struct ocfs2_extent_tree *et,
 					     u32 clusters);
 static int ocfs2_dinode_insert_check(struct ocfs2_extent_tree *et,
@@ -137,6 +146,7 @@ static struct ocfs2_extent_tree_operations ocfs2_dinode_et_ops = {
 	.eo_set_last_eb_blk	= ocfs2_dinode_set_last_eb_blk,
 	.eo_get_last_eb_blk	= ocfs2_dinode_get_last_eb_blk,
 	.eo_update_clusters	= ocfs2_dinode_update_clusters,
+	.eo_extent_map_insert	= ocfs2_dinode_extent_map_insert,
 	.eo_extent_map_truncate	= ocfs2_dinode_extent_map_truncate,
 	.eo_insert_check	= ocfs2_dinode_insert_check,
 	.eo_sanity_check	= ocfs2_dinode_sanity_check,
@@ -172,6 +182,14 @@ static void ocfs2_dinode_update_clusters(struct ocfs2_extent_tree *et,
 	spin_unlock(&oi->ip_lock);
 }
 
+static void ocfs2_dinode_extent_map_insert(struct ocfs2_extent_tree *et,
+					   struct ocfs2_extent_rec *rec)
+{
+	struct inode *inode = &cache_info_to_inode(et->et_ci)->vfs_inode;
+
+	ocfs2_extent_map_insert_rec(inode, rec);
+}
+
 static void ocfs2_dinode_extent_map_truncate(struct ocfs2_extent_tree *et,
 					     u32 clusters)
 {
@@ -418,6 +436,13 @@ static inline void ocfs2_et_update_clusters(struct ocfs2_extent_tree *et,
 	et->et_ops->eo_update_clusters(et, clusters);
 }
 
+static inline void ocfs2_et_extent_map_insert(struct ocfs2_extent_tree *et,
+					      struct ocfs2_extent_rec *rec)
+{
+	if (et->et_ops->eo_extent_map_insert)
+		et->et_ops->eo_extent_map_insert(et, rec);
+}
+
 static inline void ocfs2_et_extent_map_truncate(struct ocfs2_extent_tree *et,
 						u32 clusters)
 {
@@ -4717,8 +4742,8 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
 	status = ocfs2_do_insert_extent(handle, et, &rec, &insert);
 	if (status < 0)
 		mlog_errno(status);
-	else if (et->et_ops == &ocfs2_dinode_et_ops)
-		ocfs2_extent_map_insert_rec(inode, &rec);
+	else
+		ocfs2_et_extent_map_insert(et, &rec);
 
 bail:
 	brelse(last_eb_bh);
-- 
cgit v1.2.3


From cc79d8c19e9d39446525a1026f1a21761f5d3cd2 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 13 Feb 2009 03:24:43 -0800
Subject: ocfs2: ocfs2_insert_extent() no longer needs struct inode.

One more function down, no inode in the entire insert-extent chain.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/alloc.c | 19 ++++++++-----------
 fs/ocfs2/alloc.h |  4 +---
 fs/ocfs2/dir.c   | 12 +++++-------
 fs/ocfs2/xattr.c |  2 +-
 4 files changed, 15 insertions(+), 22 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index ed869889c4fb..c4943b91c7a4 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -4681,13 +4681,11 @@ out:
 }
 
 /*
- * Insert an extent into an inode btree.
+ * Insert an extent into a btree.
  *
- * The caller needs to update fe->i_clusters
+ * The caller needs to update the owning btree's cluster count.
  */
-int ocfs2_insert_extent(struct ocfs2_super *osb,
-			handle_t *handle,
-			struct inode *inode,
+int ocfs2_insert_extent(handle_t *handle,
 			struct ocfs2_extent_tree *et,
 			u32 cpos,
 			u64 start_blk,
@@ -4701,8 +4699,9 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
 	struct ocfs2_insert_type insert = {0, };
 	struct ocfs2_extent_rec rec;
 
-	mlog(0, "add %u clusters at position %u to inode %llu\n",
-	     new_clusters, cpos, (unsigned long long)OCFS2_I(inode)->ip_blkno);
+	mlog(0, "add %u clusters at position %u to owner %llu\n",
+	     new_clusters, cpos,
+	     (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci));
 
 	memset(&rec, 0, sizeof(rec));
 	rec.e_cpos = cpu_to_le32(cpos);
@@ -4829,8 +4828,7 @@ int ocfs2_add_clusters_in_btree(struct ocfs2_super *osb,
 	block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
 	mlog(0, "Allocating %u clusters at block %u for inode %llu\n",
 	     num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);
-	status = ocfs2_insert_extent(osb, handle, inode, et,
-				     *logical_offset, block,
+	status = ocfs2_insert_extent(handle, et, *logical_offset, block,
 				     num_bits, flags, meta_ac);
 	if (status < 0) {
 		mlog_errno(status);
@@ -7244,8 +7242,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
 		 * the in-inode data from our pages.
 		 */
 		ocfs2_init_dinode_extent_tree(&et, inode, di_bh);
-		ret = ocfs2_insert_extent(osb, handle, inode, &et,
-					  0, block, 1, 0, NULL);
+		ret = ocfs2_insert_extent(handle, &et, 0, block, 1, 0, NULL);
 		if (ret) {
 			mlog_errno(ret);
 			goto out_commit;
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index 8718e57e70a1..99accd30af0e 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -90,9 +90,7 @@ int ocfs2_read_extent_block(struct ocfs2_caching_info *ci, u64 eb_blkno,
 			    struct buffer_head **bh);
 
 struct ocfs2_alloc_context;
-int ocfs2_insert_extent(struct ocfs2_super *osb,
-			handle_t *handle,
-			struct inode *inode,
+int ocfs2_insert_extent(handle_t *handle,
 			struct ocfs2_extent_tree *et,
 			u32 cpos,
 			u64 start_blk,
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 088a1b5ce9ac..de490a6d76ba 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -2591,7 +2591,6 @@ static int ocfs2_dx_dir_new_cluster(struct inode *dir,
 {
 	int ret;
 	u64 phys_blkno;
-	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
 
 	ret = __ocfs2_dx_dir_new_cluster(dir, cpos, handle, data_ac, dx_leaves,
 					 num_dx_leaves, &phys_blkno);
@@ -2600,7 +2599,7 @@ static int ocfs2_dx_dir_new_cluster(struct inode *dir,
 		goto out;
 	}
 
-	ret = ocfs2_insert_extent(osb, handle, dir, et, cpos, phys_blkno, 1, 0,
+	ret = ocfs2_insert_extent(handle, et, cpos, phys_blkno, 1, 0,
 				  meta_ac);
 	if (ret)
 		mlog_errno(ret);
@@ -3094,7 +3093,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
 	 * This should never fail as our extent list is empty and all
 	 * related blocks have been journaled already.
 	 */
-	ret = ocfs2_insert_extent(osb, handle, dir, &et, 0, blkno, len,
+	ret = ocfs2_insert_extent(handle, &et, 0, blkno, len,
 				  0, NULL);
 	if (ret) {
 		mlog_errno(ret);
@@ -3127,7 +3126,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
 						      dirdata_bh);
 		} else {
 			ocfs2_init_dx_root_extent_tree(&dx_et, dir, dx_root_bh);
-			ret = ocfs2_insert_extent(osb, handle, dir, &dx_et, 0,
+			ret = ocfs2_insert_extent(handle, &dx_et, 0,
 						  dx_insert_blkno, 1, 0, NULL);
 			if (ret)
 				mlog_errno(ret);
@@ -3147,7 +3146,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
 		}
 		blkno = ocfs2_clusters_to_blocks(dir->i_sb, bit_off);
 
-		ret = ocfs2_insert_extent(osb, handle, dir, &et, 1,
+		ret = ocfs2_insert_extent(handle, &et, 1,
 					  blkno, len, 0, NULL);
 		if (ret) {
 			mlog_errno(ret);
@@ -4218,8 +4217,7 @@ static int ocfs2_expand_inline_dx_root(struct inode *dir,
 	/* This should never fail considering we start with an empty
 	 * dx_root. */
 	ocfs2_init_dx_root_extent_tree(&et, dir, dx_root_bh);
-	ret = ocfs2_insert_extent(osb, handle, dir, &et, 0,
-				  insert_blkno, 1, 0, NULL);
+	ret = ocfs2_insert_extent(handle, &et, 0, insert_blkno, 1, 0, NULL);
 	if (ret)
 		mlog_errno(ret);
 	did_quota = 0;
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 61819b208315..38db12ab848f 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -4325,7 +4325,7 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
 
 	mlog(0, "Insert %u clusters at block %llu for xattr at %u\n",
 	     num_bits, (unsigned long long)block, v_start);
-	ret = ocfs2_insert_extent(osb, handle, inode, &et, v_start, block,
+	ret = ocfs2_insert_extent(handle, &et, v_start, block,
 				  num_bits, 0, ctxt->meta_ac);
 	if (ret < 0) {
 		mlog_errno(ret);
-- 
cgit v1.2.3


From cbee7e1a6a1a2a3d6eda1f76ffc38a3ed3eeb6cc Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 13 Feb 2009 03:34:15 -0800
Subject: ocfs2: ocfs2_add_clusters_in_btree() no longer needs struct inode.

One more function that doesn't need a struct inode to pass to its
children.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/alloc.c | 13 +++++++------
 fs/ocfs2/alloc.h |  6 ++----
 fs/ocfs2/file.c  |  7 +++----
 fs/ocfs2/xattr.c |  7 ++-----
 4 files changed, 14 insertions(+), 19 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index c4943b91c7a4..29095e155949 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -4758,13 +4758,11 @@ bail:
  * it is not limited to the file storage. Any extent tree can use this
  * function if it implements the proper ocfs2_extent_tree.
  */
-int ocfs2_add_clusters_in_btree(struct ocfs2_super *osb,
-				struct inode *inode,
+int ocfs2_add_clusters_in_btree(handle_t *handle,
+				struct ocfs2_extent_tree *et,
 				u32 *logical_offset,
 				u32 clusters_to_add,
 				int mark_unwritten,
-				struct ocfs2_extent_tree *et,
-				handle_t *handle,
 				struct ocfs2_alloc_context *data_ac,
 				struct ocfs2_alloc_context *meta_ac,
 				enum ocfs2_alloc_restarted *reason_ret)
@@ -4775,6 +4773,8 @@ int ocfs2_add_clusters_in_btree(struct ocfs2_super *osb,
 	u32 bit_off, num_bits;
 	u64 block;
 	u8 flags = 0;
+	struct ocfs2_super *osb =
+		OCFS2_SB(ocfs2_metadata_cache_get_super(et->et_ci));
 
 	BUG_ON(!clusters_to_add);
 
@@ -4826,8 +4826,9 @@ int ocfs2_add_clusters_in_btree(struct ocfs2_super *osb,
 	}
 
 	block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
-	mlog(0, "Allocating %u clusters at block %u for inode %llu\n",
-	     num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);
+	mlog(0, "Allocating %u clusters at block %u for owner %llu\n",
+	     num_bits, bit_off,
+	     (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci));
 	status = ocfs2_insert_extent(handle, et, *logical_offset, block,
 				     num_bits, flags, meta_ac);
 	if (status < 0) {
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index 99accd30af0e..d1d196eada8f 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -103,13 +103,11 @@ enum ocfs2_alloc_restarted {
 	RESTART_TRANS,
 	RESTART_META
 };
-int ocfs2_add_clusters_in_btree(struct ocfs2_super *osb,
-				struct inode *inode,
+int ocfs2_add_clusters_in_btree(handle_t *handle,
+				struct ocfs2_extent_tree *et,
 				u32 *logical_offset,
 				u32 clusters_to_add,
 				int mark_unwritten,
-				struct ocfs2_extent_tree *et,
-				handle_t *handle,
 				struct ocfs2_alloc_context *data_ac,
 				struct ocfs2_alloc_context *meta_ac,
 				enum ocfs2_alloc_restarted *reason_ret);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 3ddbc5e917e2..891e2c1e625c 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -516,10 +516,9 @@ int ocfs2_add_inode_data(struct ocfs2_super *osb,
 	struct ocfs2_extent_tree et;
 
 	ocfs2_init_dinode_extent_tree(&et, inode, fe_bh);
-	ret = ocfs2_add_clusters_in_btree(osb, inode, logical_offset,
-					   clusters_to_add, mark_unwritten,
-					   &et, handle,
-					   data_ac, meta_ac, reason_ret);
+	ret = ocfs2_add_clusters_in_btree(handle, &et, logical_offset,
+					  clusters_to_add, mark_unwritten,
+					  data_ac, meta_ac, reason_ret);
 
 	return ret;
 }
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 38db12ab848f..fdd02c43fa14 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -597,7 +597,6 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode,
 	int status = 0;
 	handle_t *handle = ctxt->handle;
 	enum ocfs2_alloc_restarted why;
-	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters);
 	struct ocfs2_extent_tree et;
 
@@ -613,13 +612,11 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode,
 	}
 
 	prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
-	status = ocfs2_add_clusters_in_btree(osb,
-					     inode,
+	status = ocfs2_add_clusters_in_btree(handle,
+					     &et,
 					     &logical_start,
 					     clusters_to_add,
 					     0,
-					     &et,
-					     handle,
 					     ctxt->data_ac,
 					     ctxt->meta_ac,
 					     &why);
-- 
cgit v1.2.3


From dbdcf6a48a40e6c9d7081393d793c4f1c5bb4fcf Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 13 Feb 2009 03:41:26 -0800
Subject: ocfs2: ocfs2_remove_extent() no longer needs struct inode.

One more generic btree function that is isolated from struct inode.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/alloc.c | 40 +++++++++++++++++++++-------------------
 fs/ocfs2/alloc.h |  5 ++---
 fs/ocfs2/xattr.c |  4 ++--
 3 files changed, 25 insertions(+), 24 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 29095e155949..bfead609f76c 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -5198,8 +5198,8 @@ out:
 	return ret;
 }
 
-static int ocfs2_split_tree(struct inode *inode, struct ocfs2_extent_tree *et,
-			    handle_t *handle, struct ocfs2_path *path,
+static int ocfs2_split_tree(handle_t *handle, struct ocfs2_extent_tree *et,
+			    struct ocfs2_path *path,
 			    int index, u32 new_range,
 			    struct ocfs2_alloc_context *meta_ac)
 {
@@ -5216,7 +5216,8 @@ static int ocfs2_split_tree(struct inode *inode, struct ocfs2_extent_tree *et,
 	 */
 	el = path_leaf_el(path);
 	rec = &el->l_recs[index];
-	ocfs2_make_right_split_rec(inode->i_sb, &split_rec, new_range, rec);
+	ocfs2_make_right_split_rec(ocfs2_metadata_cache_get_super(et->et_ci),
+				   &split_rec, new_range, rec);
 
 	depth = path->p_tree_depth;
 	if (depth > 0) {
@@ -5424,9 +5425,9 @@ out:
 	return ret;
 }
 
-int ocfs2_remove_extent(struct inode *inode,
+int ocfs2_remove_extent(handle_t *handle,
 			struct ocfs2_extent_tree *et,
-			u32 cpos, u32 len, handle_t *handle,
+			u32 cpos, u32 len,
 			struct ocfs2_alloc_context *meta_ac,
 			struct ocfs2_cached_dealloc_ctxt *dealloc)
 {
@@ -5458,10 +5459,11 @@ int ocfs2_remove_extent(struct inode *inode,
 	el = path_leaf_el(path);
 	index = ocfs2_search_extent_list(el, cpos);
 	if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
-		ocfs2_error(inode->i_sb,
-			    "Inode %llu has an extent at cpos %u which can no "
+		ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
+			    "Owner %llu has an extent at cpos %u which can no "
 			    "longer be found.\n",
-			    (unsigned long long)OCFS2_I(inode)->ip_blkno, cpos);
+			    (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
+			    cpos);
 		ret = -EROFS;
 		goto out;
 	}
@@ -5488,9 +5490,10 @@ int ocfs2_remove_extent(struct inode *inode,
 
 	BUG_ON(cpos < le32_to_cpu(rec->e_cpos) || trunc_range > rec_range);
 
-	mlog(0, "Inode %llu, remove (cpos %u, len %u). Existing index %d "
+	mlog(0, "Owner %llu, remove (cpos %u, len %u). Existing index %d "
 	     "(cpos %u, len %u)\n",
-	     (unsigned long long)OCFS2_I(inode)->ip_blkno, cpos, len, index,
+	     (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
+	     cpos, len, index,
 	     le32_to_cpu(rec->e_cpos), ocfs2_rec_clusters(el, rec));
 
 	if (le32_to_cpu(rec->e_cpos) == cpos || rec_range == trunc_range) {
@@ -5501,7 +5504,7 @@ int ocfs2_remove_extent(struct inode *inode,
 			goto out;
 		}
 	} else {
-		ret = ocfs2_split_tree(inode, et, handle, path, index,
+		ret = ocfs2_split_tree(handle, et, path, index,
 				       trunc_range, meta_ac);
 		if (ret) {
 			mlog_errno(ret);
@@ -5523,9 +5526,9 @@ int ocfs2_remove_extent(struct inode *inode,
 		el = path_leaf_el(path);
 		index = ocfs2_search_extent_list(el, cpos);
 		if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
-			ocfs2_error(inode->i_sb,
-				    "Inode %llu: split at cpos %u lost record.",
-				    (unsigned long long)OCFS2_I(inode)->ip_blkno,
+			ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
+				    "Owner %llu: split at cpos %u lost record.",
+				    (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
 				    cpos);
 			ret = -EROFS;
 			goto out;
@@ -5539,10 +5542,10 @@ int ocfs2_remove_extent(struct inode *inode,
 		rec_range = le32_to_cpu(rec->e_cpos) +
 			ocfs2_rec_clusters(el, rec);
 		if (rec_range != trunc_range) {
-			ocfs2_error(inode->i_sb,
-				    "Inode %llu: error after split at cpos %u"
+			ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
+				    "Owner %llu: error after split at cpos %u"
 				    "trunc len %u, existing record is (%u,%u)",
-				    (unsigned long long)OCFS2_I(inode)->ip_blkno,
+				    (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
 				    cpos, len, le32_to_cpu(rec->e_cpos),
 				    ocfs2_rec_clusters(el, rec));
 			ret = -EROFS;
@@ -5607,8 +5610,7 @@ int ocfs2_remove_btree_range(struct inode *inode,
 	vfs_dq_free_space_nodirty(inode,
 				  ocfs2_clusters_to_bytes(inode->i_sb, len));
 
-	ret = ocfs2_remove_extent(inode, et, cpos, len, handle, meta_ac,
-				  dealloc);
+	ret = ocfs2_remove_extent(handle, et, cpos, len, meta_ac, dealloc);
 	if (ret) {
 		mlog_errno(ret);
 		goto out_commit;
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index d1d196eada8f..abc66ce9d418 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -117,9 +117,8 @@ int ocfs2_mark_extent_written(struct inode *inode,
 			      handle_t *handle, u32 cpos, u32 len, u32 phys,
 			      struct ocfs2_alloc_context *meta_ac,
 			      struct ocfs2_cached_dealloc_ctxt *dealloc);
-int ocfs2_remove_extent(struct inode *inode,
-			struct ocfs2_extent_tree *et,
-			u32 cpos, u32 len, handle_t *handle,
+int ocfs2_remove_extent(handle_t *handle, struct ocfs2_extent_tree *et,
+			u32 cpos, u32 len,
 			struct ocfs2_alloc_context *meta_ac,
 			struct ocfs2_cached_dealloc_ctxt *dealloc);
 int ocfs2_remove_btree_range(struct inode *inode,
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index fdd02c43fa14..96f973a302fa 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -663,7 +663,7 @@ static int __ocfs2_remove_xattr_range(struct inode *inode,
 		goto out;
 	}
 
-	ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, ctxt->meta_ac,
+	ret = ocfs2_remove_extent(handle, &et, cpos, len, ctxt->meta_ac,
 				  &ctxt->dealloc);
 	if (ret) {
 		mlog_errno(ret);
@@ -4881,7 +4881,7 @@ static int ocfs2_rm_xattr_cluster(struct inode *inode,
 		goto out_commit;
 	}
 
-	ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, meta_ac,
+	ret = ocfs2_remove_extent(handle, &et, cpos, len, meta_ac,
 				  &dealloc);
 	if (ret) {
 		mlog_errno(ret);
-- 
cgit v1.2.3


From d231129f44e7ead14f5f496e664ff1e3883a7b25 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 13 Feb 2009 03:43:22 -0800
Subject: ocfs2: ocfs2_split_and_insert() no longer needs struct inode.

It already has an extent_tree.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/alloc.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index bfead609f76c..85cd2adcc824 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -4879,10 +4879,9 @@ static void ocfs2_make_right_split_rec(struct super_block *sb,
 	split_rec->e_flags = rec->e_flags;
 }
 
-static int ocfs2_split_and_insert(struct inode *inode,
-				  handle_t *handle,
-				  struct ocfs2_path *path,
+static int ocfs2_split_and_insert(handle_t *handle,
 				  struct ocfs2_extent_tree *et,
+				  struct ocfs2_path *path,
 				  struct buffer_head **last_eb_bh,
 				  int split_index,
 				  struct ocfs2_extent_rec *orig_split_rec,
@@ -4944,8 +4943,8 @@ leftright:
 		 */
 		insert.ins_split = SPLIT_RIGHT;
 
-		ocfs2_make_right_split_rec(inode->i_sb, &tmprec, insert_range,
-					   &rec);
+		ocfs2_make_right_split_rec(ocfs2_metadata_cache_get_super(et->et_ci),
+					   &tmprec, insert_range, &rec);
 
 		split_rec = tmprec;
 
@@ -5100,7 +5099,7 @@ static int __ocfs2_mark_extent_written(struct inode *inode,
 						       path, el,
 						       split_index, split_rec);
 		else
-			ret = ocfs2_split_and_insert(inode, handle, path, et,
+			ret = ocfs2_split_and_insert(handle, et, path,
 						     &last_eb_bh, split_index,
 						     split_rec, meta_ac);
 		if (ret)
-- 
cgit v1.2.3


From f3868d0fa2e20d923087a8296fda47b0afe7f9ba Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Tue, 17 Feb 2009 19:46:04 -0800
Subject: ocfs2: Teach ocfs2_replace_extent_rec() to use an extent_tree.

Don't use a struct inode anymore.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/alloc.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 85cd2adcc824..7b0f2cd9f66f 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -4983,8 +4983,8 @@ out:
 	return ret;
 }
 
-static int ocfs2_replace_extent_rec(struct inode *inode,
-				    handle_t *handle,
+static int ocfs2_replace_extent_rec(handle_t *handle,
+				    struct ocfs2_extent_tree *et,
 				    struct ocfs2_path *path,
 				    struct ocfs2_extent_list *el,
 				    int split_index,
@@ -4992,7 +4992,7 @@ static int ocfs2_replace_extent_rec(struct inode *inode,
 {
 	int ret;
 
-	ret = ocfs2_path_bh_journal_access(handle, INODE_CACHE(inode), path,
+	ret = ocfs2_path_bh_journal_access(handle, et->et_ci, path,
 					   path_num_items(path) - 1);
 	if (ret) {
 		mlog_errno(ret);
@@ -5095,8 +5095,7 @@ static int __ocfs2_mark_extent_written(struct inode *inode,
 
 	if (ctxt.c_contig_type == CONTIG_NONE) {
 		if (ctxt.c_split_covers_rec)
-			ret = ocfs2_replace_extent_rec(inode, handle,
-						       path, el,
+			ret = ocfs2_replace_extent_rec(handle, et, path, el,
 						       split_index, split_rec);
 		else
 			ret = ocfs2_split_and_insert(handle, et, path,
-- 
cgit v1.2.3


From a1cf076ba93f9fdf3eb4195f9f43d1e7cb7550f2 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 13 Feb 2009 03:45:49 -0800
Subject: ocfs2: __ocfs2_mark_extent_written() doesn't need struct inode.

We only allow unwritten extents on data, so the toplevel
ocfs2_mark_extent_written() can use an inode all it wants.  But the
subfunction isn't even using the inode argument.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/alloc.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 7b0f2cd9f66f..4488685a7022 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -5026,9 +5026,8 @@ out:
  * have been brought into cache (and pinned via the journal), so the
  * extra overhead is not expressed in terms of disk reads.
  */
-static int __ocfs2_mark_extent_written(struct inode *inode,
+static int __ocfs2_mark_extent_written(handle_t *handle,
 				       struct ocfs2_extent_tree *et,
-				       handle_t *handle,
 				       struct ocfs2_path *path,
 				       int split_index,
 				       struct ocfs2_extent_rec *split_rec,
@@ -5062,7 +5061,7 @@ static int __ocfs2_mark_extent_written(struct inode *inode,
 
 	/*
 	 * The core merge / split code wants to know how much room is
-	 * left in this inodes allocation tree, so we pass the
+	 * left in this allocation tree, so we pass the
 	 * rightmost extent list.
 	 */
 	if (path->p_tree_depth) {
@@ -5185,7 +5184,7 @@ int ocfs2_mark_extent_written(struct inode *inode,
 	split_rec.e_flags = path_leaf_el(left_path)->l_recs[index].e_flags;
 	split_rec.e_flags &= ~OCFS2_EXT_UNWRITTEN;
 
-	ret = __ocfs2_mark_extent_written(inode, et, handle, left_path,
+	ret = __ocfs2_mark_extent_written(handle, et, left_path,
 					  index, &split_rec, meta_ac,
 					  dealloc);
 	if (ret)
-- 
cgit v1.2.3


From 5e404e9ed1b05cafb044bd46792e50197df805ed Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 13 Feb 2009 03:54:22 -0800
Subject: ocfs2: Pass ocfs2_caching_info into ocfs2_init_*_extent_tree().

With this commit, extent tree operations are divorced from inodes and
rely on ocfs2_caching_info.  Phew!

Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/alloc.c | 22 +++++++++++-----------
 fs/ocfs2/alloc.h |  8 ++++----
 fs/ocfs2/aops.c  |  6 ++++--
 fs/ocfs2/dir.c   | 15 +++++++++------
 fs/ocfs2/file.c  |  6 +++---
 fs/ocfs2/xattr.c |  8 ++++----
 6 files changed, 35 insertions(+), 30 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 4488685a7022..ab4d2b59b472 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -366,7 +366,7 @@ static struct ocfs2_extent_tree_operations ocfs2_dx_root_et_ops = {
 };
 
 static void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et,
-				     struct inode *inode,
+				     struct ocfs2_caching_info *ci,
 				     struct buffer_head *bh,
 				     ocfs2_journal_access_func access,
 				     void *obj,
@@ -374,7 +374,7 @@ static void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et,
 {
 	et->et_ops = ops;
 	et->et_root_bh = bh;
-	et->et_ci = INODE_CACHE(inode);
+	et->et_ci = ci;
 	et->et_root_journal_access = access;
 	if (!obj)
 		obj = (void *)bh->b_data;
@@ -388,34 +388,34 @@ static void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et,
 }
 
 void ocfs2_init_dinode_extent_tree(struct ocfs2_extent_tree *et,
-				   struct inode *inode,
+				   struct ocfs2_caching_info *ci,
 				   struct buffer_head *bh)
 {
-	__ocfs2_init_extent_tree(et, inode, bh, ocfs2_journal_access_di,
+	__ocfs2_init_extent_tree(et, ci, bh, ocfs2_journal_access_di,
 				 NULL, &ocfs2_dinode_et_ops);
 }
 
 void ocfs2_init_xattr_tree_extent_tree(struct ocfs2_extent_tree *et,
-				       struct inode *inode,
+				       struct ocfs2_caching_info *ci,
 				       struct buffer_head *bh)
 {
-	__ocfs2_init_extent_tree(et, inode, bh, ocfs2_journal_access_xb,
+	__ocfs2_init_extent_tree(et, ci, bh, ocfs2_journal_access_xb,
 				 NULL, &ocfs2_xattr_tree_et_ops);
 }
 
 void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et,
-					struct inode *inode,
+					struct ocfs2_caching_info *ci,
 					struct ocfs2_xattr_value_buf *vb)
 {
-	__ocfs2_init_extent_tree(et, inode, vb->vb_bh, vb->vb_access, vb,
+	__ocfs2_init_extent_tree(et, ci, vb->vb_bh, vb->vb_access, vb,
 				 &ocfs2_xattr_value_et_ops);
 }
 
 void ocfs2_init_dx_root_extent_tree(struct ocfs2_extent_tree *et,
-				    struct inode *inode,
+				    struct ocfs2_caching_info *ci,
 				    struct buffer_head *bh)
 {
-	__ocfs2_init_extent_tree(et, inode, bh, ocfs2_journal_access_dr,
+	__ocfs2_init_extent_tree(et, ci, bh, ocfs2_journal_access_dr,
 				 NULL, &ocfs2_dx_root_et_ops);
 }
 
@@ -7241,7 +7241,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
 		 * this proves to be false, we could always re-build
 		 * the in-inode data from our pages.
 		 */
-		ocfs2_init_dinode_extent_tree(&et, inode, di_bh);
+		ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
 		ret = ocfs2_insert_extent(handle, &et, 0, block, 1, 0, NULL);
 		if (ret) {
 			mlog_errno(ret);
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index abc66ce9d418..bcf6aa42ae53 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -68,17 +68,17 @@ struct ocfs2_extent_tree {
  * specified object buffer.
  */
 void ocfs2_init_dinode_extent_tree(struct ocfs2_extent_tree *et,
-				   struct inode *inode,
+				   struct ocfs2_caching_info *ci,
 				   struct buffer_head *bh);
 void ocfs2_init_xattr_tree_extent_tree(struct ocfs2_extent_tree *et,
-				       struct inode *inode,
+				       struct ocfs2_caching_info *ci,
 				       struct buffer_head *bh);
 struct ocfs2_xattr_value_buf;
 void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et,
-					struct inode *inode,
+					struct ocfs2_caching_info *ci,
 					struct ocfs2_xattr_value_buf *vb);
 void ocfs2_init_dx_root_extent_tree(struct ocfs2_extent_tree *et,
-				    struct inode *inode,
+				    struct ocfs2_caching_info *ci,
 				    struct buffer_head *bh);
 
 /*
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 49eef2c6f4aa..15c594dfd951 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1259,7 +1259,8 @@ static int ocfs2_write_cluster(struct address_space *mapping,
 			goto out;
 		}
 	} else if (unwritten) {
-		ocfs2_init_dinode_extent_tree(&et, inode, wc->w_di_bh);
+		ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode),
+					      wc->w_di_bh);
 		ret = ocfs2_mark_extent_written(inode, &et,
 						wc->w_handle, cpos, 1, phys,
 						meta_ac, &wc->w_dealloc);
@@ -1726,7 +1727,8 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
 		     (long long)i_size_read(inode), le32_to_cpu(di->i_clusters),
 		     clusters_to_alloc, extents_to_split);
 
-		ocfs2_init_dinode_extent_tree(&et, inode, wc->w_di_bh);
+		ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode),
+					      wc->w_di_bh);
 		ret = ocfs2_lock_allocators(inode, &et,
 					    clusters_to_alloc, extents_to_split,
 					    &data_ac, &meta_ac);
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index de490a6d76ba..28c3ec238796 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -2903,7 +2903,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
 	struct ocfs2_extent_tree dx_et;
 	int did_quota = 0, bytes_allocated = 0;
 
-	ocfs2_init_dinode_extent_tree(&et, dir, di_bh);
+	ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(dir), di_bh);
 
 	alloc = ocfs2_clusters_for_bytes(sb, bytes);
 	dx_alloc = 0;
@@ -3125,7 +3125,9 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
 			ocfs2_dx_dir_index_root_block(dir, dx_root_bh,
 						      dirdata_bh);
 		} else {
-			ocfs2_init_dx_root_extent_tree(&dx_et, dir, dx_root_bh);
+			ocfs2_init_dx_root_extent_tree(&dx_et,
+						       INODE_CACHE(dir),
+						       dx_root_bh);
 			ret = ocfs2_insert_extent(handle, &dx_et, 0,
 						  dx_insert_blkno, 1, 0, NULL);
 			if (ret)
@@ -3345,7 +3347,8 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
 	spin_lock(&OCFS2_I(dir)->ip_lock);
 	if (dir_i_size == ocfs2_clusters_to_bytes(sb, OCFS2_I(dir)->ip_clusters)) {
 		spin_unlock(&OCFS2_I(dir)->ip_lock);
-		ocfs2_init_dinode_extent_tree(&et, dir, parent_fe_bh);
+		ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(dir),
+					      parent_fe_bh);
 		num_free_extents = ocfs2_num_free_extents(osb, &et);
 		if (num_free_extents < 0) {
 			status = num_free_extents;
@@ -3837,7 +3840,7 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir,
 	     (unsigned long long)OCFS2_I(dir)->ip_blkno,
 	     (unsigned long long)leaf_blkno, insert_hash);
 
-	ocfs2_init_dx_root_extent_tree(&et, dir, dx_root_bh);
+	ocfs2_init_dx_root_extent_tree(&et, INODE_CACHE(dir), dx_root_bh);
 
 	dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
 	/*
@@ -4216,7 +4219,7 @@ static int ocfs2_expand_inline_dx_root(struct inode *dir,
 
 	/* This should never fail considering we start with an empty
 	 * dx_root. */
-	ocfs2_init_dx_root_extent_tree(&et, dir, dx_root_bh);
+	ocfs2_init_dx_root_extent_tree(&et, INODE_CACHE(dir), dx_root_bh);
 	ret = ocfs2_insert_extent(handle, &et, 0, insert_blkno, 1, 0, NULL);
 	if (ret)
 		mlog_errno(ret);
@@ -4540,7 +4543,7 @@ int ocfs2_dx_dir_truncate(struct inode *dir, struct buffer_head *di_bh)
 	if (ocfs2_dx_root_inline(dx_root))
 		goto remove_index;
 
-	ocfs2_init_dx_root_extent_tree(&et, dir, dx_root_bh);
+	ocfs2_init_dx_root_extent_tree(&et, INODE_CACHE(dir), dx_root_bh);
 
 	/* XXX: What if dr_clusters is too large? */
 	while (le32_to_cpu(dx_root->dr_clusters)) {
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 891e2c1e625c..4921b4ee9431 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -515,7 +515,7 @@ int ocfs2_add_inode_data(struct ocfs2_super *osb,
 	int ret;
 	struct ocfs2_extent_tree et;
 
-	ocfs2_init_dinode_extent_tree(&et, inode, fe_bh);
+	ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), fe_bh);
 	ret = ocfs2_add_clusters_in_btree(handle, &et, logical_offset,
 					  clusters_to_add, mark_unwritten,
 					  data_ac, meta_ac, reason_ret);
@@ -563,7 +563,7 @@ restart_all:
 	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
 	     (long long)i_size_read(inode), le32_to_cpu(fe->i_clusters),
 	     clusters_to_add);
-	ocfs2_init_dinode_extent_tree(&et, inode, bh);
+	ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), bh);
 	status = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0,
 				       &data_ac, &meta_ac);
 	if (status) {
@@ -1394,7 +1394,7 @@ static int ocfs2_remove_inode_range(struct inode *inode,
 	struct address_space *mapping = inode->i_mapping;
 	struct ocfs2_extent_tree et;
 
-	ocfs2_init_dinode_extent_tree(&et, inode, di_bh);
+	ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
 	ocfs2_init_dealloc_ctxt(&dealloc);
 
 	if (byte_len == 0)
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 96f973a302fa..1bf12c453f99 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -602,7 +602,7 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode,
 
 	mlog(0, "(clusters_to_add for xattr= %u)\n", clusters_to_add);
 
-	ocfs2_init_xattr_value_extent_tree(&et, inode, vb);
+	ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
 
 	status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
 			      OCFS2_JOURNAL_ACCESS_WRITE);
@@ -654,7 +654,7 @@ static int __ocfs2_remove_xattr_range(struct inode *inode,
 	handle_t *handle = ctxt->handle;
 	struct ocfs2_extent_tree et;
 
-	ocfs2_init_xattr_value_extent_tree(&et, inode, vb);
+	ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
 
 	ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
 			    OCFS2_JOURNAL_ACCESS_WRITE);
@@ -4266,7 +4266,7 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
 	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
 	     prev_cpos, (unsigned long long)bucket_blkno(first));
 
-	ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh);
+	ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh);
 
 	ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh,
 				      OCFS2_JOURNAL_ACCESS_WRITE);
@@ -4841,7 +4841,7 @@ static int ocfs2_rm_xattr_cluster(struct inode *inode,
 	struct ocfs2_cached_dealloc_ctxt dealloc;
 	struct ocfs2_extent_tree et;
 
-	ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh);
+	ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh);
 
 	ocfs2_init_dealloc_ctxt(&dealloc);
 
-- 
cgit v1.2.3


From 721f69c404c51a5d1dc93fddb48ee936e8e23770 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:17:49 +0800
Subject: ocfs2: Define refcount tree structure.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/ocfs2/ocfs2_fs.h | 88 +++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 85 insertions(+), 3 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 7ab6e9e5e77c..e4288b446ec0 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -68,6 +68,7 @@
 #define OCFS2_DIR_TRAILER_SIGNATURE	"DIRTRL1"
 #define OCFS2_DX_ROOT_SIGNATURE		"DXDIR01"
 #define OCFS2_DX_LEAF_SIGNATURE		"DXLEAF1"
+#define OCFS2_REFCOUNT_BLOCK_SIGNATURE	"REFCNT1"
 
 /* Compatibility flags */
 #define OCFS2_HAS_COMPAT_FEATURE(sb,mask)			\
@@ -160,6 +161,9 @@
 /* Metadata checksum and error correction */
 #define OCFS2_FEATURE_INCOMPAT_META_ECC		0x0800
 
+/* Refcount tree support */
+#define OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE	0x1000
+
 /*
  * backup superblock flag is used to indicate that this volume
  * has backup superblocks.
@@ -223,6 +227,7 @@
 #define OCFS2_HAS_XATTR_FL	(0x0002)
 #define OCFS2_INLINE_XATTR_FL	(0x0004)
 #define OCFS2_INDEXED_DIR_FL	(0x0008)
+#define OCFS2_HAS_REFCOUNT_FL   (0x0010)
 
 /* Inode attributes, keep in sync with EXT2 */
 #define OCFS2_SECRM_FL		(0x00000001)	/* Secure deletion */
@@ -241,8 +246,11 @@
 /*
  * Extent record flags (e_node.leaf.flags)
  */
-#define OCFS2_EXT_UNWRITTEN	(0x01)	/* Extent is allocated but
-					 * unwritten */
+#define OCFS2_EXT_UNWRITTEN		(0x01)	/* Extent is allocated but
+						 * unwritten */
+#define OCFS2_EXT_REFCOUNTED		(0x02)  /* Extent is reference
+						 * counted in an associated
+						 * refcount tree */
 
 /*
  * ioctl commands
@@ -717,7 +725,8 @@ struct ocfs2_dinode {
 	__le64 i_xattr_loc;
 /*80*/	struct ocfs2_block_check i_check;	/* Error checking */
 /*88*/	__le64 i_dx_root;		/* Pointer to dir index root block */
-	__le64 i_reserved2[5];
+/*90*/	__le64 i_refcount_loc;
+	__le64 i_reserved2[4];
 /*B8*/	union {
 		__le64 i_pad1;		/* Generic way to refer to this
 					   64bit union */
@@ -901,6 +910,59 @@ struct ocfs2_group_desc
 /*40*/	__u8    bg_bitmap[0];
 };
 
+struct ocfs2_refcount_rec {
+/*00*/	__le64 r_cpos;		/* Physical offset, in clusters */
+	__le32 r_clusters;	/* Clusters covered by this extent */
+	__le32 r_refcount;	/* Reference count of this extent */
+/*10*/
+};
+
+#define OCFS2_REFCOUNT_LEAF_FL          (0x00000001)
+#define OCFS2_REFCOUNT_TREE_FL          (0x00000002)
+
+struct ocfs2_refcount_list {
+/*00*/	__le16 rl_count;	/* Maximum number of entries possible
+				   in rl_records */
+	__le16 rl_used;		/* Current number of used records */
+	__le32 rl_reserved2;
+	__le64 rl_reserved1;	/* Pad to sizeof(ocfs2_refcount_record) */
+/*10*/	struct ocfs2_refcount_rec rl_recs[0];	/* Refcount records */
+};
+
+
+struct ocfs2_refcount_block {
+/*00*/	__u8 rf_signature[8];		/* Signature for verification */
+	__le16 rf_suballoc_slot;	/* Slot suballocator this block
+					   belongs to */
+	__le16 rf_suballoc_bit;		/* Bit offset in suballocator
+					   block group */
+	__le32 rf_fs_generation;	/* Must match superblock */
+/*10*/	__le64 rf_blkno;		/* Offset on disk, in blocks */
+	__le64 rf_parent;		/* Parent block, only valid if
+					   OCFS2_REFCOUNT_LEAF_FL is set in
+					   rf_flags */
+/*20*/	struct ocfs2_block_check rf_check;	/* Error checking */
+	__le64 rf_last_eb_blk;		/* Pointer to last extent block */
+/*30*/	__le32 rf_count;		/* Number of inodes sharing this
+					   refcount tree */
+	__le32 rf_flags;		/* See the flags above */
+	__le32 rf_clusters;		/* clusters covered by refcount tree. */
+	__le32 rf_cpos;			/* cluster offset in refcount tree.*/
+/*40*/	__le32 rf_generation;		/* generation number. all be the same
+					 * for the same refcount tree. */
+	__le32 rf_reserved0;
+	__le64 rf_reserved1[7];
+/*80*/	union {
+		struct ocfs2_refcount_list rf_records;  /* List of refcount
+							  records */
+		struct ocfs2_extent_list rf_list;	/* Extent record list,
+							only valid if
+							OCFS2_REFCOUNT_TREE_FL
+							is set in rf_flags */
+	};
+/* Actual on-disk size is one block */
+};
+
 /*
  * On disk extended attribute structure for OCFS2.
  */
@@ -1312,6 +1374,26 @@ static inline u16 ocfs2_xattr_recs_per_xb(struct super_block *sb)
 
 	return size / sizeof(struct ocfs2_extent_rec);
 }
+
+static inline u16 ocfs2_extent_recs_per_rb(struct super_block *sb)
+{
+	int size;
+
+	size = sb->s_blocksize -
+		offsetof(struct ocfs2_refcount_block, rf_list.l_recs);
+
+	return size / sizeof(struct ocfs2_extent_rec);
+}
+
+static inline u16 ocfs2_refcount_recs_per_rb(struct super_block *sb)
+{
+	int size;
+
+	size = sb->s_blocksize -
+		offsetof(struct ocfs2_refcount_block, rf_records.rl_recs);
+
+	return size / sizeof(struct ocfs2_refcount_rec);
+}
 #else
 static inline int ocfs2_fast_symlink_chars(int blocksize)
 {
-- 
cgit v1.2.3


From 93c97087a646429f4dc0d73298d64674ddd5cde8 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:19:20 +0800
Subject: ocfs2: Add metaecc for ocfs2_refcount_block.

Add metaecc and journal trigger for ocfs2_refcount_block.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/ocfs2/journal.c | 15 +++++++++++++++
 fs/ocfs2/journal.h |  3 +++
 2 files changed, 18 insertions(+)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 5b6c0e441445..54c16b66327e 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -555,6 +555,14 @@ static struct ocfs2_triggers eb_triggers = {
 	.ot_offset	= offsetof(struct ocfs2_extent_block, h_check),
 };
 
+static struct ocfs2_triggers rb_triggers = {
+	.ot_triggers = {
+		.t_commit = ocfs2_commit_trigger,
+		.t_abort = ocfs2_abort_trigger,
+	},
+	.ot_offset	= offsetof(struct ocfs2_refcount_block, rf_check),
+};
+
 static struct ocfs2_triggers gd_triggers = {
 	.ot_triggers = {
 		.t_commit = ocfs2_commit_trigger,
@@ -677,6 +685,13 @@ int ocfs2_journal_access_eb(handle_t *handle, struct ocfs2_caching_info *ci,
 	return __ocfs2_journal_access(handle, ci, bh, &eb_triggers, type);
 }
 
+int ocfs2_journal_access_rb(handle_t *handle, struct ocfs2_caching_info *ci,
+			    struct buffer_head *bh, int type)
+{
+	return __ocfs2_journal_access(handle, ci, bh, &rb_triggers,
+				      type);
+}
+
 int ocfs2_journal_access_gd(handle_t *handle, struct ocfs2_caching_info *ci,
 			    struct buffer_head *bh, int type)
 {
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 6163f28badda..b2dc125c6e9a 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -281,6 +281,9 @@ int ocfs2_journal_access_di(handle_t *handle, struct ocfs2_caching_info *ci,
 /* ocfs2_extent_block */
 int ocfs2_journal_access_eb(handle_t *handle, struct ocfs2_caching_info *ci,
 			    struct buffer_head *bh, int type);
+/* ocfs2_refcount_block */
+int ocfs2_journal_access_rb(handle_t *handle, struct ocfs2_caching_info *ci,
+			    struct buffer_head *bh, int type);
 /* ocfs2_group_desc */
 int ocfs2_journal_access_gd(handle_t *handle, struct ocfs2_caching_info *ci,
 			    struct buffer_head *bh, int type);
-- 
cgit v1.2.3


From f2c870e3b12e38da6d9b5b17c4c8ae56a0ed68e4 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:19:26 +0800
Subject: ocfs2: Add ocfs2_read_refcount_block.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/ocfs2/Makefile          |  1 +
 fs/ocfs2/cluster/masklog.c |  1 +
 fs/ocfs2/cluster/masklog.h |  1 +
 fs/ocfs2/ocfs2.h           |  3 ++
 fs/ocfs2/refcounttree.c    | 99 ++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 105 insertions(+)
 create mode 100644 fs/ocfs2/refcounttree.c

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile
index 01596079dd63..31f25ce32c97 100644
--- a/fs/ocfs2/Makefile
+++ b/fs/ocfs2/Makefile
@@ -28,6 +28,7 @@ ocfs2-objs := \
 	locks.o			\
 	mmap.o 			\
 	namei.o 		\
+	refcounttree.o		\
 	resize.o		\
 	slot_map.o 		\
 	suballoc.o 		\
diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c
index 96df5416993e..1cd2934de615 100644
--- a/fs/ocfs2/cluster/masklog.c
+++ b/fs/ocfs2/cluster/masklog.c
@@ -111,6 +111,7 @@ static struct mlog_attribute mlog_attrs[MLOG_MAX_BITS] = {
 	define_mask(EXPORT),
 	define_mask(XATTR),
 	define_mask(QUOTA),
+	define_mask(REFCOUNT),
 	define_mask(ERROR),
 	define_mask(NOTICE),
 	define_mask(KTHREAD),
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
index 696c32e50716..9b4d11726cf2 100644
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -113,6 +113,7 @@
 #define ML_EXPORT	0x0000000010000000ULL /* ocfs2 export operations */
 #define ML_XATTR	0x0000000020000000ULL /* ocfs2 extended attributes */
 #define ML_QUOTA	0x0000000040000000ULL /* ocfs2 quota operations */
+#define ML_REFCOUNT	0x0000000080000000ULL /* refcount tree operations */
 /* bits that are infrequently given and frequently matched in the high word */
 #define ML_ERROR	0x0000000100000000ULL /* sent to KERN_ERR */
 #define ML_NOTICE	0x0000000200000000ULL /* setn to KERN_NOTICE */
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index d370262b3621..6688d19e4451 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -610,6 +610,9 @@ static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb)
 #define OCFS2_IS_VALID_DX_LEAF(ptr)					\
 	(!strcmp((ptr)->dl_signature, OCFS2_DX_LEAF_SIGNATURE))
 
+#define OCFS2_IS_VALID_REFCOUNT_BLOCK(ptr)				\
+	(!strcmp((ptr)->rf_signature, OCFS2_REFCOUNT_BLOCK_SIGNATURE))
+
 static inline unsigned long ino_from_blkno(struct super_block *sb,
 					   u64 blkno)
 {
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
new file mode 100644
index 000000000000..a923535d9c37
--- /dev/null
+++ b/fs/ocfs2/refcounttree.c
@@ -0,0 +1,99 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * refcounttree.c
+ *
+ * Copyright (C) 2009 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#define MLOG_MASK_PREFIX ML_REFCOUNT
+#include <cluster/masklog.h>
+#include "ocfs2.h"
+#include "inode.h"
+#include "alloc.h"
+#include "suballoc.h"
+#include "journal.h"
+#include "uptodate.h"
+#include "super.h"
+#include "buffer_head_io.h"
+#include "blockcheck.h"
+
+static int ocfs2_validate_refcount_block(struct super_block *sb,
+					 struct buffer_head *bh)
+{
+	int rc;
+	struct ocfs2_refcount_block *rb =
+		(struct ocfs2_refcount_block *)bh->b_data;
+
+	mlog(0, "Validating refcount block %llu\n",
+	     (unsigned long long)bh->b_blocknr);
+
+	BUG_ON(!buffer_uptodate(bh));
+
+	/*
+	 * If the ecc fails, we return the error but otherwise
+	 * leave the filesystem running.  We know any error is
+	 * local to this block.
+	 */
+	rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &rb->rf_check);
+	if (rc) {
+		mlog(ML_ERROR, "Checksum failed for refcount block %llu\n",
+		     (unsigned long long)bh->b_blocknr);
+		return rc;
+	}
+
+
+	if (!OCFS2_IS_VALID_REFCOUNT_BLOCK(rb)) {
+		ocfs2_error(sb,
+			    "Refcount block #%llu has bad signature %.*s",
+			    (unsigned long long)bh->b_blocknr, 7,
+			    rb->rf_signature);
+		return -EINVAL;
+	}
+
+	if (le64_to_cpu(rb->rf_blkno) != bh->b_blocknr) {
+		ocfs2_error(sb,
+			    "Refcount block #%llu has an invalid rf_blkno "
+			    "of %llu",
+			    (unsigned long long)bh->b_blocknr,
+			    (unsigned long long)le64_to_cpu(rb->rf_blkno));
+		return -EINVAL;
+	}
+
+	if (le32_to_cpu(rb->rf_fs_generation) != OCFS2_SB(sb)->fs_generation) {
+		ocfs2_error(sb,
+			    "Refcount block #%llu has an invalid "
+			    "rf_fs_generation of #%u",
+			    (unsigned long long)bh->b_blocknr,
+			    le32_to_cpu(rb->rf_fs_generation));
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int ocfs2_read_refcount_block(struct ocfs2_caching_info *ci,
+				     u64 rb_blkno,
+				     struct buffer_head **bh)
+{
+	int rc;
+	struct buffer_head *tmp = *bh;
+
+	rc = ocfs2_read_block(ci, rb_blkno, &tmp,
+			      ocfs2_validate_refcount_block);
+
+	/* If ocfs2_read_block() got us a new bh, pass it up. */
+	if (!rc && !*bh)
+		*bh = tmp;
+
+	return rc;
+}
-- 
cgit v1.2.3


From a433848132d8cdfb8173745b922ddb919de11527 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:19:29 +0800
Subject: ocfs2: Abstract caching info checkpoint.

During a meta downconvert, we need to checkpoint the metadata in an inode.
The refcount tree needs the same thing, so abstract the process out.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/ocfs2/dlmglue.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index f518d1bee30a..79db0557df88 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -3495,11 +3495,11 @@ out:
 	return UNBLOCK_CONTINUE;
 }
 
-static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
-					int new_level)
+static int ocfs2_ci_checkpointed(struct ocfs2_caching_info *ci,
+				 struct ocfs2_lock_res *lockres,
+				 int new_level)
 {
-	struct inode *inode = ocfs2_lock_res_inode(lockres);
-	int checkpointed = ocfs2_ci_fully_checkpointed(INODE_CACHE(inode));
+	int checkpointed = ocfs2_ci_fully_checkpointed(ci);
 
 	BUG_ON(new_level != DLM_LOCK_NL && new_level != DLM_LOCK_PR);
 	BUG_ON(lockres->l_level != DLM_LOCK_EX && !checkpointed);
@@ -3507,10 +3507,18 @@ static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
 	if (checkpointed)
 		return 1;
 
-	ocfs2_start_checkpoint(OCFS2_SB(inode->i_sb));
+	ocfs2_start_checkpoint(OCFS2_SB(ocfs2_metadata_cache_get_super(ci)));
 	return 0;
 }
 
+static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
+					int new_level)
+{
+	struct inode *inode = ocfs2_lock_res_inode(lockres);
+
+	return ocfs2_ci_checkpointed(INODE_CACHE(inode), lockres, new_level);
+}
+
 static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres)
 {
 	struct inode *inode = ocfs2_lock_res_inode(lockres);
-- 
cgit v1.2.3


From 8dec98edfe9684ce00b580a09dde3dcd21ee785b Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:19:58 +0800
Subject: ocfs2: Add new refcount tree lock resource in dlmglue.

The refcount tree lock resource is used to protect refcount tree
reads and writes across multiple nodes.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/ocfs2/dlmglue.c      | 80 +++++++++++++++++++++++++++++++++++++++++++++++++
 fs/ocfs2/dlmglue.h      |  6 ++++
 fs/ocfs2/ocfs2_lockid.h |  5 ++++
 fs/ocfs2/refcounttree.h | 36 ++++++++++++++++++++++
 4 files changed, 127 insertions(+)
 create mode 100644 fs/ocfs2/refcounttree.h

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 79db0557df88..bb2fc6993e2a 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -53,6 +53,7 @@
 #include "super.h"
 #include "uptodate.h"
 #include "quota.h"
+#include "refcounttree.h"
 
 #include "buffer_head_io.h"
 
@@ -110,6 +111,11 @@ static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
 
 static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres);
 
+static int ocfs2_check_refcount_downconvert(struct ocfs2_lock_res *lockres,
+					    int new_level);
+static int ocfs2_refcount_convert_worker(struct ocfs2_lock_res *lockres,
+					 int blocking);
+
 #define mlog_meta_lvb(__level, __lockres) ocfs2_dump_meta_lvb_info(__level, __PRETTY_FUNCTION__, __LINE__, __lockres)
 
 /* This aids in debugging situations where a bad LVB might be involved. */
@@ -278,6 +284,12 @@ static struct ocfs2_lock_res_ops ocfs2_qinfo_lops = {
 	.flags		= LOCK_TYPE_REQUIRES_REFRESH | LOCK_TYPE_USES_LVB,
 };
 
+static struct ocfs2_lock_res_ops ocfs2_refcount_block_lops = {
+	.check_downconvert = ocfs2_check_refcount_downconvert,
+	.downconvert_worker = ocfs2_refcount_convert_worker,
+	.flags		= 0,
+};
+
 static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres)
 {
 	return lockres->l_type == OCFS2_LOCK_TYPE_META ||
@@ -306,6 +318,12 @@ static inline struct ocfs2_mem_dqinfo *ocfs2_lock_res_qinfo(struct ocfs2_lock_re
 	return (struct ocfs2_mem_dqinfo *)lockres->l_priv;
 }
 
+static inline struct ocfs2_refcount_tree *
+ocfs2_lock_res_refcount_tree(struct ocfs2_lock_res *res)
+{
+	return container_of(res, struct ocfs2_refcount_tree, rf_lockres);
+}
+
 static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *lockres)
 {
 	if (lockres->l_ops->get_osb)
@@ -693,6 +711,17 @@ void ocfs2_qinfo_lock_res_init(struct ocfs2_lock_res *lockres,
 				   info);
 }
 
+void ocfs2_refcount_lock_res_init(struct ocfs2_lock_res *lockres,
+				  struct ocfs2_super *osb, u64 ref_blkno,
+				  unsigned int generation)
+{
+	ocfs2_lock_res_init_once(lockres);
+	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_REFCOUNT, ref_blkno,
+			      generation, lockres->l_name);
+	ocfs2_lock_res_init_common(osb, lockres, OCFS2_LOCK_TYPE_REFCOUNT,
+				   &ocfs2_refcount_block_lops, osb);
+}
+
 void ocfs2_lock_res_free(struct ocfs2_lock_res *res)
 {
 	mlog_entry_void();
@@ -3648,6 +3677,26 @@ static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
 	return UNBLOCK_CONTINUE_POST;
 }
 
+static int ocfs2_check_refcount_downconvert(struct ocfs2_lock_res *lockres,
+					    int new_level)
+{
+	struct ocfs2_refcount_tree *tree =
+				ocfs2_lock_res_refcount_tree(lockres);
+
+	return ocfs2_ci_checkpointed(&tree->rf_ci, lockres, new_level);
+}
+
+static int ocfs2_refcount_convert_worker(struct ocfs2_lock_res *lockres,
+					 int blocking)
+{
+	struct ocfs2_refcount_tree *tree =
+				ocfs2_lock_res_refcount_tree(lockres);
+
+	ocfs2_metadata_cache_purge(&tree->rf_ci);
+
+	return UNBLOCK_CONTINUE;
+}
+
 static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres)
 {
 	struct ocfs2_qinfo_lvb *lvb;
@@ -3760,6 +3809,37 @@ bail:
 	return status;
 }
 
+int ocfs2_refcount_lock(struct ocfs2_refcount_tree *ref_tree, int ex)
+{
+	int status;
+	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
+	struct ocfs2_lock_res *lockres = &ref_tree->rf_lockres;
+	struct ocfs2_super *osb = lockres->l_priv;
+
+
+	if (ocfs2_is_hard_readonly(osb))
+		return -EROFS;
+
+	if (ocfs2_mount_local(osb))
+		return 0;
+
+	status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
+	if (status < 0)
+		mlog_errno(status);
+
+	return status;
+}
+
+void ocfs2_refcount_unlock(struct ocfs2_refcount_tree *ref_tree, int ex)
+{
+	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
+	struct ocfs2_lock_res *lockres = &ref_tree->rf_lockres;
+	struct ocfs2_super *osb = lockres->l_priv;
+
+	if (!ocfs2_mount_local(osb))
+		ocfs2_cluster_unlock(osb, lockres, level);
+}
+
 /*
  * This is the filesystem locking protocol.  It provides the lock handling
  * hooks for the underlying DLM.  It has a maximum version number.
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index 7553836931de..d1ce48e1b3d6 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -101,6 +101,9 @@ void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres,
 struct ocfs2_mem_dqinfo;
 void ocfs2_qinfo_lock_res_init(struct ocfs2_lock_res *lockres,
                                struct ocfs2_mem_dqinfo *info);
+void ocfs2_refcount_lock_res_init(struct ocfs2_lock_res *lockres,
+				  struct ocfs2_super *osb, u64 ref_blkno,
+				  unsigned int generation);
 void ocfs2_lock_res_free(struct ocfs2_lock_res *res);
 int ocfs2_create_new_inode_locks(struct inode *inode);
 int ocfs2_drop_inode_locks(struct inode *inode);
@@ -148,6 +151,9 @@ int ocfs2_file_lock(struct file *file, int ex, int trylock);
 void ocfs2_file_unlock(struct file *file);
 int ocfs2_qinfo_lock(struct ocfs2_mem_dqinfo *oinfo, int ex);
 void ocfs2_qinfo_unlock(struct ocfs2_mem_dqinfo *oinfo, int ex);
+struct ocfs2_refcount_tree;
+int ocfs2_refcount_lock(struct ocfs2_refcount_tree *ref_tree, int ex);
+void ocfs2_refcount_unlock(struct ocfs2_refcount_tree *ref_tree, int ex);
 
 
 void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres);
diff --git a/fs/ocfs2/ocfs2_lockid.h b/fs/ocfs2/ocfs2_lockid.h
index c212cf5a2bdf..d277aabf5dfb 100644
--- a/fs/ocfs2/ocfs2_lockid.h
+++ b/fs/ocfs2/ocfs2_lockid.h
@@ -49,6 +49,7 @@ enum ocfs2_lock_type {
 	OCFS2_LOCK_TYPE_QINFO,
 	OCFS2_LOCK_TYPE_NFS_SYNC,
 	OCFS2_LOCK_TYPE_ORPHAN_SCAN,
+	OCFS2_LOCK_TYPE_REFCOUNT,
 	OCFS2_NUM_LOCK_TYPES
 };
 
@@ -89,6 +90,9 @@ static inline char ocfs2_lock_type_char(enum ocfs2_lock_type type)
 		case OCFS2_LOCK_TYPE_ORPHAN_SCAN:
 			c = 'P';
 			break;
+		case OCFS2_LOCK_TYPE_REFCOUNT:
+			c = 'T';
+			break;
 		default:
 			c = '\0';
 	}
@@ -110,6 +114,7 @@ static char *ocfs2_lock_type_strings[] = {
 	[OCFS2_LOCK_TYPE_QINFO] = "Quota",
 	[OCFS2_LOCK_TYPE_NFS_SYNC] = "NFSSync",
 	[OCFS2_LOCK_TYPE_ORPHAN_SCAN] = "OrphanScan",
+	[OCFS2_LOCK_TYPE_REFCOUNT] = "Refcount",
 };
 
 static inline const char *ocfs2_lock_type_string(enum ocfs2_lock_type type)
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h
new file mode 100644
index 000000000000..9a3695cdbb53
--- /dev/null
+++ b/fs/ocfs2/refcounttree.h
@@ -0,0 +1,36 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * refcounttree.h
+ *
+ * Copyright (C) 2009 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#ifndef OCFS2_REFCOUNTTREE_H
+#define OCFS2_REFCOUNTTREE_H
+
+struct ocfs2_refcount_tree {
+	struct rb_node rf_node;
+	u64 rf_blkno;
+	u32 rf_generation;
+	struct rw_semaphore rf_sem;
+	struct ocfs2_lock_res rf_lockres;
+	struct kref rf_getcnt;
+	int rf_removed;
+
+	/* the following 4 fields are used by caching_info. */
+	struct ocfs2_caching_info rf_ci;
+	spinlock_t rf_lock;
+	struct mutex rf_io_mutex;
+	struct super_block *rf_sb;
+};
+
+#endif /* OCFS2_REFCOUNTTREE_H */
-- 
cgit v1.2.3


From c732eb16bf07f9bfb7fa72b6868462471273bdbd Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:21:00 +0800
Subject: ocfs2: Add caching info for refcount tree.

The refcount tree should use its own caching info so that when
we downconvert the refcount tree lock, we can drop all the
cached buffer heads.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/ocfs2/refcounttree.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index a923535d9c37..eb0f4a047938 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -26,6 +26,13 @@
 #include "super.h"
 #include "buffer_head_io.h"
 #include "blockcheck.h"
+#include "refcounttree.h"
+
+static inline struct ocfs2_refcount_tree *
+cache_info_to_refcount(struct ocfs2_caching_info *ci)
+{
+	return container_of(ci, struct ocfs2_refcount_tree, rf_ci);
+}
 
 static int ocfs2_validate_refcount_block(struct super_block *sb,
 					 struct buffer_head *bh)
@@ -97,3 +104,55 @@ static int ocfs2_read_refcount_block(struct ocfs2_caching_info *ci,
 
 	return rc;
 }
+
+static u64 ocfs2_refcount_cache_owner(struct ocfs2_caching_info *ci)
+{
+	struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);
+
+	return rf->rf_blkno;
+}
+
+static struct super_block *
+ocfs2_refcount_cache_get_super(struct ocfs2_caching_info *ci)
+{
+	struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);
+
+	return rf->rf_sb;
+}
+
+static void ocfs2_refcount_cache_lock(struct ocfs2_caching_info *ci)
+{
+	struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);
+
+	spin_lock(&rf->rf_lock);
+}
+
+static void ocfs2_refcount_cache_unlock(struct ocfs2_caching_info *ci)
+{
+	struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);
+
+	spin_unlock(&rf->rf_lock);
+}
+
+static void ocfs2_refcount_cache_io_lock(struct ocfs2_caching_info *ci)
+{
+	struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);
+
+	mutex_lock(&rf->rf_io_mutex);
+}
+
+static void ocfs2_refcount_cache_io_unlock(struct ocfs2_caching_info *ci)
+{
+	struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);
+
+	mutex_unlock(&rf->rf_io_mutex);
+}
+
+static const struct ocfs2_caching_operations ocfs2_refcount_caching_ops = {
+	.co_owner		= ocfs2_refcount_cache_owner,
+	.co_get_super		= ocfs2_refcount_cache_get_super,
+	.co_cache_lock		= ocfs2_refcount_cache_lock,
+	.co_cache_unlock	= ocfs2_refcount_cache_unlock,
+	.co_io_lock		= ocfs2_refcount_cache_io_lock,
+	.co_io_unlock		= ocfs2_refcount_cache_io_unlock,
+};
-- 
cgit v1.2.3


From 374a263e790c4de85844283c098810a92985f623 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Mon, 24 Aug 2009 11:13:37 +0800
Subject: ocfs2: Add refcount tree lock mechanism.

Implement locking around struct ocfs2_refcount_tree.  This protects
all read/write operations on refcount trees.  ocfs2_refcount_tree
has its own lock and its own caching_info, protecting buffers among
multiple nodes.

Callers must call ocfs2_lock_refcount_tree before operating on
the tree and ocfs2_unlock_refcount_tree once they are done.

ocfs2_refcount_trees are referenced by the block number of the
refcount tree root block, so we create an rb-tree on the ocfs2_super
to look them up.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/ocfs2/ocfs2.h        |   4 +
 fs/ocfs2/refcounttree.c | 359 ++++++++++++++++++++++++++++++++++++++++++++++++
 fs/ocfs2/refcounttree.h |   7 +
 fs/ocfs2/super.c        |   5 +
 4 files changed, 375 insertions(+)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 6688d19e4451..bb5357376ef5 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -408,6 +408,10 @@ struct ocfs2_super
 
 	/* the group we used to allocate inodes. */
 	u64				osb_inode_alloc_group;
+
+	/* rb tree root for refcount lock. */
+	struct rb_root	osb_rf_lock_tree;
+	struct ocfs2_refcount_tree *osb_ref_tree_lru;
 };
 
 #define OCFS2_SB(sb)	    ((struct ocfs2_super *)(sb)->s_fs_info)
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index eb0f4a047938..8d79de8637b8 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -27,6 +27,7 @@
 #include "buffer_head_io.h"
 #include "blockcheck.h"
 #include "refcounttree.h"
+#include "dlmglue.h"
 
 static inline struct ocfs2_refcount_tree *
 cache_info_to_refcount(struct ocfs2_caching_info *ci)
@@ -156,3 +157,361 @@ static const struct ocfs2_caching_operations ocfs2_refcount_caching_ops = {
 	.co_io_lock		= ocfs2_refcount_cache_io_lock,
 	.co_io_unlock		= ocfs2_refcount_cache_io_unlock,
 };
+
+static struct ocfs2_refcount_tree *
+ocfs2_find_refcount_tree(struct ocfs2_super *osb, u64 blkno)
+{
+	struct rb_node *n = osb->osb_rf_lock_tree.rb_node;
+	struct ocfs2_refcount_tree *tree = NULL;
+
+	while (n) {
+		tree = rb_entry(n, struct ocfs2_refcount_tree, rf_node);
+
+		if (blkno < tree->rf_blkno)
+			n = n->rb_left;
+		else if (blkno > tree->rf_blkno)
+			n = n->rb_right;
+		else
+			return tree;
+	}
+
+	return NULL;
+}
+
+/* osb_lock is already locked. */
+static void ocfs2_insert_refcount_tree(struct ocfs2_super *osb,
+				       struct ocfs2_refcount_tree *new)
+{
+	u64 rf_blkno = new->rf_blkno;
+	struct rb_node *parent = NULL;
+	struct rb_node **p = &osb->osb_rf_lock_tree.rb_node;
+	struct ocfs2_refcount_tree *tmp;
+
+	while (*p) {
+		parent = *p;
+
+		tmp = rb_entry(parent, struct ocfs2_refcount_tree,
+			       rf_node);
+
+		if (rf_blkno < tmp->rf_blkno)
+			p = &(*p)->rb_left;
+		else if (rf_blkno > tmp->rf_blkno)
+			p = &(*p)->rb_right;
+		else {
+			/* This should never happen! */
+			mlog(ML_ERROR, "Duplicate refcount block %llu found!\n",
+			     (unsigned long long)rf_blkno);
+			BUG();
+		}
+	}
+
+	rb_link_node(&new->rf_node, parent, p);
+	rb_insert_color(&new->rf_node, &osb->osb_rf_lock_tree);
+}
+
+static void ocfs2_free_refcount_tree(struct ocfs2_refcount_tree *tree)
+{
+	ocfs2_metadata_cache_exit(&tree->rf_ci);
+	ocfs2_simple_drop_lockres(OCFS2_SB(tree->rf_sb), &tree->rf_lockres);
+	ocfs2_lock_res_free(&tree->rf_lockres);
+	kfree(tree);
+}
+
+static inline void
+ocfs2_erase_refcount_tree_from_list_no_lock(struct ocfs2_super *osb,
+					struct ocfs2_refcount_tree *tree)
+{
+	rb_erase(&tree->rf_node, &osb->osb_rf_lock_tree);
+	if (osb->osb_ref_tree_lru && osb->osb_ref_tree_lru == tree)
+		osb->osb_ref_tree_lru = NULL;
+}
+
+static void ocfs2_erase_refcount_tree_from_list(struct ocfs2_super *osb,
+					struct ocfs2_refcount_tree *tree)
+{
+	spin_lock(&osb->osb_lock);
+	ocfs2_erase_refcount_tree_from_list_no_lock(osb, tree);
+	spin_unlock(&osb->osb_lock);
+}
+
+void ocfs2_kref_remove_refcount_tree(struct kref *kref)
+{
+	struct ocfs2_refcount_tree *tree =
+		container_of(kref, struct ocfs2_refcount_tree, rf_getcnt);
+
+	ocfs2_free_refcount_tree(tree);
+}
+
+static inline void
+ocfs2_refcount_tree_get(struct ocfs2_refcount_tree *tree)
+{
+	kref_get(&tree->rf_getcnt);
+}
+
+static inline void
+ocfs2_refcount_tree_put(struct ocfs2_refcount_tree *tree)
+{
+	kref_put(&tree->rf_getcnt, ocfs2_kref_remove_refcount_tree);
+}
+
+static inline void ocfs2_init_refcount_tree_ci(struct ocfs2_refcount_tree *new,
+					       struct super_block *sb)
+{
+	ocfs2_metadata_cache_init(&new->rf_ci, &ocfs2_refcount_caching_ops);
+	mutex_init(&new->rf_io_mutex);
+	new->rf_sb = sb;
+	spin_lock_init(&new->rf_lock);
+}
+
+static inline void ocfs2_init_refcount_tree_lock(struct ocfs2_super *osb,
+					struct ocfs2_refcount_tree *new,
+					u64 rf_blkno, u32 generation)
+{
+	init_rwsem(&new->rf_sem);
+	ocfs2_refcount_lock_res_init(&new->rf_lockres, osb,
+				     rf_blkno, generation);
+}
+
+static int ocfs2_get_refcount_tree(struct ocfs2_super *osb, u64 rf_blkno,
+				   struct ocfs2_refcount_tree **ret_tree)
+{
+	int ret = 0;
+	struct ocfs2_refcount_tree *tree, *new = NULL;
+	struct buffer_head *ref_root_bh = NULL;
+	struct ocfs2_refcount_block *ref_rb;
+
+	spin_lock(&osb->osb_lock);
+	if (osb->osb_ref_tree_lru &&
+	    osb->osb_ref_tree_lru->rf_blkno == rf_blkno)
+		tree = osb->osb_ref_tree_lru;
+	else
+		tree = ocfs2_find_refcount_tree(osb, rf_blkno);
+	if (tree)
+		goto out;
+
+	spin_unlock(&osb->osb_lock);
+
+	new = kzalloc(sizeof(struct ocfs2_refcount_tree), GFP_NOFS);
+	if (!new) {
+		ret = -ENOMEM;
+		return ret;
+	}
+
+	new->rf_blkno = rf_blkno;
+	kref_init(&new->rf_getcnt);
+	ocfs2_init_refcount_tree_ci(new, osb->sb);
+
+	/*
+	 * We need the generation to create the refcount tree lock and since
+	 * it isn't changed during the tree modification, we are safe here to
+	 * read without protection.
+	 * We also have to purge the cache after we create the lock since the
+	 * refcount block may have the stale data. It can only be trusted when
+	 * we hold the refcount lock.
+	 */
+	ret = ocfs2_read_refcount_block(&new->rf_ci, rf_blkno, &ref_root_bh);
+	if (ret) {
+		mlog_errno(ret);
+		ocfs2_metadata_cache_exit(&new->rf_ci);
+		kfree(new);
+		return ret;
+	}
+
+	ref_rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
+	new->rf_generation = le32_to_cpu(ref_rb->rf_generation);
+	ocfs2_init_refcount_tree_lock(osb, new, rf_blkno,
+				      new->rf_generation);
+	ocfs2_metadata_cache_purge(&new->rf_ci);
+
+	spin_lock(&osb->osb_lock);
+	tree = ocfs2_find_refcount_tree(osb, rf_blkno);
+	if (tree)
+		goto out;
+
+	ocfs2_insert_refcount_tree(osb, new);
+
+	tree = new;
+	new = NULL;
+
+out:
+	*ret_tree = tree;
+
+	osb->osb_ref_tree_lru = tree;
+
+	spin_unlock(&osb->osb_lock);
+
+	if (new)
+		ocfs2_free_refcount_tree(new);
+
+	brelse(ref_root_bh);
+	return ret;
+}
+
+static int ocfs2_get_refcount_block(struct inode *inode, u64 *ref_blkno)
+{
+	int ret;
+	struct buffer_head *di_bh = NULL;
+	struct ocfs2_dinode *di;
+
+	ret = ocfs2_read_inode_block(inode, &di_bh);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
+
+	di = (struct ocfs2_dinode *)di_bh->b_data;
+	*ref_blkno = le64_to_cpu(di->i_refcount_loc);
+	brelse(di_bh);
+out:
+	return ret;
+}
+
+static int __ocfs2_lock_refcount_tree(struct ocfs2_super *osb,
+				      struct ocfs2_refcount_tree *tree, int rw)
+{
+	int ret;
+
+	ret = ocfs2_refcount_lock(tree, rw);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	if (rw)
+		down_write(&tree->rf_sem);
+	else
+		down_read(&tree->rf_sem);
+
+out:
+	return ret;
+}
+
+/*
+ * Lock the refcount tree rooted at ref_blkno (rw != 0 means exclusive) and
+ * return it in *ret_tree with an extra reference held.  If ref_bh is not
+ * NULL, the root refcount block is also returned in *ref_bh.
+ *
+ * If the on-disk generation no longer matches the cached tree, the stale
+ * tree is dropped from the rb-tree and the lookup is retried.
+ */
+int ocfs2_lock_refcount_tree(struct ocfs2_super *osb,
+			     u64 ref_blkno, int rw,
+			     struct ocfs2_refcount_tree **ret_tree,
+			     struct buffer_head **ref_bh)
+{
+	int ret, delete_tree = 0;
+	struct ocfs2_refcount_tree *tree = NULL;
+	struct buffer_head *ref_root_bh = NULL;
+	struct ocfs2_refcount_block *rb;
+
+again:
+	ret = ocfs2_get_refcount_tree(osb, ref_blkno, &tree);
+	if (ret) {
+		mlog_errno(ret);
+		return ret;
+	}
+
+	ocfs2_refcount_tree_get(tree);
+
+	ret = __ocfs2_lock_refcount_tree(osb, tree, rw);
+	if (ret) {
+		mlog_errno(ret);
+		ocfs2_refcount_tree_put(tree);
+		goto out;
+	}
+
+	ret = ocfs2_read_refcount_block(&tree->rf_ci, tree->rf_blkno,
+					&ref_root_bh);
+	if (ret) {
+		mlog_errno(ret);
+		ocfs2_unlock_refcount_tree(osb, tree, rw);
+		/* The unlock above already dropped the reference we took. */
+		goto out;
+	}
+
+	rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
+	/*
+	 * If the refcount block has been freed and re-created, we may need
+	 * to recreate the refcount tree also.
+	 *
+	 * Here we just remove the tree from the rb-tree, and the last
+	 * kref holder will unlock and delete this refcount_tree.
+	 * Then we goto "again" and ocfs2_get_refcount_tree will create
+	 * the new refcount tree for us.
+	 */
+	if (tree->rf_generation != le32_to_cpu(rb->rf_generation)) {
+		if (!tree->rf_removed) {
+			ocfs2_erase_refcount_tree_from_list(osb, tree);
+			tree->rf_removed = 1;
+			delete_tree = 1;
+		}
+
+		ocfs2_unlock_refcount_tree(osb, tree, rw);
+		/*
+		 * We get an extra reference when we create the refcount
+		 * tree, so another put will destroy it.
+		 */
+		if (delete_tree)
+			ocfs2_refcount_tree_put(tree);
+		brelse(ref_root_bh);
+		ref_root_bh = NULL;
+		goto again;
+	}
+
+	*ret_tree = tree;
+	if (ref_bh) {
+		*ref_bh = ref_root_bh;
+		ref_root_bh = NULL;
+	}
+out:
+	brelse(ref_root_bh);
+	return ret;
+}
+
+int ocfs2_lock_refcount_tree_by_inode(struct inode *inode, int rw,
+				      struct ocfs2_refcount_tree **ret_tree,
+				      struct buffer_head **ref_bh)
+{
+	int ret;
+	u64 ref_blkno;
+
+	ret = ocfs2_get_refcount_block(inode, &ref_blkno);
+	if (ret) {
+		mlog_errno(ret);
+		return ret;
+	}
+
+	return ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb), ref_blkno,
+					rw, ret_tree, ref_bh);
+}
+
+void ocfs2_unlock_refcount_tree(struct ocfs2_super *osb,
+				struct ocfs2_refcount_tree *tree, int rw)
+{
+	if (rw)
+		up_write(&tree->rf_sem);
+	else
+		up_read(&tree->rf_sem);
+
+	ocfs2_refcount_unlock(tree, rw);
+	ocfs2_refcount_tree_put(tree);
+}
+
+void ocfs2_purge_refcount_trees(struct ocfs2_super *osb)
+{
+	struct rb_node *node;
+	struct ocfs2_refcount_tree *tree;
+	struct rb_root *root = &osb->osb_rf_lock_tree;
+
+	while ((node = rb_last(root)) != NULL) {
+		tree = rb_entry(node, struct ocfs2_refcount_tree, rf_node);
+
+		mlog(0, "Purge tree %llu\n",
+		     (unsigned long long) tree->rf_blkno);
+
+		rb_erase(&tree->rf_node, root);
+		ocfs2_free_refcount_tree(tree);
+	}
+}
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h
index 9a3695cdbb53..2ea7fc52c23c 100644
--- a/fs/ocfs2/refcounttree.h
+++ b/fs/ocfs2/refcounttree.h
@@ -33,4 +33,11 @@ struct ocfs2_refcount_tree {
 	struct super_block *rf_sb;
 };
 
+void ocfs2_purge_refcount_trees(struct ocfs2_super *osb);
+int ocfs2_lock_refcount_tree(struct ocfs2_super *osb, u64 ref_blkno, int rw,
+			     struct ocfs2_refcount_tree **tree,
+			     struct buffer_head **ref_bh);
+void ocfs2_unlock_refcount_tree(struct ocfs2_super *osb,
+				struct ocfs2_refcount_tree *tree,
+				int rw);
 #endif /* OCFS2_REFCOUNTTREE_H */
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index e35a5052ce3a..8b6062176970 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -69,6 +69,7 @@
 #include "ver.h"
 #include "xattr.h"
 #include "quota.h"
+#include "refcounttree.h"
 
 #include "buffer_head_io.h"
 
@@ -1858,6 +1859,8 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
 
 	ocfs2_sync_blockdev(sb);
 
+	ocfs2_purge_refcount_trees(osb);
+
 	/* No cluster connection means we've failed during mount, so skip
 	 * all the steps which depended on that to complete. */
 	if (osb->cconn) {
@@ -2064,6 +2067,8 @@ static int ocfs2_initialize_super(struct super_block *sb,
 		goto bail;
 	}
 
+	osb->osb_rf_lock_tree = RB_ROOT;
+
 	osb->s_feature_compat =
 		le32_to_cpu(OCFS2_RAW_SB(di)->s_feature_compat);
 	osb->s_feature_ro_compat =
-- 
cgit v1.2.3


From 8bf396de984e68491569b49770e4fd7aca40ba65 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Mon, 24 Aug 2009 11:12:02 +0800
Subject: ocfs2: Basic tree root operation.

Add basic refcount tree root operation.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/ocfs2/journal.h      |  14 ++
 fs/ocfs2/refcounttree.c | 345 +++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 353 insertions(+), 6 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index b2dc125c6e9a..bd88c8b9f2fb 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -490,6 +490,20 @@ static inline int ocfs2_calc_dxi_expand_credits(struct super_block *sb)
 	return credits;
 }
 
+/* inode update, new refcount block and its allocation credits. */
+#define OCFS2_REFCOUNT_TREE_CREATE_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1 \
+					    + OCFS2_SUBALLOC_ALLOC)
+
+/* inode and the refcount block update. */
+#define OCFS2_REFCOUNT_TREE_SET_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1)
+
+/*
+ * inode and the refcount block update.
+ * It doesn't include the credits for sub alloc change.
+ * So if we need to free the bit, OCFS2_SUBALLOC_FREE needs to be added.
+ */
+#define OCFS2_REFCOUNT_TREE_REMOVE_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1)
+
 /*
  * Please note that the caller must make sure that root_el is the root
  * of extent tree. So for an inode, it should be &fe->id2.i_list. Otherwise
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 8d79de8637b8..d0d6fa312b01 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -27,6 +27,7 @@
 #include "buffer_head_io.h"
 #include "blockcheck.h"
 #include "refcounttree.h"
+#include "sysfile.h"
 #include "dlmglue.h"
 
 static inline struct ocfs2_refcount_tree *
@@ -272,6 +273,22 @@ static inline void ocfs2_init_refcount_tree_lock(struct ocfs2_super *osb,
 				     rf_blkno, generation);
 }
 
+static struct ocfs2_refcount_tree*
+ocfs2_allocate_refcount_tree(struct ocfs2_super *osb, u64 rf_blkno)
+{
+	struct ocfs2_refcount_tree *new;
+
+	new = kzalloc(sizeof(struct ocfs2_refcount_tree), GFP_NOFS);
+	if (!new)
+		return NULL;
+
+	new->rf_blkno = rf_blkno;
+	kref_init(&new->rf_getcnt);
+	ocfs2_init_refcount_tree_ci(new, osb->sb);
+
+	return new;
+}
+
 static int ocfs2_get_refcount_tree(struct ocfs2_super *osb, u64 rf_blkno,
 				   struct ocfs2_refcount_tree **ret_tree)
 {
@@ -291,16 +308,12 @@ static int ocfs2_get_refcount_tree(struct ocfs2_super *osb, u64 rf_blkno,
 
 	spin_unlock(&osb->osb_lock);
 
-	new = kzalloc(sizeof(struct ocfs2_refcount_tree), GFP_NOFS);
+	new = ocfs2_allocate_refcount_tree(osb, rf_blkno);
 	if (!new) {
 		ret = -ENOMEM;
+		mlog_errno(ret);
 		return ret;
 	}
-
-	new->rf_blkno = rf_blkno;
-	kref_init(&new->rf_getcnt);
-	ocfs2_init_refcount_tree_ci(new, osb->sb);
-
 	/*
 	 * We need the generation to create the refcount tree lock and since
 	 * it isn't changed during the tree modification, we are safe here to
@@ -515,3 +528,323 @@ void ocfs2_purge_refcount_trees(struct ocfs2_super *osb)
 		ocfs2_free_refcount_tree(tree);
 	}
 }
+
+/*
+ * Create a new refcount tree for an inode and attach it to the dinode.
+ * The caller must already hold the inode lock.
+ */
+static int ocfs2_create_refcount_tree(struct inode *inode,
+				      struct buffer_head *di_bh)
+{
+	int ret;
+	handle_t *handle = NULL;
+	struct ocfs2_alloc_context *meta_ac = NULL;
+	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
+	struct ocfs2_inode_info *oi = OCFS2_I(inode);
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct buffer_head *new_bh = NULL;
+	struct ocfs2_refcount_block *rb;
+	struct ocfs2_refcount_tree *new_tree = NULL, *tree = NULL;
+	u16 suballoc_bit_start;
+	u32 num_got;
+	u64 first_blkno;
+
+	BUG_ON(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL);
+
+	mlog(0, "create tree for inode %lu\n", inode->i_ino);
+
+	ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	handle = ocfs2_start_trans(osb, OCFS2_REFCOUNT_TREE_CREATE_CREDITS);
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
+	if (ret) {
+		mlog_errno(ret);
+		goto out_commit;
+	}
+
+	ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1,
+				   &suballoc_bit_start, &num_got,
+				   &first_blkno);
+	if (ret) {
+		mlog_errno(ret);
+		goto out_commit;
+	}
+
+	new_tree = ocfs2_allocate_refcount_tree(osb, first_blkno);
+	if (!new_tree) {
+		ret = -ENOMEM;
+		mlog_errno(ret);
+		goto out_commit;
+	}
+
+	new_bh = sb_getblk(inode->i_sb, first_blkno);
+	ocfs2_set_new_buffer_uptodate(&new_tree->rf_ci, new_bh);
+
+	ret = ocfs2_journal_access_rb(handle, &new_tree->rf_ci, new_bh,
+				      OCFS2_JOURNAL_ACCESS_CREATE);
+	if (ret) {
+		mlog_errno(ret);
+		goto out_commit;
+	}
+
+	/* Initialize ocfs2_refcount_block. */
+	rb = (struct ocfs2_refcount_block *)new_bh->b_data;
+	memset(rb, 0, inode->i_sb->s_blocksize);
+	strcpy((void *)rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE);
+	rb->rf_suballoc_slot = cpu_to_le16(osb->slot_num);
+	rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start);
+	rb->rf_fs_generation = cpu_to_le32(osb->fs_generation);
+	rb->rf_blkno = cpu_to_le64(first_blkno);
+	rb->rf_count = cpu_to_le32(1);
+	rb->rf_records.rl_count =
+			cpu_to_le16(ocfs2_refcount_recs_per_rb(osb->sb));
+	spin_lock(&osb->osb_lock);
+	rb->rf_generation = cpu_to_le32(osb->s_next_generation++);
+	spin_unlock(&osb->osb_lock);
+
+	ocfs2_journal_dirty(handle, new_bh);
+
+	spin_lock(&oi->ip_lock);
+	oi->ip_dyn_features |= OCFS2_HAS_REFCOUNT_FL;
+	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
+	di->i_refcount_loc = cpu_to_le64(first_blkno);
+	spin_unlock(&oi->ip_lock);
+
+	mlog(0, "created tree for inode %lu, refblock %llu\n",
+	     inode->i_ino, (unsigned long long)first_blkno);
+
+	ocfs2_journal_dirty(handle, di_bh);
+
+	/*
+	 * rf_generation is little-endian on disk.  Convert it back here,
+	 * since the tree lock is created from the generation number.
+	 */
+	new_tree->rf_generation = le32_to_cpu(rb->rf_generation);
+	ocfs2_init_refcount_tree_lock(osb, new_tree, first_blkno,
+				      new_tree->rf_generation);
+
+	spin_lock(&osb->osb_lock);
+	tree = ocfs2_find_refcount_tree(osb, first_blkno);
+
+	/*
+	 * We've just created a new refcount tree in this block.  If
+	 * we found a refcount tree on the ocfs2_super, it must be
+	 * one we just deleted.  We free the old tree before
+	 * inserting the new tree.
+	 */
+	BUG_ON(tree && tree->rf_generation == new_tree->rf_generation);
+	if (tree)
+		ocfs2_erase_refcount_tree_from_list_no_lock(osb, tree);
+	ocfs2_insert_refcount_tree(osb, new_tree);
+	spin_unlock(&osb->osb_lock);
+	new_tree = NULL;
+	if (tree)
+		ocfs2_refcount_tree_put(tree);
+
+out_commit:
+	ocfs2_commit_trans(osb, handle);
+
+out:
+	if (new_tree) {
+		ocfs2_metadata_cache_exit(&new_tree->rf_ci);
+		kfree(new_tree);
+	}
+
+	brelse(new_bh);
+	if (meta_ac)
+		ocfs2_free_alloc_context(meta_ac);
+
+	return ret;
+}
+
+static int ocfs2_set_refcount_tree(struct inode *inode,
+				   struct buffer_head *di_bh,
+				   u64 refcount_loc)
+{
+	int ret;
+	handle_t *handle = NULL;
+	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
+	struct ocfs2_inode_info *oi = OCFS2_I(inode);
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct buffer_head *ref_root_bh = NULL;
+	struct ocfs2_refcount_block *rb;
+	struct ocfs2_refcount_tree *ref_tree;
+
+	BUG_ON(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL);
+
+	ret = ocfs2_lock_refcount_tree(osb, refcount_loc, 1,
+				       &ref_tree, &ref_root_bh);
+	if (ret) {
+		mlog_errno(ret);
+		return ret;
+	}
+
+	handle = ocfs2_start_trans(osb, OCFS2_REFCOUNT_TREE_SET_CREDITS);
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
+	if (ret) {
+		mlog_errno(ret);
+		goto out_commit;
+	}
+
+	ret = ocfs2_journal_access_rb(handle, &ref_tree->rf_ci, ref_root_bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
+	if (ret) {
+		mlog_errno(ret);
+		goto out_commit;
+	}
+
+	rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
+	le32_add_cpu(&rb->rf_count, 1);
+
+	ocfs2_journal_dirty(handle, ref_root_bh);
+
+	spin_lock(&oi->ip_lock);
+	oi->ip_dyn_features |= OCFS2_HAS_REFCOUNT_FL;
+	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
+	di->i_refcount_loc = cpu_to_le64(refcount_loc);
+	spin_unlock(&oi->ip_lock);
+	ocfs2_journal_dirty(handle, di_bh);
+
+out_commit:
+	ocfs2_commit_trans(osb, handle);
+out:
+	ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
+	brelse(ref_root_bh);
+
+	return ret;
+}
+
+int ocfs2_remove_refcount_tree(struct inode *inode, struct buffer_head *di_bh)
+{
+	int ret, delete_tree = 0;
+	handle_t *handle = NULL;
+	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
+	struct ocfs2_inode_info *oi = OCFS2_I(inode);
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct ocfs2_refcount_block *rb;
+	struct inode *alloc_inode = NULL;
+	struct buffer_head *alloc_bh = NULL;
+	struct buffer_head *blk_bh = NULL;
+	struct ocfs2_refcount_tree *ref_tree;
+	int credits = OCFS2_REFCOUNT_TREE_REMOVE_CREDITS;
+	u64 blk = 0, bg_blkno = 0, ref_blkno = le64_to_cpu(di->i_refcount_loc);
+	u16 bit = 0;
+
+	if (!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL))
+		return 0;
+
+	BUG_ON(!ref_blkno);
+	ret = ocfs2_lock_refcount_tree(osb, ref_blkno, 1, &ref_tree, &blk_bh);
+	if (ret) {
+		mlog_errno(ret);
+		return ret;
+	}
+
+	rb = (struct ocfs2_refcount_block *)blk_bh->b_data;
+
+	/*
+	 * If we are the last user, we need to free the block.
+	 * So lock the allocator ahead.
+	 */
+	if (le32_to_cpu(rb->rf_count) == 1) {
+		blk = le64_to_cpu(rb->rf_blkno);
+		bit = le16_to_cpu(rb->rf_suballoc_bit);
+		bg_blkno = ocfs2_which_suballoc_group(blk, bit);
+
+		alloc_inode = ocfs2_get_system_file_inode(osb,
+					EXTENT_ALLOC_SYSTEM_INODE,
+					le16_to_cpu(rb->rf_suballoc_slot));
+		if (!alloc_inode) {
+			ret = -ENOMEM;
+			mlog_errno(ret);
+			goto out;
+		}
+		mutex_lock(&alloc_inode->i_mutex);
+
+		ret = ocfs2_inode_lock(alloc_inode, &alloc_bh, 1);
+		if (ret) {
+			mlog_errno(ret);
+			goto out_mutex;
+		}
+
+		credits += OCFS2_SUBALLOC_FREE;
+	}
+
+	handle = ocfs2_start_trans(osb, credits);
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		mlog_errno(ret);
+		goto out_unlock;
+	}
+
+	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
+	if (ret) {
+		mlog_errno(ret);
+		goto out_commit;
+	}
+
+	ret = ocfs2_journal_access_rb(handle, &ref_tree->rf_ci, blk_bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
+	if (ret) {
+		mlog_errno(ret);
+		goto out_commit;
+	}
+
+	spin_lock(&oi->ip_lock);
+	oi->ip_dyn_features &= ~OCFS2_HAS_REFCOUNT_FL;
+	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
+	di->i_refcount_loc = 0;
+	spin_unlock(&oi->ip_lock);
+	ocfs2_journal_dirty(handle, di_bh);
+
+	le32_add_cpu(&rb->rf_count , -1);
+	ocfs2_journal_dirty(handle, blk_bh);
+
+	if (!rb->rf_count) {
+		delete_tree = 1;
+		ocfs2_erase_refcount_tree_from_list(osb, ref_tree);
+		ret = ocfs2_free_suballoc_bits(handle, alloc_inode,
+					       alloc_bh, bit, bg_blkno, 1);
+		if (ret)
+			mlog_errno(ret);
+	}
+
+out_commit:
+	ocfs2_commit_trans(osb, handle);
+out_unlock:
+	if (alloc_inode) {
+		ocfs2_inode_unlock(alloc_inode, 1);
+		brelse(alloc_bh);
+	}
+out_mutex:
+	if (alloc_inode) {
+		mutex_unlock(&alloc_inode->i_mutex);
+		iput(alloc_inode);
+	}
+out:
+	ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
+	if (delete_tree)
+		ocfs2_refcount_tree_put(ref_tree);
+	brelse(blk_bh);
+
+	return ret;
+}
-- 
cgit v1.2.3


From 853a3a1439b18d5a70ada2cb3fcd468e70b7d095 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:22:18 +0800
Subject: ocfs2: Wrap ocfs2_extent_contig in ocfs2_extent_tree.

Add a new operation, eo_extent_contig, to the extent tree's
operations vector. We want this so that the new refcount trees
can always return CONTIG_NONE and thereby prevent extent
merging.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/ocfs2/alloc.c | 57 ++++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 41 insertions(+), 16 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index ab4d2b59b472..75e65df34b69 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -52,7 +52,17 @@
 
 #include "buffer_head_io.h"
 
+enum ocfs2_contig_type {
+	CONTIG_NONE = 0,
+	CONTIG_LEFT,
+	CONTIG_RIGHT,
+	CONTIG_LEFTRIGHT,
+};
 
+static enum ocfs2_contig_type
+	ocfs2_extent_rec_contig(struct super_block *sb,
+				struct ocfs2_extent_rec *ext,
+				struct ocfs2_extent_rec *insert_rec);
 /*
  * Operations for a specific extent tree type.
  *
@@ -122,6 +132,16 @@ struct ocfs2_extent_tree_operations {
 	 * to 0 (unlimited).  Optional.
 	 */
 	void (*eo_fill_max_leaf_clusters)(struct ocfs2_extent_tree *et);
+
+	/*
+	 * ->eo_extent_contig tests whether the 2 ocfs2_extent_recs
+	 * are contiguous or not. Optional. It need not be set if the
+	 * tree uses ocfs2_extent_rec as the tree leaf.
+	 */
+	enum ocfs2_contig_type
+		(*eo_extent_contig)(struct ocfs2_extent_tree *et,
+				    struct ocfs2_extent_rec *ext,
+				    struct ocfs2_extent_rec *insert_rec);
 };
 
 
@@ -458,6 +478,19 @@ static inline int ocfs2_et_root_journal_access(handle_t *handle,
 					  type);
 }
 
+static inline enum ocfs2_contig_type
+	ocfs2_et_extent_contig(struct ocfs2_extent_tree *et,
+			       struct ocfs2_extent_rec *rec,
+			       struct ocfs2_extent_rec *insert_rec)
+{
+	if (et->et_ops->eo_extent_contig)
+		return et->et_ops->eo_extent_contig(et, rec, insert_rec);
+
+	return ocfs2_extent_rec_contig(
+				ocfs2_metadata_cache_get_super(et->et_ci),
+				rec, insert_rec);
+}
+
 static inline int ocfs2_et_insert_check(struct ocfs2_extent_tree *et,
 					struct ocfs2_extent_rec *rec)
 {
@@ -736,17 +769,9 @@ int ocfs2_search_extent_list(struct ocfs2_extent_list *el, u32 v_cluster)
 	return ret;
 }
 
-enum ocfs2_contig_type {
-	CONTIG_NONE = 0,
-	CONTIG_LEFT,
-	CONTIG_RIGHT,
-	CONTIG_LEFTRIGHT,
-};
-
-
 /*
  * NOTE: ocfs2_block_extent_contig(), ocfs2_extents_adjacent() and
- * ocfs2_extent_contig only work properly against leaf nodes!
+ * ocfs2_extent_rec_contig only work properly against leaf nodes!
  */
 static int ocfs2_block_extent_contig(struct super_block *sb,
 				     struct ocfs2_extent_rec *ext,
@@ -772,9 +797,9 @@ static int ocfs2_extents_adjacent(struct ocfs2_extent_rec *left,
 }
 
 static enum ocfs2_contig_type
-	ocfs2_extent_contig(struct super_block *sb,
-			    struct ocfs2_extent_rec *ext,
-			    struct ocfs2_extent_rec *insert_rec)
+	ocfs2_extent_rec_contig(struct super_block *sb,
+				struct ocfs2_extent_rec *ext,
+				struct ocfs2_extent_rec *insert_rec)
 {
 	u64 blkno = le64_to_cpu(insert_rec->e_blkno);
 
@@ -4400,7 +4425,7 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et,
 			if (split_rec->e_cpos == el->l_recs[index].e_cpos)
 				ret = CONTIG_RIGHT;
 		} else {
-			ret = ocfs2_extent_contig(sb, rec, split_rec);
+			ret = ocfs2_et_extent_contig(et, rec, split_rec);
 		}
 	}
 
@@ -4445,7 +4470,7 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et,
 	if (rec) {
 		enum ocfs2_contig_type contig_type;
 
-		contig_type = ocfs2_extent_contig(sb, rec, split_rec);
+		contig_type = ocfs2_et_extent_contig(et, rec, split_rec);
 
 		if (contig_type == CONTIG_LEFT && ret == CONTIG_RIGHT)
 			ret = CONTIG_LEFTRIGHT;
@@ -4473,8 +4498,8 @@ static void ocfs2_figure_contig_type(struct ocfs2_extent_tree *et,
 	BUG_ON(le16_to_cpu(el->l_tree_depth) != 0);
 
 	for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
-		contig_type = ocfs2_extent_contig(ocfs2_metadata_cache_get_super(et->et_ci),
-						  &el->l_recs[i], insert_rec);
+		contig_type = ocfs2_et_extent_contig(et, &el->l_recs[i],
+						     insert_rec);
 		if (contig_type != CONTIG_NONE) {
 			insert->ins_contig_index = i;
 			break;
-- 
cgit v1.2.3


From 555936bfcb1af26c6919d6cedb83710bb03d4322 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:22:21 +0800
Subject: ocfs2: Abstract extent split process.

ocfs2_mark_extent_written actually does the following things:
1. check the parameters.
2. initialize the left_path and split_rec.
3. call __ocfs2_mark_extent_written. It will do:
   1) check the unwritten flag
   2) do the real split work.
The whole process is tightly coupled. So this patch abstracts
out 2 separate functions so that future b-tree operations can
work with them.

1. __ocfs2_split_extent will accept path and split_rec and do
  the real split work.
2. ocfs2_change_extent_flag will accept a new flag and initialize
   path and split_rec.

So now ocfs2_mark_extent_written will do:
1. check the parameters.
2. call ocfs2_change_extent_flag.
   1) initialize the left_path and split_rec.
   2) check whether the new flags conflict with the old one.
   3) call __ocfs2_split_extent to do the split.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/ocfs2/alloc.c | 150 ++++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 100 insertions(+), 50 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 75e65df34b69..14b9106849ca 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -5032,9 +5032,8 @@ out:
 }
 
 /*
- * Mark part or all of the extent record at split_index in the leaf
- * pointed to by path as written. This removes the unwritten
- * extent flag.
+ * Split part or all of the extent record at split_index in the leaf
+ * pointed to by path. Merge with the contiguous extent record if needed.
  *
  * Care is taken to handle contiguousness so as to not grow the tree.
  *
@@ -5051,13 +5050,13 @@ out:
  * have been brought into cache (and pinned via the journal), so the
  * extra overhead is not expressed in terms of disk reads.
  */
-static int __ocfs2_mark_extent_written(handle_t *handle,
-				       struct ocfs2_extent_tree *et,
-				       struct ocfs2_path *path,
-				       int split_index,
-				       struct ocfs2_extent_rec *split_rec,
-				       struct ocfs2_alloc_context *meta_ac,
-				       struct ocfs2_cached_dealloc_ctxt *dealloc)
+static int __ocfs2_split_extent(handle_t *handle,
+				struct ocfs2_extent_tree *et,
+				struct ocfs2_path *path,
+				int split_index,
+				struct ocfs2_extent_rec *split_rec,
+				struct ocfs2_alloc_context *meta_ac,
+				struct ocfs2_cached_dealloc_ctxt *dealloc)
 {
 	int ret = 0;
 	struct ocfs2_extent_list *el = path_leaf_el(path);
@@ -5066,12 +5065,6 @@ static int __ocfs2_mark_extent_written(handle_t *handle,
 	struct ocfs2_merge_ctxt ctxt;
 	struct ocfs2_extent_list *rightmost_el;
 
-	if (!(rec->e_flags & OCFS2_EXT_UNWRITTEN)) {
-		ret = -EIO;
-		mlog_errno(ret);
-		goto out;
-	}
-
 	if (le32_to_cpu(rec->e_cpos) > le32_to_cpu(split_rec->e_cpos) ||
 	    ((le32_to_cpu(rec->e_cpos) + le16_to_cpu(rec->e_leaf_clusters)) <
 	     (le32_to_cpu(split_rec->e_cpos) + le16_to_cpu(split_rec->e_leaf_clusters)))) {
@@ -5141,42 +5134,31 @@ out:
 }
 
 /*
- * Mark the already-existing extent at cpos as written for len clusters.
+ * Change the flags of the already-existing extent at cpos for len clusters.
+ *
+ * new_flags: the flags we want to set.
+ * clear_flags: the flags we want to clear.
+ * phys: the new physical offset we want this new extent to start from.
  *
  * If the existing extent is larger than the request, initiate a
  * split. An attempt will be made at merging with adjacent extents.
  *
  * The caller is responsible for passing down meta_ac if we'll need it.
  */
-int ocfs2_mark_extent_written(struct inode *inode,
-			      struct ocfs2_extent_tree *et,
-			      handle_t *handle, u32 cpos, u32 len, u32 phys,
-			      struct ocfs2_alloc_context *meta_ac,
-			      struct ocfs2_cached_dealloc_ctxt *dealloc)
+static int ocfs2_change_extent_flag(handle_t *handle,
+				    struct ocfs2_extent_tree *et,
+				    u32 cpos, u32 len, u32 phys,
+				    struct ocfs2_alloc_context *meta_ac,
+				    struct ocfs2_cached_dealloc_ctxt *dealloc,
+				    int new_flags, int clear_flags)
 {
 	int ret, index;
-	u64 start_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys);
+	struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);
+	u64 start_blkno = ocfs2_clusters_to_blocks(sb, phys);
 	struct ocfs2_extent_rec split_rec;
 	struct ocfs2_path *left_path = NULL;
 	struct ocfs2_extent_list *el;
-
-	mlog(0, "Inode %lu cpos %u, len %u, phys %u (%llu)\n",
-	     inode->i_ino, cpos, len, phys, (unsigned long long)start_blkno);
-
-	if (!ocfs2_writes_unwritten_extents(OCFS2_SB(inode->i_sb))) {
-		ocfs2_error(inode->i_sb, "Inode %llu has unwritten extents "
-			    "that are being written to, but the feature bit "
-			    "is not set in the super block.",
-			    (unsigned long long)OCFS2_I(inode)->ip_blkno);
-		ret = -EROFS;
-		goto out;
-	}
-
-	/*
-	 * XXX: This should be fixed up so that we just re-insert the
-	 * next extent records.
-	 */
-	ocfs2_et_extent_map_truncate(et, 0);
+	struct ocfs2_extent_rec *rec;
 
 	left_path = ocfs2_new_path_from_et(et);
 	if (!left_path) {
@@ -5194,30 +5176,98 @@ int ocfs2_mark_extent_written(struct inode *inode,
 
 	index = ocfs2_search_extent_list(el, cpos);
 	if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
-		ocfs2_error(inode->i_sb,
-			    "Inode %llu has an extent at cpos %u which can no "
+		ocfs2_error(sb,
+			    "Owner %llu has an extent at cpos %u which can no "
 			    "longer be found.\n",
-			    (unsigned long long)OCFS2_I(inode)->ip_blkno, cpos);
+			     (unsigned long long)
+			     ocfs2_metadata_cache_owner(et->et_ci), cpos);
 		ret = -EROFS;
 		goto out;
 	}
 
+	ret = -EIO;
+	rec = &el->l_recs[index];
+	if (new_flags && (rec->e_flags & new_flags)) {
+		mlog(ML_ERROR, "Owner %llu tried to set %d flags on an "
+		     "extent that already had them",
+		     (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
+		     new_flags);
+		goto out;
+	}
+
+	if (clear_flags && !(rec->e_flags & clear_flags)) {
+		mlog(ML_ERROR, "Owner %llu tried to clear %d flags on an "
+		     "extent that didn't have them",
+		     (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
+		     clear_flags);
+		goto out;
+	}
+
 	memset(&split_rec, 0, sizeof(struct ocfs2_extent_rec));
 	split_rec.e_cpos = cpu_to_le32(cpos);
 	split_rec.e_leaf_clusters = cpu_to_le16(len);
 	split_rec.e_blkno = cpu_to_le64(start_blkno);
-	split_rec.e_flags = path_leaf_el(left_path)->l_recs[index].e_flags;
-	split_rec.e_flags &= ~OCFS2_EXT_UNWRITTEN;
-
-	ret = __ocfs2_mark_extent_written(handle, et, left_path,
-					  index, &split_rec, meta_ac,
-					  dealloc);
+	split_rec.e_flags = rec->e_flags;
+	if (new_flags)
+		split_rec.e_flags |= new_flags;
+	if (clear_flags)
+		split_rec.e_flags &= ~clear_flags;
+
+	ret = __ocfs2_split_extent(handle, et, left_path,
+				  index, &split_rec, meta_ac,
+				  dealloc);
 	if (ret)
 		mlog_errno(ret);
 
 out:
 	ocfs2_free_path(left_path);
 	return ret;
+
+}
+
+/*
+ * Mark the already-existing extent at cpos as written for len clusters.
+ * This removes the unwritten extent flag.
+ *
+ * If the existing extent is larger than the request, initiate a
+ * split. An attempt will be made at merging with adjacent extents.
+ *
+ * The caller is responsible for passing down meta_ac if we'll need it.
+ */
+int ocfs2_mark_extent_written(struct inode *inode,
+			      struct ocfs2_extent_tree *et,
+			      handle_t *handle, u32 cpos, u32 len, u32 phys,
+			      struct ocfs2_alloc_context *meta_ac,
+			      struct ocfs2_cached_dealloc_ctxt *dealloc)
+{
+	int ret;
+
+	mlog(0, "Inode %lu cpos %u, len %u, phys clusters %u\n",
+	     inode->i_ino, cpos, len, phys);
+
+	if (!ocfs2_writes_unwritten_extents(OCFS2_SB(inode->i_sb))) {
+		ocfs2_error(inode->i_sb, "Inode %llu has unwritten extents "
+			    "that are being written to, but the feature bit "
+			    "is not set in the super block.",
+			    (unsigned long long)OCFS2_I(inode)->ip_blkno);
+		ret = -EROFS;
+		goto out;
+	}
+
+	/*
+	 * XXX: This should be fixed up so that we just re-insert the
+	 * next extent records.
+	 */
+	ocfs2_et_extent_map_truncate(et, 0);
+
+	ret = ocfs2_change_extent_flag(handle, et, cpos,
+				       len, phys, meta_ac, dealloc,
+				       0, OCFS2_EXT_UNWRITTEN);
+	if (ret)
+		mlog_errno(ret);
+
+out:
+	return ret;
 }
 
 static int ocfs2_split_tree(handle_t *handle, struct ocfs2_extent_tree *et,
-- 
cgit v1.2.3


From fe924415957e60471536762172d127e85519ef78 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:22:25 +0800
Subject: ocfs2: Add refcount b-tree as a new extent tree.

Add refcount b-tree as a new extent tree so that it can
use the b-tree to store and manipulate ocfs2_refcount_rec.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/ocfs2/alloc.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/ocfs2/alloc.h |  3 +++
 2 files changed, 57 insertions(+)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 14b9106849ca..a6296560a5cb 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -385,6 +385,52 @@ static struct ocfs2_extent_tree_operations ocfs2_dx_root_et_ops = {
 	.eo_fill_root_el	= ocfs2_dx_root_fill_root_el,
 };
 
+static void ocfs2_refcount_tree_fill_root_el(struct ocfs2_extent_tree *et)
+{
+	struct ocfs2_refcount_block *rb = et->et_object;
+
+	et->et_root_el = &rb->rf_list;
+}
+
+static void ocfs2_refcount_tree_set_last_eb_blk(struct ocfs2_extent_tree *et,
+						u64 blkno)
+{
+	struct ocfs2_refcount_block *rb = et->et_object;
+
+	rb->rf_last_eb_blk = cpu_to_le64(blkno);
+}
+
+static u64 ocfs2_refcount_tree_get_last_eb_blk(struct ocfs2_extent_tree *et)
+{
+	struct ocfs2_refcount_block *rb = et->et_object;
+
+	return le64_to_cpu(rb->rf_last_eb_blk);
+}
+
+static void ocfs2_refcount_tree_update_clusters(struct ocfs2_extent_tree *et,
+						u32 clusters)
+{
+	struct ocfs2_refcount_block *rb = et->et_object;
+
+	le32_add_cpu(&rb->rf_clusters, clusters);
+}
+
+static enum ocfs2_contig_type
+ocfs2_refcount_tree_extent_contig(struct ocfs2_extent_tree *et,
+				  struct ocfs2_extent_rec *ext,
+				  struct ocfs2_extent_rec *insert_rec)
+{
+	return CONTIG_NONE;
+}
+
+static struct ocfs2_extent_tree_operations ocfs2_refcount_tree_et_ops = {
+	.eo_set_last_eb_blk	= ocfs2_refcount_tree_set_last_eb_blk,
+	.eo_get_last_eb_blk	= ocfs2_refcount_tree_get_last_eb_blk,
+	.eo_update_clusters	= ocfs2_refcount_tree_update_clusters,
+	.eo_fill_root_el	= ocfs2_refcount_tree_fill_root_el,
+	.eo_extent_contig	= ocfs2_refcount_tree_extent_contig,
+};
+
 static void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et,
 				     struct ocfs2_caching_info *ci,
 				     struct buffer_head *bh,
@@ -439,6 +485,14 @@ void ocfs2_init_dx_root_extent_tree(struct ocfs2_extent_tree *et,
 				 NULL, &ocfs2_dx_root_et_ops);
 }
 
+void ocfs2_init_refcount_extent_tree(struct ocfs2_extent_tree *et,
+				     struct ocfs2_caching_info *ci,
+				     struct buffer_head *bh)
+{
+	__ocfs2_init_extent_tree(et, ci, bh, ocfs2_journal_access_rb,
+				 NULL, &ocfs2_refcount_tree_et_ops);
+}
+
 static inline void ocfs2_et_set_last_eb_blk(struct ocfs2_extent_tree *et,
 					    u64 new_last_eb_blk)
 {
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index bcf6aa42ae53..df0e778a2b68 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -80,6 +80,9 @@ void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et,
 void ocfs2_init_dx_root_extent_tree(struct ocfs2_extent_tree *et,
 				    struct ocfs2_caching_info *ci,
 				    struct buffer_head *bh);
+void ocfs2_init_refcount_extent_tree(struct ocfs2_extent_tree *et,
+				     struct ocfs2_caching_info *ci,
+				     struct buffer_head *bh);
 
 /*
  * Read an extent block into *bh.  If *bh is NULL, a bh will be
-- 
cgit v1.2.3


From e2e9f6082b5ff099978774d5c0148e062344c2f9 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:22:34 +0800
Subject: ocfs2: move tree path functions to alloc.h.

Now fs/ocfs2/alloc.c has more than 7000 lines. It contains our
basic b-tree operations. Although we have already made our b-tree
operations generic, the basic structure ocfs2_path, which is used
to iterate one b-tree branch, is still static and limited to use
within alloc.c. As the refcount tree needs them and I don't want to
add any more b-tree unrelated code to alloc.c, export them.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/ocfs2/alloc.c | 76 +++++++++++++++++---------------------------------------
 fs/ocfs2/alloc.h | 49 ++++++++++++++++++++++++++++++++++++
 2 files changed, 72 insertions(+), 53 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index a6296560a5cb..2c8ce32adf01 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -567,36 +567,6 @@ static inline int ocfs2_et_sanity_check(struct ocfs2_extent_tree *et)
 static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc);
 static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt,
 					 struct ocfs2_extent_block *eb);
-
-/*
- * Structures which describe a path through a btree, and functions to
- * manipulate them.
- *
- * The idea here is to be as generic as possible with the tree
- * manipulation code.
- */
-struct ocfs2_path_item {
-	struct buffer_head		*bh;
-	struct ocfs2_extent_list	*el;
-};
-
-#define OCFS2_MAX_PATH_DEPTH	5
-
-struct ocfs2_path {
-	int				p_tree_depth;
-	ocfs2_journal_access_func	p_root_access;
-	struct ocfs2_path_item		p_node[OCFS2_MAX_PATH_DEPTH];
-};
-
-#define path_root_bh(_path) ((_path)->p_node[0].bh)
-#define path_root_el(_path) ((_path)->p_node[0].el)
-#define path_root_access(_path)((_path)->p_root_access)
-#define path_leaf_bh(_path) ((_path)->p_node[(_path)->p_tree_depth].bh)
-#define path_leaf_el(_path) ((_path)->p_node[(_path)->p_tree_depth].el)
-#define path_num_items(_path) ((_path)->p_tree_depth + 1)
-
-static int ocfs2_find_path(struct ocfs2_caching_info *ci,
-			   struct ocfs2_path *path, u32 cpos);
 static void ocfs2_adjust_rightmost_records(handle_t *handle,
 					   struct ocfs2_extent_tree *et,
 					   struct ocfs2_path *path,
@@ -606,7 +576,7 @@ static void ocfs2_adjust_rightmost_records(handle_t *handle,
  * to build another path. Generally, this involves freeing the buffer
  * heads.
  */
-static void ocfs2_reinit_path(struct ocfs2_path *path, int keep_root)
+void ocfs2_reinit_path(struct ocfs2_path *path, int keep_root)
 {
 	int i, start = 0, depth = 0;
 	struct ocfs2_path_item *node;
@@ -635,7 +605,7 @@ static void ocfs2_reinit_path(struct ocfs2_path *path, int keep_root)
 	path->p_tree_depth = depth;
 }
 
-static void ocfs2_free_path(struct ocfs2_path *path)
+void ocfs2_free_path(struct ocfs2_path *path)
 {
 	if (path) {
 		ocfs2_reinit_path(path, 0);
@@ -733,13 +703,13 @@ static struct ocfs2_path *ocfs2_new_path(struct buffer_head *root_bh,
 	return path;
 }
 
-static struct ocfs2_path *ocfs2_new_path_from_path(struct ocfs2_path *path)
+struct ocfs2_path *ocfs2_new_path_from_path(struct ocfs2_path *path)
 {
 	return ocfs2_new_path(path_root_bh(path), path_root_el(path),
 			      path_root_access(path));
 }
 
-static struct ocfs2_path *ocfs2_new_path_from_et(struct ocfs2_extent_tree *et)
+struct ocfs2_path *ocfs2_new_path_from_et(struct ocfs2_extent_tree *et)
 {
 	return ocfs2_new_path(et->et_root_bh, et->et_root_el,
 			      et->et_root_journal_access);
@@ -752,10 +722,10 @@ static struct ocfs2_path *ocfs2_new_path_from_et(struct ocfs2_extent_tree *et)
  * I don't like the way this function's name looks next to
  * ocfs2_journal_access_path(), but I don't have a better one.
  */
-static int ocfs2_path_bh_journal_access(handle_t *handle,
-					struct ocfs2_caching_info *ci,
-					struct ocfs2_path *path,
-					int idx)
+int ocfs2_path_bh_journal_access(handle_t *handle,
+				 struct ocfs2_caching_info *ci,
+				 struct ocfs2_path *path,
+				 int idx)
 {
 	ocfs2_journal_access_func access = path_root_access(path);
 
@@ -772,9 +742,9 @@ static int ocfs2_path_bh_journal_access(handle_t *handle,
 /*
  * Convenience function to journal all components in a path.
  */
-static int ocfs2_journal_access_path(struct ocfs2_caching_info *ci,
-				     handle_t *handle,
-				     struct ocfs2_path *path)
+int ocfs2_journal_access_path(struct ocfs2_caching_info *ci,
+			      handle_t *handle,
+			      struct ocfs2_path *path)
 {
 	int i, ret = 0;
 
@@ -1942,8 +1912,8 @@ static void find_path_ins(void *data, struct buffer_head *bh)
 	ocfs2_path_insert_eb(fp->path, fp->index, bh);
 	fp->index++;
 }
-static int ocfs2_find_path(struct ocfs2_caching_info *ci,
-			   struct ocfs2_path *path, u32 cpos)
+int ocfs2_find_path(struct ocfs2_caching_info *ci,
+		    struct ocfs2_path *path, u32 cpos)
 {
 	struct find_path_data data;
 
@@ -5104,13 +5074,13 @@ out:
  * have been brought into cache (and pinned via the journal), so the
  * extra overhead is not expressed in terms of disk reads.
  */
-static int __ocfs2_split_extent(handle_t *handle,
-				struct ocfs2_extent_tree *et,
-				struct ocfs2_path *path,
-				int split_index,
-				struct ocfs2_extent_rec *split_rec,
-				struct ocfs2_alloc_context *meta_ac,
-				struct ocfs2_cached_dealloc_ctxt *dealloc)
+int ocfs2_split_extent(handle_t *handle,
+		       struct ocfs2_extent_tree *et,
+		       struct ocfs2_path *path,
+		       int split_index,
+		       struct ocfs2_extent_rec *split_rec,
+		       struct ocfs2_alloc_context *meta_ac,
+		       struct ocfs2_cached_dealloc_ctxt *dealloc)
 {
 	int ret = 0;
 	struct ocfs2_extent_list *el = path_leaf_el(path);
@@ -5267,9 +5237,9 @@ static int ocfs2_change_extent_flag(handle_t *handle,
 	if (clear_flags)
 		split_rec.e_flags &= ~clear_flags;
 
-	ret = __ocfs2_split_extent(handle, et, left_path,
-				  index, &split_rec, meta_ac,
-				  dealloc);
+	ret = ocfs2_split_extent(handle, et, left_path,
+				 index, &split_rec, meta_ac,
+				 dealloc);
 	if (ret)
 		mlog_errno(ret);
 
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index df0e778a2b68..3f4348923b73 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -115,6 +115,14 @@ int ocfs2_add_clusters_in_btree(handle_t *handle,
 				struct ocfs2_alloc_context *meta_ac,
 				enum ocfs2_alloc_restarted *reason_ret);
 struct ocfs2_cached_dealloc_ctxt;
+struct ocfs2_path;
+int ocfs2_split_extent(handle_t *handle,
+		       struct ocfs2_extent_tree *et,
+		       struct ocfs2_path *path,
+		       int split_index,
+		       struct ocfs2_extent_rec *split_rec,
+		       struct ocfs2_alloc_context *meta_ac,
+		       struct ocfs2_cached_dealloc_ctxt *dealloc);
 int ocfs2_mark_extent_written(struct inode *inode,
 			      struct ocfs2_extent_tree *et,
 			      handle_t *handle, u32 cpos, u32 len, u32 phys,
@@ -254,4 +262,45 @@ static inline int ocfs2_is_empty_extent(struct ocfs2_extent_rec *rec)
 	return !rec->e_leaf_clusters;
 }
 
+/*
+ * Structures which describe a path through a btree, and functions to
+ * manipulate them.
+ *
+ * The idea here is to be as generic as possible with the tree
+ * manipulation code.
+ */
+struct ocfs2_path_item {
+	struct buffer_head		*bh;
+	struct ocfs2_extent_list	*el;
+};
+
+#define OCFS2_MAX_PATH_DEPTH	5
+
+struct ocfs2_path {
+	int				p_tree_depth;
+	ocfs2_journal_access_func	p_root_access;
+	struct ocfs2_path_item		p_node[OCFS2_MAX_PATH_DEPTH];
+};
+
+#define path_root_bh(_path) ((_path)->p_node[0].bh)
+#define path_root_el(_path) ((_path)->p_node[0].el)
+#define path_root_access(_path)((_path)->p_root_access)
+#define path_leaf_bh(_path) ((_path)->p_node[(_path)->p_tree_depth].bh)
+#define path_leaf_el(_path) ((_path)->p_node[(_path)->p_tree_depth].el)
+#define path_num_items(_path) ((_path)->p_tree_depth + 1)
+
+void ocfs2_reinit_path(struct ocfs2_path *path, int keep_root);
+void ocfs2_free_path(struct ocfs2_path *path);
+int ocfs2_find_path(struct ocfs2_caching_info *ci,
+		    struct ocfs2_path *path,
+		    u32 cpos);
+struct ocfs2_path *ocfs2_new_path_from_path(struct ocfs2_path *path);
+struct ocfs2_path *ocfs2_new_path_from_et(struct ocfs2_extent_tree *et);
+int ocfs2_path_bh_journal_access(handle_t *handle,
+				 struct ocfs2_caching_info *ci,
+				 struct ocfs2_path *path,
+				 int idx);
+int ocfs2_journal_access_path(struct ocfs2_caching_info *ci,
+			      handle_t *handle,
+			      struct ocfs2_path *path);
 #endif /* OCFS2_ALLOC_H */
-- 
cgit v1.2.3


From e73a819db9c2d6c4065b7cab7374709b6939e8f1 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 11 Aug 2009 14:33:14 +0800
Subject: ocfs2: Add support for incrementing refcount in the tree.

    Given a physical cpos and length, increment the refcount
in the tree. If the extent has not been seen before, a refcount
record is created for it. Refcount records may be merged or
split by this operation.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/ocfs2/extent_map.c   |   15 +-
 fs/ocfs2/extent_map.h   |    5 +
 fs/ocfs2/ocfs2_fs.h     |    7 +
 fs/ocfs2/refcounttree.c | 1053 +++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 1073 insertions(+), 7 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index dc9482cb463a..40b51056bb32 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -353,11 +353,11 @@ static int ocfs2_search_for_hole_index(struct ocfs2_extent_list *el,
  * eb_bh is NULL. Otherwise, eb_bh should point to the extent block
  * containing el.
  */
-static int ocfs2_figure_hole_clusters(struct inode *inode,
-				      struct ocfs2_extent_list *el,
-				      struct buffer_head *eb_bh,
-				      u32 v_cluster,
-				      u32 *num_clusters)
+int ocfs2_figure_hole_clusters(struct ocfs2_caching_info *ci,
+			       struct ocfs2_extent_list *el,
+			       struct buffer_head *eb_bh,
+			       u32 v_cluster,
+			       u32 *num_clusters)
 {
 	int ret, i;
 	struct buffer_head *next_eb_bh = NULL;
@@ -375,7 +375,7 @@ static int ocfs2_figure_hole_clusters(struct inode *inode,
 		if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL)
 			goto no_more_extents;
 
-		ret = ocfs2_read_extent_block(INODE_CACHE(inode),
+		ret = ocfs2_read_extent_block(ci,
 					      le64_to_cpu(eb->h_next_leaf_blk),
 					      &next_eb_bh);
 		if (ret) {
@@ -456,7 +456,8 @@ static int ocfs2_get_clusters_nocache(struct inode *inode,
 		 * field.
 		 */
 		if (hole_len) {
-			ret = ocfs2_figure_hole_clusters(inode, el, eb_bh,
+			ret = ocfs2_figure_hole_clusters(INODE_CACHE(inode),
+							 el, eb_bh,
 							 v_cluster, &len);
 			if (ret) {
 				mlog_errno(ret);
diff --git a/fs/ocfs2/extent_map.h b/fs/ocfs2/extent_map.h
index b7dd9731b462..9942f47efda7 100644
--- a/fs/ocfs2/extent_map.h
+++ b/fs/ocfs2/extent_map.h
@@ -61,6 +61,11 @@ int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr,
 			   struct buffer_head *bhs[], int flags,
 			   int (*validate)(struct super_block *sb,
 					   struct buffer_head *bh));
+int ocfs2_figure_hole_clusters(struct ocfs2_caching_info *ci,
+			       struct ocfs2_extent_list *el,
+			       struct buffer_head *eb_bh,
+			       u32 v_cluster,
+			       u32 *num_clusters);
 static inline int ocfs2_read_virt_block(struct inode *inode, u64 v_block,
 					struct buffer_head **bh,
 					int (*validate)(struct super_block *sb,
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index e4288b446ec0..40072cdef7b6 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -916,6 +916,7 @@ struct ocfs2_refcount_rec {
 	__le32 r_refcount;	/* Reference count of this extent */
 /*10*/
 };
+#define OCFS2_32BIT_POS_MASK		(0xffffffffULL)
 
 #define OCFS2_REFCOUNT_LEAF_FL          (0x00000001)
 #define OCFS2_REFCOUNT_TREE_FL          (0x00000002)
@@ -1394,6 +1395,12 @@ static inline u16 ocfs2_refcount_recs_per_rb(struct super_block *sb)
 
 	return size / sizeof(struct ocfs2_refcount_rec);
 }
+
+static inline u32
+ocfs2_get_ref_rec_low_cpos(const struct ocfs2_refcount_rec *rec)
+{
+	return le64_to_cpu(rec->r_cpos) & OCFS2_32BIT_POS_MASK;
+}
 #else
 static inline int ocfs2_fast_symlink_chars(int blocksize)
 {
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index d0d6fa312b01..ee0422ce72c4 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -15,6 +15,7 @@
  * General Public License for more details.
  */
 
+#include <linux/sort.h>
 #define MLOG_MASK_PREFIX ML_REFCOUNT
 #include <cluster/masklog.h>
 #include "ocfs2.h"
@@ -29,6 +30,7 @@
 #include "refcounttree.h"
 #include "sysfile.h"
 #include "dlmglue.h"
+#include "extent_map.h"
 
 static inline struct ocfs2_refcount_tree *
 cache_info_to_refcount(struct ocfs2_caching_info *ci)
@@ -848,3 +850,1054 @@ out:
 
 	return ret;
 }
+
+static void ocfs2_find_refcount_rec_in_rl(struct ocfs2_caching_info *ci,
+					  struct buffer_head *ref_leaf_bh,
+					  u64 cpos, unsigned int len,
+					  struct ocfs2_refcount_rec *ret_rec,
+					  int *index)
+{
+	int i = 0;
+	struct ocfs2_refcount_block *rb =
+		(struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
+	struct ocfs2_refcount_rec *rec = NULL;
+
+	for (; i < le16_to_cpu(rb->rf_records.rl_used); i++) {
+		rec = &rb->rf_records.rl_recs[i];
+
+		if (le64_to_cpu(rec->r_cpos) +
+		    le32_to_cpu(rec->r_clusters) <= cpos)
+			continue;
+		else if (le64_to_cpu(rec->r_cpos) > cpos)
+			break;
+
+		/* ok, cpos falls in this rec. Just return. */
+		if (ret_rec)
+			*ret_rec = *rec;
+		goto out;
+	}
+
+	if (ret_rec) {
+		/* cpos lies in a hole, so fake a rec with r_refcount = 0. */
+		ret_rec->r_cpos = cpu_to_le64(cpos);
+		ret_rec->r_refcount = 0;
+		if (i < le16_to_cpu(rb->rf_records.rl_used) &&
+		    le64_to_cpu(rec->r_cpos) < cpos + len)
+			ret_rec->r_clusters =
+				cpu_to_le32(le64_to_cpu(rec->r_cpos) - cpos);
+		else
+			ret_rec->r_clusters = cpu_to_le32(len);
+	}
+
+out:
+	*index = i;
+}
+
+/*
+ * Given a cpos and len, try to find the refcount record which contains cpos.
+ * 1. If cpos can be found in one refcount record, return the record.
+ * 2. If cpos can't be found, return a fake record which starts at cpos
+ *    and ends at the smaller of cpos+len and the start of the next record.
+ *    This fake record has r_refcount = 0.
+ */
+static int ocfs2_get_refcount_rec(struct ocfs2_caching_info *ci,
+				  struct buffer_head *ref_root_bh,
+				  u64 cpos, unsigned int len,
+				  struct ocfs2_refcount_rec *ret_rec,
+				  int *index,
+				  struct buffer_head **ret_bh)
+{
+	int ret = 0, i, found;
+	u32 low_cpos;
+	struct ocfs2_extent_list *el;
+	struct ocfs2_extent_rec *tmp, *rec = NULL;
+	struct ocfs2_extent_block *eb;
+	struct buffer_head *eb_bh = NULL, *ref_leaf_bh = NULL;
+	struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
+	struct ocfs2_refcount_block *rb =
+			(struct ocfs2_refcount_block *)ref_root_bh->b_data;
+
+	if (!(le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)) {
+		ocfs2_find_refcount_rec_in_rl(ci, ref_root_bh, cpos, len,
+					      ret_rec, index);
+		*ret_bh = ref_root_bh;
+		get_bh(ref_root_bh);
+		return 0;
+	}
+
+	el = &rb->rf_list;
+	low_cpos = cpos & OCFS2_32BIT_POS_MASK;
+
+	if (el->l_tree_depth) {
+		ret = ocfs2_find_leaf(ci, el, low_cpos, &eb_bh);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+
+		eb = (struct ocfs2_extent_block *) eb_bh->b_data;
+		el = &eb->h_list;
+
+		if (el->l_tree_depth) {
+			ocfs2_error(sb,
+			"refcount tree %llu has non zero tree "
+			"depth in leaf btree tree block %llu\n",
+			(unsigned long long)ocfs2_metadata_cache_owner(ci),
+			(unsigned long long)eb_bh->b_blocknr);
+			ret = -EROFS;
+			goto out;
+		}
+	}
+
+	found = 0;
+	for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
+		rec = &el->l_recs[i];
+
+		if (le32_to_cpu(rec->e_cpos) <= low_cpos) {
+			found = 1;
+			break;
+		}
+	}
+
+	/* adjust len when we have ocfs2_extent_rec after it. */
+	if (found && i < le16_to_cpu(el->l_next_free_rec) - 1) {
+		tmp = &el->l_recs[i+1];
+
+		if (le32_to_cpu(tmp->e_cpos) < cpos + len)
+			len = le32_to_cpu(tmp->e_cpos) - cpos;
+	}
+
+	ret = ocfs2_read_refcount_block(ci, le64_to_cpu(rec->e_blkno),
+					&ref_leaf_bh);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ocfs2_find_refcount_rec_in_rl(ci, ref_leaf_bh, cpos, len,
+				      ret_rec, index);
+	*ret_bh = ref_leaf_bh;
+out:
+	brelse(eb_bh);
+	return ret;
+}
+
+enum ocfs2_ref_rec_contig {
+	REF_CONTIG_NONE = 0,
+	REF_CONTIG_LEFT,
+	REF_CONTIG_RIGHT,
+	REF_CONTIG_LEFTRIGHT,
+};
+
+static enum ocfs2_ref_rec_contig
+	ocfs2_refcount_rec_adjacent(struct ocfs2_refcount_block *rb,
+				    int index)
+{
+	if ((rb->rf_records.rl_recs[index].r_refcount ==
+	    rb->rf_records.rl_recs[index + 1].r_refcount) &&
+	    (le64_to_cpu(rb->rf_records.rl_recs[index].r_cpos) +
+	    le32_to_cpu(rb->rf_records.rl_recs[index].r_clusters) ==
+	    le64_to_cpu(rb->rf_records.rl_recs[index + 1].r_cpos)))
+		return REF_CONTIG_RIGHT;
+
+	return REF_CONTIG_NONE;
+}
+
+static enum ocfs2_ref_rec_contig
+	ocfs2_refcount_rec_contig(struct ocfs2_refcount_block *rb,
+				  int index)
+{
+	enum ocfs2_ref_rec_contig ret = REF_CONTIG_NONE;
+
+	if (index < le16_to_cpu(rb->rf_records.rl_used) - 1)
+		ret = ocfs2_refcount_rec_adjacent(rb, index);
+
+	if (index > 0) {
+		enum ocfs2_ref_rec_contig tmp;
+
+		tmp = ocfs2_refcount_rec_adjacent(rb, index - 1);
+
+		if (tmp == REF_CONTIG_RIGHT) {
+			if (ret == REF_CONTIG_RIGHT)
+				ret = REF_CONTIG_LEFTRIGHT;
+			else
+				ret = REF_CONTIG_LEFT;
+		}
+	}
+
+	return ret;
+}
+
+static void ocfs2_rotate_refcount_rec_left(struct ocfs2_refcount_block *rb,
+					   int index)
+{
+	BUG_ON(rb->rf_records.rl_recs[index].r_refcount !=
+	       rb->rf_records.rl_recs[index+1].r_refcount);
+
+	le32_add_cpu(&rb->rf_records.rl_recs[index].r_clusters,
+		     le32_to_cpu(rb->rf_records.rl_recs[index+1].r_clusters));
+
+	if (index < le16_to_cpu(rb->rf_records.rl_used) - 2)
+		memmove(&rb->rf_records.rl_recs[index + 1],
+			&rb->rf_records.rl_recs[index + 2],
+			sizeof(struct ocfs2_refcount_rec) *
+			(le16_to_cpu(rb->rf_records.rl_used) - index - 2));
+
+	memset(&rb->rf_records.rl_recs[le16_to_cpu(rb->rf_records.rl_used) - 1],
+	       0, sizeof(struct ocfs2_refcount_rec));
+	le16_add_cpu(&rb->rf_records.rl_used, -1);
+}
+
+/*
+ * Merge the refcount rec if we are contiguous with the adjacent recs.
+ */
+static void ocfs2_refcount_rec_merge(struct ocfs2_refcount_block *rb,
+				     int index)
+{
+	enum ocfs2_ref_rec_contig contig =
+				ocfs2_refcount_rec_contig(rb, index);
+
+	if (contig == REF_CONTIG_NONE)
+		return;
+
+	if (contig == REF_CONTIG_LEFT || contig == REF_CONTIG_LEFTRIGHT) {
+		BUG_ON(index == 0);
+		index--;
+	}
+
+	ocfs2_rotate_refcount_rec_left(rb, index);
+
+	if (contig == REF_CONTIG_LEFTRIGHT)
+		ocfs2_rotate_refcount_rec_left(rb, index);
+}
+
+static int ocfs2_change_refcount_rec(handle_t *handle,
+				     struct ocfs2_caching_info *ci,
+				     struct buffer_head *ref_leaf_bh,
+				     int index, int change)
+{
+	int ret;
+	struct ocfs2_refcount_block *rb =
+			(struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
+	struct ocfs2_refcount_rec *rec = &rb->rf_records.rl_recs[index];
+
+	ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	mlog(0, "change index %d, old count %u, change %d\n", index,
+	     le32_to_cpu(rec->r_refcount), change);
+	le32_add_cpu(&rec->r_refcount, change);
+
+	ocfs2_refcount_rec_merge(rb, index);
+
+	ret = ocfs2_journal_dirty(handle, ref_leaf_bh);
+	if (ret)
+		mlog_errno(ret);
+out:
+	return ret;
+}
+
+static int ocfs2_expand_inline_ref_root(handle_t *handle,
+					struct ocfs2_caching_info *ci,
+					struct buffer_head *ref_root_bh,
+					struct buffer_head **ref_leaf_bh,
+					struct ocfs2_alloc_context *meta_ac)
+{
+	int ret;
+	u16 suballoc_bit_start;
+	u32 num_got;
+	u64 blkno;
+	struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
+	struct buffer_head *new_bh = NULL;
+	struct ocfs2_refcount_block *new_rb;
+	struct ocfs2_refcount_block *root_rb =
+			(struct ocfs2_refcount_block *)ref_root_bh->b_data;
+
+	ret = ocfs2_journal_access_rb(handle, ci, ref_root_bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_claim_metadata(OCFS2_SB(sb), handle, meta_ac, 1,
+				   &suballoc_bit_start, &num_got,
+				   &blkno);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	new_bh = sb_getblk(sb, blkno);
+	if (new_bh == NULL) {
+		ret = -EIO;
+		mlog_errno(ret);
+		goto out;
+	}
+	ocfs2_set_new_buffer_uptodate(ci, new_bh);
+
+	ret = ocfs2_journal_access_rb(handle, ci, new_bh,
+				      OCFS2_JOURNAL_ACCESS_CREATE);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	/*
+	 * Initialize ocfs2_refcount_block.
+	 * It should contain the same information as the old root,
+	 * so just memcpy it and change the corresponding fields.
+	 */
+	memcpy(new_bh->b_data, ref_root_bh->b_data, sb->s_blocksize);
+
+	new_rb = (struct ocfs2_refcount_block *)new_bh->b_data;
+	new_rb->rf_suballoc_slot = cpu_to_le16(OCFS2_SB(sb)->slot_num);
+	new_rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start);
+	new_rb->rf_blkno = cpu_to_le64(blkno);
+	new_rb->rf_cpos = cpu_to_le32(0);
+	new_rb->rf_parent = cpu_to_le64(ref_root_bh->b_blocknr);
+	new_rb->rf_flags = cpu_to_le32(OCFS2_REFCOUNT_LEAF_FL);
+	ocfs2_journal_dirty(handle, new_bh);
+
+	/* Now change the root. */
+	memset(&root_rb->rf_list, 0, sb->s_blocksize -
+	       offsetof(struct ocfs2_refcount_block, rf_list));
+	root_rb->rf_list.l_count = cpu_to_le16(ocfs2_extent_recs_per_rb(sb));
+	root_rb->rf_clusters = cpu_to_le32(1);
+	root_rb->rf_list.l_next_free_rec = cpu_to_le16(1);
+	root_rb->rf_list.l_recs[0].e_blkno = cpu_to_le64(blkno);
+	root_rb->rf_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1);
+	root_rb->rf_flags = cpu_to_le32(OCFS2_REFCOUNT_TREE_FL);
+
+	ocfs2_journal_dirty(handle, ref_root_bh);
+
+	mlog(0, "new leaf block %llu, used %u\n", (unsigned long long)blkno,
+	     le16_to_cpu(new_rb->rf_records.rl_used));
+
+	*ref_leaf_bh = new_bh;
+	new_bh = NULL;
+out:
+	brelse(new_bh);
+	return ret;
+}
+
+/* Return 1 if prev ends (in low 32 bit cpos) at or before next starts. */
+static int ocfs2_refcount_rec_no_intersect(struct ocfs2_refcount_rec *prev,
+					   struct ocfs2_refcount_rec *next)
+{
+	u32 prev_end = ocfs2_get_ref_rec_low_cpos(prev) +
+		       le32_to_cpu(prev->r_clusters);
+
+	return prev_end <= ocfs2_get_ref_rec_low_cpos(next) ? 1 : 0;
+}
+
+/* sort() comparator: order refcount records by their low 32 bit cpos. */
+static int cmp_refcount_rec_by_low_cpos(const void *a, const void *b)
+{
+	const struct ocfs2_refcount_rec *lhs = a;
+	const struct ocfs2_refcount_rec *rhs = b;
+	u32 lhs_pos = ocfs2_get_ref_rec_low_cpos(lhs);
+	u32 rhs_pos = ocfs2_get_ref_rec_low_cpos(rhs);
+
+	if (lhs_pos == rhs_pos)
+		return 0;
+	return lhs_pos > rhs_pos ? 1 : -1;
+}
+
+/* sort() comparator: order refcount records by their full 64 bit cpos. */
+static int cmp_refcount_rec_by_cpos(const void *a, const void *b)
+{
+	const struct ocfs2_refcount_rec *lhs = a;
+	const struct ocfs2_refcount_rec *rhs = b;
+	u64 lhs_pos = le64_to_cpu(lhs->r_cpos);
+	u64 rhs_pos = le64_to_cpu(rhs->r_cpos);
+
+	if (lhs_pos == rhs_pos)
+		return 0;
+	return lhs_pos > rhs_pos ? 1 : -1;
+}
+
+/* sort() swap callback: exchange the two records; size is not used. */
+static void swap_refcount_rec(void *a, void *b, int size)
+{
+	struct ocfs2_refcount_rec *lhs = a, *rhs = b;
+	struct ocfs2_refcount_rec saved = *lhs;
+
+	*lhs = *rhs;
+	*rhs = saved;
+}
+
+/*
+ * The refcount records are ordered by their 64 bit cpos,
+ * but we use only the low 32 bits as the e_cpos in the b-tree.
+ * So we need to make sure that the split pos doesn't intersect any record.
+ *
+ * Note: The refcount block is already sorted by the low 32 bit cpos,
+ *       so just try the middle pos first, and exit as soon as we find
+ *       a good position.
+ */
+static int ocfs2_find_refcount_split_pos(struct ocfs2_refcount_list *rl,
+					 u32 *split_pos, int *split_index)
+{
+	int num_used = le16_to_cpu(rl->rl_used);
+	int delta, middle = num_used / 2;
+
+	for (delta = 0; delta < middle; delta++) {
+		/* Let's check delta earlier than middle */
+		if (ocfs2_refcount_rec_no_intersect(
+					&rl->rl_recs[middle - delta - 1],
+					&rl->rl_recs[middle - delta])) {
+			*split_index = middle - delta;
+			break;
+		}
+
+		/* For even counts, don't walk off the end */
+		if ((middle + delta + 1) == num_used)
+			continue;
+
+		/* Now try delta past middle */
+		if (ocfs2_refcount_rec_no_intersect(
+					&rl->rl_recs[middle + delta],
+					&rl->rl_recs[middle + delta + 1])) {
+			*split_index = middle + delta + 1;
+			break;
+		}
+	}
+
+	if (delta >= middle)
+		return -ENOSPC;
+
+	*split_pos = ocfs2_get_ref_rec_low_cpos(&rl->rl_recs[*split_index]);
+	return 0;
+}
+
+/*
+ * Move the records from the split index onwards out of ref_leaf_bh into
+ * new_bh.  On success *split_cpos is the low 32 bit cpos of the first
+ * moved record; -ENOSPC is returned if no clean split point exists.
+ * Journal dirtying and the b-tree insert are left to the caller.
+ */
+static int ocfs2_divide_leaf_refcount_block(struct buffer_head *ref_leaf_bh,
+					    struct buffer_head *new_bh,
+					    u32 *split_cpos)
+{
+	int split_index = 0, num_moved, ret;
+	u32 cpos = 0;
+	struct ocfs2_refcount_block *rb =
+			(struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
+	struct ocfs2_refcount_list *rl = &rb->rf_records;
+	struct ocfs2_refcount_block *new_rb =
+			(struct ocfs2_refcount_block *)new_bh->b_data;
+	struct ocfs2_refcount_list *new_rl = &new_rb->rf_records;
+
+	mlog(0, "split old leaf refcount block %llu, count = %u, used = %u\n",
+	     (unsigned long long)ref_leaf_bh->b_blocknr,
+	     le16_to_cpu(rl->rl_count), le16_to_cpu(rl->rl_used));
+
+	/*
+	 * XXX: Improvement later.
+	 * If we know all the high 32 bit cpos is the same, no need to sort.
+	 * Sort by the low 32 bit cpos first so that the split cpos is easy
+	 * to find, then re-sort both blocks by the 64 bit cpos at the end.
+	 */
+	sort(&rl->rl_recs, le16_to_cpu(rl->rl_used),
+	     sizeof(struct ocfs2_refcount_rec),
+	     cmp_refcount_rec_by_low_cpos, swap_refcount_rec);
+
+	ret = ocfs2_find_refcount_split_pos(rl, &cpos, &split_index);
+	if (ret) {
+		mlog_errno(ret);
+		return ret;
+	}
+
+	new_rb->rf_cpos = cpu_to_le32(cpos);
+
+	/* move refcount records starting from split_index to the new block. */
+	num_moved = le16_to_cpu(rl->rl_used) - split_index;
+	memcpy(new_rl->rl_recs, &rl->rl_recs[split_index],
+	       num_moved * sizeof(struct ocfs2_refcount_rec));
+
+	/*ok, remove the entries we just moved over to the other block. */
+	memset(&rl->rl_recs[split_index], 0,
+	       num_moved * sizeof(struct ocfs2_refcount_rec));
+
+	/* change old and new rl_used accordingly (both are 16 bit fields). */
+	le16_add_cpu(&rl->rl_used, -num_moved);
+	new_rl->rl_used = cpu_to_le16(num_moved);
+
+	sort(&rl->rl_recs, le16_to_cpu(rl->rl_used),
+	     sizeof(struct ocfs2_refcount_rec),
+	     cmp_refcount_rec_by_cpos, swap_refcount_rec);
+
+	sort(&new_rl->rl_recs, le16_to_cpu(new_rl->rl_used),
+	     sizeof(struct ocfs2_refcount_rec),
+	     cmp_refcount_rec_by_cpos, swap_refcount_rec);
+
+	*split_cpos = cpos;
+	return 0;
+}
+
+static int ocfs2_new_leaf_refcount_block(handle_t *handle,
+					 struct ocfs2_caching_info *ci,
+					 struct buffer_head *ref_root_bh,
+					 struct buffer_head *ref_leaf_bh,
+					 struct ocfs2_alloc_context *meta_ac)
+{
+	int ret;
+	u16 suballoc_bit_start;
+	u32 num_got, new_cpos;
+	u64 blkno;
+	struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
+	struct ocfs2_refcount_block *root_rb =
+			(struct ocfs2_refcount_block *)ref_root_bh->b_data;
+	struct buffer_head *new_bh = NULL;
+	struct ocfs2_refcount_block *new_rb;
+	struct ocfs2_extent_tree ref_et;
+
+	BUG_ON(!(le32_to_cpu(root_rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL));
+
+	ret = ocfs2_journal_access_rb(handle, ci, ref_root_bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_claim_metadata(OCFS2_SB(sb), handle, meta_ac, 1,
+				   &suballoc_bit_start, &num_got,
+				   &blkno);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	new_bh = sb_getblk(sb, blkno);
+	if (new_bh == NULL) {
+		ret = -EIO;
+		mlog_errno(ret);
+		goto out;
+	}
+	ocfs2_set_new_buffer_uptodate(ci, new_bh);
+
+	ret = ocfs2_journal_access_rb(handle, ci, new_bh,
+				      OCFS2_JOURNAL_ACCESS_CREATE);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	/* Initialize ocfs2_refcount_block. */
+	new_rb = (struct ocfs2_refcount_block *)new_bh->b_data;
+	memset(new_rb, 0, sb->s_blocksize);
+	strcpy((void *)new_rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE);
+	new_rb->rf_suballoc_slot = cpu_to_le16(OCFS2_SB(sb)->slot_num);
+	new_rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start);
+	new_rb->rf_fs_generation = cpu_to_le32(OCFS2_SB(sb)->fs_generation);
+	new_rb->rf_blkno = cpu_to_le64(blkno);
+	new_rb->rf_parent = cpu_to_le64(ref_root_bh->b_blocknr);
+	new_rb->rf_flags = cpu_to_le32(OCFS2_REFCOUNT_LEAF_FL);
+	new_rb->rf_records.rl_count =
+				cpu_to_le16(ocfs2_refcount_recs_per_rb(sb));
+	new_rb->rf_generation = root_rb->rf_generation;
+
+	ret = ocfs2_divide_leaf_refcount_block(ref_leaf_bh, new_bh, &new_cpos);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ocfs2_journal_dirty(handle, ref_leaf_bh);
+	ocfs2_journal_dirty(handle, new_bh);
+
+	ocfs2_init_refcount_extent_tree(&ref_et, ci, ref_root_bh);
+
+	mlog(0, "insert new leaf block %llu at %u\n",
+	     (unsigned long long)new_bh->b_blocknr, new_cpos);
+
+	/* Insert the new leaf block with the specific offset cpos. */
+	ret = ocfs2_insert_extent(handle, &ref_et, new_cpos, new_bh->b_blocknr,
+				  1, 0, meta_ac);
+	if (ret)
+		mlog_errno(ret);
+
+out:
+	brelse(new_bh);
+	return ret;
+}
+
+static int ocfs2_expand_refcount_tree(handle_t *handle,
+				      struct ocfs2_caching_info *ci,
+				      struct buffer_head *ref_root_bh,
+				      struct buffer_head *ref_leaf_bh,
+				      struct ocfs2_alloc_context *meta_ac)
+{
+	int ret;
+	struct buffer_head *expand_bh = NULL;
+
+	if (ref_root_bh == ref_leaf_bh) {
+		/*
+		 * the old root bh hasn't been expanded to a b-tree,
+		 * so expand it first.
+		 */
+		ret = ocfs2_expand_inline_ref_root(handle, ci, ref_root_bh,
+						   &expand_bh, meta_ac);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+	} else {
+		expand_bh = ref_leaf_bh;
+		get_bh(expand_bh);
+	}
+
+
+	/* Now add a new refcount block into the tree.*/
+	ret = ocfs2_new_leaf_refcount_block(handle, ci, ref_root_bh,
+					    expand_bh, meta_ac);
+	if (ret)
+		mlog_errno(ret);
+out:
+	brelse(expand_bh);
+	return ret;
+}
+
+/*
+ * Adjust the extent rec in b-tree representing ref_leaf_bh.
+ *
+ * Only called when we have inserted a new refcount rec at index 0
+ * which means ocfs2_extent_rec.e_cpos may need some change.
+ */
+static int ocfs2_adjust_refcount_rec(handle_t *handle,
+				     struct ocfs2_caching_info *ci,
+				     struct buffer_head *ref_root_bh,
+				     struct buffer_head *ref_leaf_bh,
+				     struct ocfs2_refcount_rec *rec)
+{
+	int ret = 0, i;
+	u32 new_cpos, old_cpos;
+	struct ocfs2_path *path = NULL;
+	struct ocfs2_extent_tree et;
+	struct ocfs2_refcount_block *rb =
+		(struct ocfs2_refcount_block *)ref_root_bh->b_data;
+	struct ocfs2_extent_list *el;
+
+	if (!(le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL))
+		goto out;
+
+	rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
+	old_cpos = le32_to_cpu(rb->rf_cpos);
+	new_cpos = le64_to_cpu(rec->r_cpos) & OCFS2_32BIT_POS_MASK;
+	if (old_cpos <= new_cpos)
+		goto out;
+
+	ocfs2_init_refcount_extent_tree(&et, ci, ref_root_bh);
+
+	path = ocfs2_new_path_from_et(&et);
+	if (!path) {
+		ret = -ENOMEM;
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_find_path(ci, path, old_cpos);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	/*
+	 * 2 more credits, one for the leaf refcount block, one for
+	 * the extent block that contains the extent rec.
+	 */
+	ret = ocfs2_extend_trans(handle, handle->h_buffer_credits + 2);
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_journal_access_eb(handle, ci, path_leaf_bh(path),
+				      OCFS2_JOURNAL_ACCESS_WRITE);
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	/* change the leaf extent block first. */
+	el = path_leaf_el(path);
+
+	for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++)
+		if (le32_to_cpu(el->l_recs[i].e_cpos) == old_cpos)
+			break;
+
+	BUG_ON(i == le16_to_cpu(el->l_next_free_rec));
+
+	el->l_recs[i].e_cpos = cpu_to_le32(new_cpos);
+
+	/* change the rf_cpos in the leaf block. */
+	rb->rf_cpos = cpu_to_le32(new_cpos);
+
+	ocfs2_journal_dirty(handle, path_leaf_bh(path));
+	ocfs2_journal_dirty(handle, ref_leaf_bh);
+
+out:
+	ocfs2_free_path(path);
+	return ret;
+}
+
+static int ocfs2_insert_refcount_rec(handle_t *handle,
+				     struct ocfs2_caching_info *ci,
+				     struct buffer_head *ref_root_bh,
+				     struct buffer_head *ref_leaf_bh,
+				     struct ocfs2_refcount_rec *rec,
+				     int index,
+				     struct ocfs2_alloc_context *meta_ac)
+{
+	int ret;
+	struct ocfs2_refcount_block *rb =
+			(struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
+	struct ocfs2_refcount_list *rf_list = &rb->rf_records;
+	struct buffer_head *new_bh = NULL;
+
+	BUG_ON(le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL);
+
+	if (rf_list->rl_used == rf_list->rl_count) {
+		u64 cpos = le64_to_cpu(rec->r_cpos);
+		u32 len = le32_to_cpu(rec->r_clusters);
+
+		ret = ocfs2_expand_refcount_tree(handle, ci, ref_root_bh,
+						 ref_leaf_bh, meta_ac);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+
+		ret = ocfs2_get_refcount_rec(ci, ref_root_bh,
+					     cpos, len, NULL, &index,
+					     &new_bh);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+
+		ref_leaf_bh = new_bh;
+		rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
+		rf_list = &rb->rf_records;
+	}
+
+	ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	if (index < le16_to_cpu(rf_list->rl_used))
+		memmove(&rf_list->rl_recs[index + 1],
+			&rf_list->rl_recs[index],
+			(le16_to_cpu(rf_list->rl_used) - index) *
+			 sizeof(struct ocfs2_refcount_rec));
+
+	mlog(0, "insert refcount record start %llu, len %u, count %u "
+	     "to leaf block %llu at index %d\n",
+	     (unsigned long long)le64_to_cpu(rec->r_cpos),
+	     le32_to_cpu(rec->r_clusters), le32_to_cpu(rec->r_refcount),
+	     (unsigned long long)ref_leaf_bh->b_blocknr, index);
+
+	rf_list->rl_recs[index] = *rec;
+
+	le16_add_cpu(&rf_list->rl_used, 1);
+
+	ocfs2_refcount_rec_merge(rb, index);
+
+	ret = ocfs2_journal_dirty(handle, ref_leaf_bh);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	if (index == 0) {
+		ret = ocfs2_adjust_refcount_rec(handle, ci,
+						ref_root_bh,
+						ref_leaf_bh, rec);
+		if (ret)
+			mlog_errno(ret);
+	}
+out:
+	brelse(new_bh);
+	return ret;
+}
+
+/*
+ * Split the refcount_rec indexed by "index" in ref_leaf_bh.
+ * This is much simpler than our b-tree code.
+ * split_rec is the new refcount rec we want to insert.
+ * If split_rec->r_refcount > 0, we are changing the refcount (in case we
+ * increase refcount or decrease a refcount to non-zero).
+ * If split_rec->r_refcount == 0, we are punching a hole in current refcount
+ * rec (in case we decrease a refcount to zero).
+ */
+static int ocfs2_split_refcount_rec(handle_t *handle,
+				    struct ocfs2_caching_info *ci,
+				    struct buffer_head *ref_root_bh,
+				    struct buffer_head *ref_leaf_bh,
+				    struct ocfs2_refcount_rec *split_rec,
+				    int index,
+				    struct ocfs2_alloc_context *meta_ac,
+				    struct ocfs2_cached_dealloc_ctxt *dealloc)
+{
+	int ret, recs_need;
+	u32 len;
+	struct ocfs2_refcount_block *rb =
+			(struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
+	struct ocfs2_refcount_list *rf_list = &rb->rf_records;
+	struct ocfs2_refcount_rec *orig_rec = &rf_list->rl_recs[index];
+	struct ocfs2_refcount_rec *tail_rec = NULL;
+	struct buffer_head *new_bh = NULL;
+
+	BUG_ON(le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL);
+
+	mlog(0, "original r_pos %llu, cluster %u, split %llu, cluster %u\n",
+	     le64_to_cpu(orig_rec->r_cpos), le32_to_cpu(orig_rec->r_clusters),
+	     le64_to_cpu(split_rec->r_cpos),
+	     le32_to_cpu(split_rec->r_clusters));
+
+	/*
+	 * If we just need to split the header or tail clusters, no more
+	 * recs are needed.  Otherwise we need at least one new rec.
+	 */
+	if (!split_rec->r_refcount &&
+	    (split_rec->r_cpos == orig_rec->r_cpos ||
+	     le64_to_cpu(split_rec->r_cpos) +
+	     le32_to_cpu(split_rec->r_clusters) ==
+	     le64_to_cpu(orig_rec->r_cpos) + le32_to_cpu(orig_rec->r_clusters)))
+		recs_need = 0;
+	else
+		recs_need = 1;
+
+	/*
+	 * We need one more rec if we split in the middle and the new rec have
+	 * some refcount in it.
+	 */
+	if (split_rec->r_refcount &&
+	    (split_rec->r_cpos != orig_rec->r_cpos &&
+	     le64_to_cpu(split_rec->r_cpos) +
+	     le32_to_cpu(split_rec->r_clusters) !=
+	     le64_to_cpu(orig_rec->r_cpos) + le32_to_cpu(orig_rec->r_clusters)))
+		recs_need++;
+
+	/* If the leaf block doesn't have enough free records, expand it. */
+	if (le16_to_cpu(rf_list->rl_used) + recs_need >
+	    le16_to_cpu(rf_list->rl_count)) {
+		struct ocfs2_refcount_rec tmp_rec;
+		u64 cpos = le64_to_cpu(orig_rec->r_cpos);
+		len = le32_to_cpu(orig_rec->r_clusters);
+		ret = ocfs2_expand_refcount_tree(handle, ci, ref_root_bh,
+						 ref_leaf_bh, meta_ac);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+
+		/*
+		 * We have to re-get it since now cpos may be moved to
+		 * another leaf block.
+		 */
+		ret = ocfs2_get_refcount_rec(ci, ref_root_bh,
+					     cpos, len, &tmp_rec, &index,
+					     &new_bh);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+
+		ref_leaf_bh = new_bh;
+		rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
+		rf_list = &rb->rf_records;
+		orig_rec = &rf_list->rl_recs[index];
+	}
+
+	ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	/*
+	 * We have calculated out how many new records we need and store
+	 * in recs_need, so spare enough space first by moving the records
+	 * after "index" to the end.
+	 */
+	if (index != le16_to_cpu(rf_list->rl_used) - 1)
+		memmove(&rf_list->rl_recs[index + 1 + recs_need],
+			&rf_list->rl_recs[index + 1],
+			(le16_to_cpu(rf_list->rl_used) - index - 1) *
+			 sizeof(struct ocfs2_refcount_rec));
+
+	len = (le64_to_cpu(orig_rec->r_cpos) +
+	      le32_to_cpu(orig_rec->r_clusters)) -
+	      (le64_to_cpu(split_rec->r_cpos) +
+	      le32_to_cpu(split_rec->r_clusters));
+
+	/*
+	 * If we have "len", then we will split in the tail and move it
+	 * to the end of the space we have just spared.
+	 */
+	if (len) {
+		tail_rec = &rf_list->rl_recs[index + recs_need];
+
+		memcpy(tail_rec, orig_rec, sizeof(struct ocfs2_refcount_rec));
+		le64_add_cpu(&tail_rec->r_cpos,
+			     le32_to_cpu(tail_rec->r_clusters) - len);
+		tail_rec->r_clusters = cpu_to_le32(len);
+	}
+
+	/*
+	 * If the split pos isn't the same as the original one, we need to
+	 * split in the head.
+	 *
+	 * Note: We have the chance that split_rec.r_refcount = 0,
+	 * recs_need = 0 and len > 0, which means we just cut the head from
+	 * the orig_rec and in that case we have done some modification in
+	 * orig_rec above, so the check for r_cpos is faked.
+	 */
+	if (split_rec->r_cpos != orig_rec->r_cpos && tail_rec != orig_rec) {
+		len = le64_to_cpu(split_rec->r_cpos) -
+		      le64_to_cpu(orig_rec->r_cpos);
+		orig_rec->r_clusters = cpu_to_le32(len);
+		index++;
+	}
+
+	le16_add_cpu(&rf_list->rl_used, recs_need);
+
+	if (split_rec->r_refcount) {
+		rf_list->rl_recs[index] = *split_rec;
+		mlog(0, "insert refcount record start %llu, len %u, count %u "
+		     "to leaf block %llu at index %d\n",
+		     (unsigned long long)le64_to_cpu(split_rec->r_cpos),
+		     le32_to_cpu(split_rec->r_clusters),
+		     le32_to_cpu(split_rec->r_refcount),
+		     (unsigned long long)ref_leaf_bh->b_blocknr, index);
+
+		ocfs2_refcount_rec_merge(rb, index);
+	}
+
+	ret = ocfs2_journal_dirty(handle, ref_leaf_bh);
+	if (ret)
+		mlog_errno(ret);
+
+out:
+	brelse(new_bh);
+	return ret;
+}
+
+static int __ocfs2_increase_refcount(handle_t *handle,
+				     struct ocfs2_caching_info *ci,
+				     struct buffer_head *ref_root_bh,
+				     u64 cpos, u32 len,
+				     struct ocfs2_alloc_context *meta_ac,
+				     struct ocfs2_cached_dealloc_ctxt *dealloc)
+{
+	int ret = 0, index;
+	struct buffer_head *ref_leaf_bh = NULL;
+	struct ocfs2_refcount_rec rec;
+	unsigned int set_len = 0;
+
+	mlog(0, "Tree owner %llu, add refcount start %llu, len %u\n",
+	     (unsigned long long)ocfs2_metadata_cache_owner(ci),
+	     (unsigned long long)cpos, len);
+
+	while (len) {
+		ret = ocfs2_get_refcount_rec(ci, ref_root_bh,
+					     cpos, len, &rec, &index,
+					     &ref_leaf_bh);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+
+		set_len = le32_to_cpu(rec.r_clusters);
+
+		/*
+		 * Here we may meet with 3 situations:
+		 *
+		 * 1. If we find an existing record that starts at cpos and
+		 *    fits within len, we just need to increase its
+		 *    r_refcount.
+		 * 2. If we find a hole, just insert it with r_refcount = 1.
+		 * 3. Otherwise the range covers only part of the record, so
+		 *    split it.
+		 */
+		if (rec.r_refcount && le64_to_cpu(rec.r_cpos) == cpos &&
+		    set_len <= len) {
+			mlog(0, "increase refcount rec, start %llu, len %u, "
+			     "count %u\n", (unsigned long long)cpos, set_len,
+			     le32_to_cpu(rec.r_refcount));
+			ret = ocfs2_change_refcount_rec(handle, ci,
+							ref_leaf_bh, index, 1);
+			if (ret) {
+				mlog_errno(ret);
+				goto out;
+			}
+		} else if (!rec.r_refcount) {
+			rec.r_refcount = cpu_to_le32(1);
+
+			mlog(0, "insert refcount rec, start %llu, len %u\n",
+			     (unsigned long long)le64_to_cpu(rec.r_cpos),
+			     set_len);
+			ret = ocfs2_insert_refcount_rec(handle, ci, ref_root_bh,
+							ref_leaf_bh,
+							&rec, index, meta_ac);
+			if (ret) {
+				mlog_errno(ret);
+				goto out;
+			}
+		} else  {
+			set_len = min((u64)(cpos + len),
+				      le64_to_cpu(rec.r_cpos) + set_len) - cpos;
+			rec.r_cpos = cpu_to_le64(cpos);
+			rec.r_clusters = cpu_to_le32(set_len);
+			le32_add_cpu(&rec.r_refcount, 1);
+
+			mlog(0, "split refcount rec, start %llu, "
+			     "len %u, count %u\n",
+			     (unsigned long long)le64_to_cpu(rec.r_cpos),
+			     set_len, le32_to_cpu(rec.r_refcount));
+			ret = ocfs2_split_refcount_rec(handle, ci,
+						       ref_root_bh, ref_leaf_bh,
+						       &rec, index,
+						       meta_ac, dealloc);
+			if (ret) {
+				mlog_errno(ret);
+				goto out;
+			}
+		}
+
+		cpos += set_len;
+		len -= set_len;
+		brelse(ref_leaf_bh);
+		ref_leaf_bh = NULL;
+	}
+
+out:
+	brelse(ref_leaf_bh);
+	return ret;
+}
-- 
cgit v1.2.3


From 1823cb0b9fe5e6d48017ee3f92428f69c0235d87 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:24:49 +0800
Subject: ocfs2: Add support of decrementing refcount for delete.

    Given a physical cpos and length, decrement the refcount
in the tree. If the refcount for any portion of the extent goes
to zero, that portion is queued for freeing.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/ocfs2/alloc.c        |   6 +-
 fs/ocfs2/alloc.h        |   3 +
 fs/ocfs2/refcounttree.c | 256 +++++++++++++++++++++++++++++++++++++++++++++++-
 fs/ocfs2/refcounttree.h |   5 +
 4 files changed, 265 insertions(+), 5 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 2c8ce32adf01..9dd68cd7b0ad 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -6522,9 +6522,9 @@ ocfs2_find_per_slot_free_list(int type,
 	return fl;
 }
 
-static int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
-				     int type, int slot, u64 blkno,
-				     unsigned int bit)
+int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
+			      int type, int slot, u64 blkno,
+			      unsigned int bit)
 {
 	int ret;
 	struct ocfs2_per_slot_free_list *fl;
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index 3f4348923b73..0610ba148ea0 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -202,6 +202,9 @@ static inline void ocfs2_init_dealloc_ctxt(struct ocfs2_cached_dealloc_ctxt *c)
 }
 int ocfs2_cache_cluster_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
 				u64 blkno, unsigned int bit);
+int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
+			      int type, int slot, u64 blkno,
+			      unsigned int bit);
 static inline int ocfs2_dealloc_has_cluster(struct ocfs2_cached_dealloc_ctxt *c)
 {
 	return c->c_global_allocator != NULL;
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index ee0422ce72c4..2c7974cccaf8 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -1071,6 +1071,10 @@ static void ocfs2_refcount_rec_merge(struct ocfs2_refcount_block *rb,
 		ocfs2_rotate_refcount_rec_left(rb, index);
 }
 
+/*
+ * Change the refcount indexed by "index" in ref_bh.
+ * If refcount reaches 0, remove it.
+ */
 static int ocfs2_change_refcount_rec(handle_t *handle,
 				     struct ocfs2_caching_info *ci,
 				     struct buffer_head *ref_leaf_bh,
@@ -1079,7 +1083,8 @@ static int ocfs2_change_refcount_rec(handle_t *handle,
 	int ret;
 	struct ocfs2_refcount_block *rb =
 			(struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
-	struct ocfs2_refcount_rec *rec = &rb->rf_records.rl_recs[index];
+	struct ocfs2_refcount_list *rl = &rb->rf_records;
+	struct ocfs2_refcount_rec *rec = &rl->rl_recs[index];
 
 	ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh,
 				      OCFS2_JOURNAL_ACCESS_WRITE);
@@ -1092,7 +1097,18 @@ static int ocfs2_change_refcount_rec(handle_t *handle,
 	     le32_to_cpu(rec->r_refcount), change);
 	le32_add_cpu(&rec->r_refcount, change);
 
-	ocfs2_refcount_rec_merge(rb, index);
+	if (!rec->r_refcount) {
+		if (index != le16_to_cpu(rl->rl_used) - 1) {
+			memmove(rec, rec + 1,
+				(le16_to_cpu(rl->rl_used) - index - 1) *
+				sizeof(struct ocfs2_refcount_rec));
+			memset(&rl->rl_recs[le16_to_cpu(rl->rl_used) - 1],
+			       0, sizeof(struct ocfs2_refcount_rec));
+		}
+
+		le16_add_cpu(&rl->rl_used, -1);
+	} else
+		ocfs2_refcount_rec_merge(rb, index);
 
 	ret = ocfs2_journal_dirty(handle, ref_leaf_bh);
 	if (ret)
@@ -1901,3 +1917,239 @@ out:
 	brelse(ref_leaf_bh);
 	return ret;
 }
+
+/*
+ * Remove the (empty) leaf refcount block ref_leaf_bh from the tree rooted
+ * at ref_root_bh and queue the block on dealloc.  When no leaf is left the
+ * root is reset to a record block.  Returns 0 or the first helper error.
+ */
+static int ocfs2_remove_refcount_extent(handle_t *handle,
+				struct ocfs2_caching_info *ci,
+				struct buffer_head *ref_root_bh,
+				struct buffer_head *ref_leaf_bh,
+				struct ocfs2_alloc_context *meta_ac,
+				struct ocfs2_cached_dealloc_ctxt *dealloc)
+{
+	int ret;
+	struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
+	struct ocfs2_refcount_block *rb =
+			(struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
+	struct ocfs2_extent_tree et;
+
+	BUG_ON(rb->rf_records.rl_used);
+
+	ocfs2_init_refcount_extent_tree(&et, ci, ref_root_bh);
+	ret = ocfs2_remove_extent(handle, &et, le32_to_cpu(rb->rf_cpos),
+				  1, meta_ac, dealloc);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ocfs2_remove_from_cache(ci, ref_leaf_bh);
+
+	/* Queue the leaf block so that it is freed when dealloc is run. */
+	ret = ocfs2_cache_block_dealloc(dealloc, EXTENT_ALLOC_SYSTEM_INODE,
+					le16_to_cpu(rb->rf_suballoc_slot),
+					le64_to_cpu(rb->rf_blkno),
+					le16_to_cpu(rb->rf_suballoc_bit));
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_journal_access_rb(handle, ci, ref_root_bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
+	le32_add_cpu(&rb->rf_clusters, -1);
+
+	/* No leaf left at all: turn the root back into a record block. */
+	if (!rb->rf_list.l_next_free_rec) {
+		BUG_ON(rb->rf_clusters);
+
+		mlog(0, "reset refcount tree root %llu to be a record block.\n",
+		     (unsigned long long)ref_root_bh->b_blocknr);
+
+		rb->rf_flags = 0;
+		rb->rf_parent = 0;
+		rb->rf_cpos = 0;
+		memset(&rb->rf_records, 0, sb->s_blocksize -
+		       offsetof(struct ocfs2_refcount_block, rf_records));
+		rb->rf_records.rl_count =
+				cpu_to_le16(ocfs2_refcount_recs_per_rb(sb));
+	}
+
+	ret = ocfs2_journal_dirty(handle, ref_root_bh);
+	if (ret)
+		mlog_errno(ret);
+
+out:
+	return ret;
+}
+
+static int ocfs2_decrease_refcount_rec(handle_t *handle,
+				struct ocfs2_caching_info *ci,
+				struct buffer_head *ref_root_bh,
+				struct buffer_head *ref_leaf_bh,
+				int index, u64 cpos, unsigned int len,
+				struct ocfs2_alloc_context *meta_ac,
+				struct ocfs2_cached_dealloc_ctxt *dealloc)
+{
+	int ret;
+	struct ocfs2_refcount_block *rb =
+			(struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
+	struct ocfs2_refcount_rec *rec = &rb->rf_records.rl_recs[index];
+
+	BUG_ON(cpos < le64_to_cpu(rec->r_cpos));
+	BUG_ON(cpos + len >
+	       le64_to_cpu(rec->r_cpos) + le32_to_cpu(rec->r_clusters));
+
+	if (cpos == le64_to_cpu(rec->r_cpos) &&
+	    len == le32_to_cpu(rec->r_clusters))
+		ret = ocfs2_change_refcount_rec(handle, ci,
+						ref_leaf_bh, index, -1);
+	else {
+		struct ocfs2_refcount_rec split = *rec;
+		split.r_cpos = cpu_to_le64(cpos);
+		split.r_clusters = cpu_to_le32(len);
+
+		le32_add_cpu(&split.r_refcount, -1);
+
+		mlog(0, "split refcount rec, start %llu, "
+		     "len %u, count %u, original start %llu, len %u\n",
+		     (unsigned long long)le64_to_cpu(split.r_cpos),
+		     len, le32_to_cpu(split.r_refcount),
+		     (unsigned long long)le64_to_cpu(rec->r_cpos),
+		     le32_to_cpu(rec->r_clusters));
+		ret = ocfs2_split_refcount_rec(handle, ci,
+					       ref_root_bh, ref_leaf_bh,
+					       &split, index,
+					       meta_ac, dealloc);
+	}
+
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	/* Remove the leaf refcount block if it contains no refcount record. */
+	if (!rb->rf_records.rl_used && ref_leaf_bh != ref_root_bh) {
+		ret = ocfs2_remove_refcount_extent(handle, ci, ref_root_bh,
+						   ref_leaf_bh, meta_ac,
+						   dealloc);
+		if (ret)
+			mlog_errno(ret);
+	}
+
+out:
+	return ret;
+}
+
+static int __ocfs2_decrease_refcount(handle_t *handle,
+				     struct ocfs2_caching_info *ci,
+				     struct buffer_head *ref_root_bh,
+				     u64 cpos, u32 len,
+				     struct ocfs2_alloc_context *meta_ac,
+				     struct ocfs2_cached_dealloc_ctxt *dealloc)
+{
+	int ret = 0, index = 0;
+	struct ocfs2_refcount_rec rec;
+	unsigned int r_count = 0, r_len;
+	struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
+	struct buffer_head *ref_leaf_bh = NULL;
+
+	mlog(0, "Tree owner %llu, decrease refcount start %llu, len %u\n",
+	     (unsigned long long)ocfs2_metadata_cache_owner(ci),
+	     (unsigned long long)cpos, len);
+
+	while (len) {
+		ret = ocfs2_get_refcount_rec(ci, ref_root_bh,
+					     cpos, len, &rec, &index,
+					     &ref_leaf_bh);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+
+		r_count = le32_to_cpu(rec.r_refcount);
+		BUG_ON(r_count == 0);
+
+		r_len = min((u64)(cpos + len), le64_to_cpu(rec.r_cpos) +
+			      le32_to_cpu(rec.r_clusters)) - cpos;
+
+		ret = ocfs2_decrease_refcount_rec(handle, ci, ref_root_bh,
+						  ref_leaf_bh, index,
+						  cpos, r_len,
+						  meta_ac, dealloc);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+
+		if (le32_to_cpu(rec.r_refcount) == 1) {
+			ret = ocfs2_cache_cluster_dealloc(dealloc,
+					  ocfs2_clusters_to_blocks(sb, cpos),
+							  r_len);
+			if (ret) {
+				mlog_errno(ret);
+				goto out;
+			}
+		}
+
+		cpos += r_len;
+		len -= r_len;
+		brelse(ref_leaf_bh);
+		ref_leaf_bh = NULL;
+	}
+
+out:
+	brelse(ref_leaf_bh);
+	return ret;
+}
+
+/* Caller must hold refcount tree lock. */
+int ocfs2_decrease_refcount(struct inode *inode,
+			    handle_t *handle, u32 cpos, u32 len,
+			    struct ocfs2_alloc_context *meta_ac,
+			    struct ocfs2_cached_dealloc_ctxt *dealloc)
+{
+	int ret;
+	u64 ref_blkno;
+	struct ocfs2_inode_info *oi = OCFS2_I(inode);
+	struct buffer_head *ref_root_bh = NULL;
+	struct ocfs2_refcount_tree *tree;
+
+	BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
+
+	ret = ocfs2_get_refcount_block(inode, &ref_blkno);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_get_refcount_tree(OCFS2_SB(inode->i_sb), ref_blkno, &tree);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_read_refcount_block(&tree->rf_ci, tree->rf_blkno,
+					&ref_root_bh);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = __ocfs2_decrease_refcount(handle, &tree->rf_ci, ref_root_bh,
+					cpos, len, meta_ac, dealloc);
+	if (ret)
+		mlog_errno(ret);
+out:
+	brelse(ref_root_bh);
+	return ret;
+}
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h
index 2ea7fc52c23c..ad4b483ec5c7 100644
--- a/fs/ocfs2/refcounttree.h
+++ b/fs/ocfs2/refcounttree.h
@@ -40,4 +40,9 @@ int ocfs2_lock_refcount_tree(struct ocfs2_super *osb, u64 ref_blkno, int rw,
 void ocfs2_unlock_refcount_tree(struct ocfs2_super *osb,
 				struct ocfs2_refcount_tree *tree,
 				int rw);
+
+int ocfs2_decrease_refcount(struct inode *inode,
+			    handle_t *handle, u32 cpos, u32 len,
+			    struct ocfs2_alloc_context *meta_ac,
+			    struct ocfs2_cached_dealloc_ctxt *dealloc);
 #endif /* OCFS2_REFCOUNTTREE_H */
-- 
cgit v1.2.3


From 1aa75fea64bc26bda9be9b1b20ae253d7a481877 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:28:39 +0800
Subject: ocfs2: Add functions for extents refcounted.

Add function ocfs2_mark_extent_refcounted which can mark
an extent refcounted.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/ocfs2/alloc.c        | 12 ++++++------
 fs/ocfs2/alloc.h        |  6 ++++++
 fs/ocfs2/ocfs2.h        |  7 +++++++
 fs/ocfs2/refcounttree.c | 39 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 58 insertions(+), 6 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 9dd68cd7b0ad..96f8ca6b3aba 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -5169,12 +5169,12 @@ out:
  *
  * The caller is responsible for passing down meta_ac if we'll need it.
  */
-static int ocfs2_change_extent_flag(handle_t *handle,
-				    struct ocfs2_extent_tree *et,
-				    u32 cpos, u32 len, u32 phys,
-				    struct ocfs2_alloc_context *meta_ac,
-				    struct ocfs2_cached_dealloc_ctxt *dealloc,
-				    int new_flags, int clear_flags)
+int ocfs2_change_extent_flag(handle_t *handle,
+			     struct ocfs2_extent_tree *et,
+			     u32 cpos, u32 len, u32 phys,
+			     struct ocfs2_alloc_context *meta_ac,
+			     struct ocfs2_cached_dealloc_ctxt *dealloc,
+			     int new_flags, int clear_flags)
 {
 	int ret, index;
 	struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index 0610ba148ea0..19d5b88a93df 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -128,6 +128,12 @@ int ocfs2_mark_extent_written(struct inode *inode,
 			      handle_t *handle, u32 cpos, u32 len, u32 phys,
 			      struct ocfs2_alloc_context *meta_ac,
 			      struct ocfs2_cached_dealloc_ctxt *dealloc);
+int ocfs2_change_extent_flag(handle_t *handle,
+			     struct ocfs2_extent_tree *et,
+			     u32 cpos, u32 len, u32 phys,
+			     struct ocfs2_alloc_context *meta_ac,
+			     struct ocfs2_cached_dealloc_ctxt *dealloc,
+			     int new_flags, int clear_flags);
 int ocfs2_remove_extent(handle_t *handle, struct ocfs2_extent_tree *et,
 			u32 cpos, u32 len,
 			struct ocfs2_alloc_context *meta_ac,
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index bb5357376ef5..eae404602424 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -516,6 +516,13 @@ static inline void ocfs2_add_links_count(struct ocfs2_dinode *di, int n)
 	ocfs2_set_links_count(di, links);
 }
 
+static inline int ocfs2_refcount_tree(struct ocfs2_super *osb)
+{
+	if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE)
+		return 1;
+	return 0;
+}
+
 /* set / clear functions because cluster events can make these happen
  * in parallel so we want the transitions to be atomic. this also
  * means that any future flags osb_flags must be protected by spinlock
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 2c7974cccaf8..f7d19f4db897 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -2153,3 +2153,42 @@ out:
 	brelse(ref_root_bh);
 	return ret;
 }
+
+/*
+ * Mark the already-existing extent at cpos as refcounted for len clusters.
+ * This adds the refcount extent flag.
+ *
+ * If the existing extent is larger than the request, initiate a
+ * split. An attempt will be made at merging with adjacent extents.
+ *
+ * The caller is responsible for passing down meta_ac if we'll need it.
+ */
+static int ocfs2_mark_extent_refcounted(struct inode *inode,
+				struct ocfs2_extent_tree *et,
+				handle_t *handle, u32 cpos,
+				u32 len, u32 phys,
+				struct ocfs2_alloc_context *meta_ac,
+				struct ocfs2_cached_dealloc_ctxt *dealloc)
+{
+	int ret;
+
+	mlog(0, "Inode %lu refcount tree cpos %u, len %u, phys cluster %u\n",
+	     inode->i_ino, cpos, len, phys);
+
+	if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) {
+		ocfs2_error(inode->i_sb, "Inode %lu want to use refcount "
+			    "tree, but the feature bit is not set in the "
+			    "super block.", inode->i_ino);
+		ret = -EROFS;
+		goto out;
+	}
+
+	ret = ocfs2_change_extent_flag(handle, et, cpos,
+				       len, phys, meta_ac, dealloc,
+				       OCFS2_EXT_REFCOUNTED, 0);
+	if (ret)
+		mlog_errno(ret);
+
+out:
+	return ret;
+}
-- 
cgit v1.2.3


From bcbbb24a6a5c5b3e7b8e5284e0bfa23f45c32377 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:29:12 +0800
Subject: ocfs2: Decrement refcount when truncating refcounted extents.

Hook 'decrement refcount for delete' into the normal truncate
process: for a refcounted extent record, decrement the refcount
record instead of freeing the clusters directly.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/ocfs2/alloc.c        |  76 +++++++++++++++--
 fs/ocfs2/journal.h      |   3 +
 fs/ocfs2/refcounttree.c | 212 ++++++++++++++++++++++++++++++++++++++++++++++++
 fs/ocfs2/refcounttree.h |   6 ++
 4 files changed, 290 insertions(+), 7 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 96f8ca6b3aba..03438a677933 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -49,6 +49,7 @@
 #include "super.h"
 #include "uptodate.h"
 #include "xattr.h"
+#include "refcounttree.h"
 
 #include "buffer_head_io.h"
 
@@ -6673,7 +6674,7 @@ out:
  */
 static int ocfs2_trim_tree(struct inode *inode, struct ocfs2_path *path,
 			   handle_t *handle, struct ocfs2_truncate_context *tc,
-			   u32 clusters_to_del, u64 *delete_start)
+			   u32 clusters_to_del, u64 *delete_start, u8 *flags)
 {
 	int ret, i, index = path->p_tree_depth;
 	u32 new_edge = 0;
@@ -6683,6 +6684,7 @@ static int ocfs2_trim_tree(struct inode *inode, struct ocfs2_path *path,
 	struct ocfs2_extent_rec *rec;
 
 	*delete_start = 0;
+	*flags = 0;
 
 	while (index >= 0) {
 		bh = path->p_node[index].bh;
@@ -6770,6 +6772,7 @@ find_tail_record:
 			*delete_start = le64_to_cpu(rec->e_blkno)
 				+ ocfs2_clusters_to_blocks(inode->i_sb,
 					le16_to_cpu(rec->e_leaf_clusters));
+			*flags = rec->e_flags;
 
 			/*
 			 * If it's now empty, remove this record.
@@ -6869,7 +6872,8 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb,
 			     struct buffer_head *fe_bh,
 			     handle_t *handle,
 			     struct ocfs2_truncate_context *tc,
-			     struct ocfs2_path *path)
+			     struct ocfs2_path *path,
+			     struct ocfs2_alloc_context *meta_ac)
 {
 	int status;
 	struct ocfs2_dinode *fe;
@@ -6877,6 +6881,7 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb,
 	struct ocfs2_extent_list *el;
 	struct buffer_head *last_eb_bh = NULL;
 	u64 delete_blk = 0;
+	u8 rec_flags;
 
 	fe = (struct ocfs2_dinode *) fe_bh->b_data;
 
@@ -6932,7 +6937,7 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb,
 	inode->i_blocks = ocfs2_inode_sector_count(inode);
 
 	status = ocfs2_trim_tree(inode, path, handle, tc,
-				 clusters_to_del, &delete_blk);
+				 clusters_to_del, &delete_blk, &rec_flags);
 	if (status) {
 		mlog_errno(status);
 		goto bail;
@@ -6964,8 +6969,16 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb,
 	}
 
 	if (delete_blk) {
-		status = ocfs2_truncate_log_append(osb, handle, delete_blk,
-						   clusters_to_del);
+		if (rec_flags & OCFS2_EXT_REFCOUNTED)
+			status = ocfs2_decrease_refcount(inode, handle,
+					ocfs2_blocks_to_clusters(osb->sb,
+								 delete_blk),
+					clusters_to_del, meta_ac,
+					&tc->tc_dealloc);
+		else
+			status = ocfs2_truncate_log_append(osb, handle,
+							   delete_blk,
+							   clusters_to_del);
 		if (status < 0) {
 			mlog_errno(status);
 			goto bail;
@@ -7383,11 +7396,14 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb,
 {
 	int status, i, credits, tl_sem = 0;
 	u32 clusters_to_del, new_highest_cpos, range;
+	u64 blkno = 0;
 	struct ocfs2_extent_list *el;
 	handle_t *handle = NULL;
 	struct inode *tl_inode = osb->osb_tl_inode;
 	struct ocfs2_path *path = NULL;
 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data;
+	struct ocfs2_alloc_context *meta_ac = NULL;
+	struct ocfs2_refcount_tree *ref_tree = NULL;
 
 	mlog_entry_void();
 
@@ -7413,6 +7429,8 @@ start:
 		goto bail;
 	}
 
+	credits = 0;
+
 	/*
 	 * Truncate always works against the rightmost tree branch.
 	 */
@@ -7453,10 +7471,15 @@ start:
 		clusters_to_del = 0;
 	} else if (le32_to_cpu(el->l_recs[i].e_cpos) >= new_highest_cpos) {
 		clusters_to_del = ocfs2_rec_clusters(el, &el->l_recs[i]);
+		blkno = le64_to_cpu(el->l_recs[i].e_blkno);
 	} else if (range > new_highest_cpos) {
 		clusters_to_del = (ocfs2_rec_clusters(el, &el->l_recs[i]) +
 				   le32_to_cpu(el->l_recs[i].e_cpos)) -
 				  new_highest_cpos;
+		blkno = le64_to_cpu(el->l_recs[i].e_blkno) +
+			ocfs2_clusters_to_blocks(inode->i_sb,
+				ocfs2_rec_clusters(el, &el->l_recs[i]) -
+				clusters_to_del);
 	} else {
 		status = 0;
 		goto bail;
@@ -7465,6 +7488,29 @@ start:
 	mlog(0, "clusters_to_del = %u in this pass, tail blk=%llu\n",
 	     clusters_to_del, (unsigned long long)path_leaf_bh(path)->b_blocknr);
 
+	if (el->l_recs[i].e_flags & OCFS2_EXT_REFCOUNTED && clusters_to_del) {
+		BUG_ON(!(OCFS2_I(inode)->ip_dyn_features &
+			 OCFS2_HAS_REFCOUNT_FL));
+
+		status = ocfs2_lock_refcount_tree(osb,
+						le64_to_cpu(di->i_refcount_loc),
+						1, &ref_tree, NULL);
+		if (status) {
+			mlog_errno(status);
+			goto bail;
+		}
+
+		status = ocfs2_prepare_refcount_change_for_del(inode, fe_bh,
+							       blkno,
+							       clusters_to_del,
+							       &credits,
+							       &meta_ac);
+		if (status < 0) {
+			mlog_errno(status);
+			goto bail;
+		}
+	}
+
 	mutex_lock(&tl_inode->i_mutex);
 	tl_sem = 1;
 	/* ocfs2_truncate_log_needs_flush guarantees us at least one
@@ -7478,7 +7524,7 @@ start:
 		}
 	}
 
-	credits = ocfs2_calc_tree_trunc_credits(osb->sb, clusters_to_del,
+	credits += ocfs2_calc_tree_trunc_credits(osb->sb, clusters_to_del,
 						(struct ocfs2_dinode *)fe_bh->b_data,
 						el);
 	handle = ocfs2_start_trans(osb, credits);
@@ -7490,7 +7536,7 @@ start:
 	}
 
 	status = ocfs2_do_truncate(osb, clusters_to_del, inode, fe_bh, handle,
-				   tc, path);
+				   tc, path, meta_ac);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
@@ -7504,6 +7550,16 @@ start:
 
 	ocfs2_reinit_path(path, 1);
 
+	if (meta_ac) {
+		ocfs2_free_alloc_context(meta_ac);
+		meta_ac = NULL;
+	}
+
+	if (ref_tree) {
+		ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
+		ref_tree = NULL;
+	}
+
 	/*
 	 * The check above will catch the case where we've truncated
 	 * away all allocation.
@@ -7520,6 +7576,12 @@ bail:
 	if (handle)
 		ocfs2_commit_trans(osb, handle);
 
+	if (meta_ac)
+		ocfs2_free_alloc_context(meta_ac);
+
+	if (ref_tree)
+		ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
+
 	ocfs2_run_deallocs(osb, &tc->tc_dealloc);
 
 	ocfs2_free_path(path);
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index bd88c8b9f2fb..3f74e09b0d80 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -504,6 +504,9 @@ static inline int ocfs2_calc_dxi_expand_credits(struct super_block *sb)
  */
 #define OCFS2_REFCOUNT_TREE_REMOVE_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1)
 
+/* 2 metadata alloc, 2 new blocks and root refcount block */
+#define OCFS2_EXPAND_REFCOUNT_TREE_CREDITS (OCFS2_SUBALLOC_ALLOC * 2 + 3)
+
 /*
  * Please note that the caller must make sure that root_el is the root
  * of extent tree. So for an inode, it should be &fe->id2.i_list. Otherwise
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index f7d19f4db897..e72dbdd3b6e8 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -2192,3 +2192,215 @@ static int ocfs2_mark_extent_refcounted(struct inode *inode,
 out:
 	return ret;
 }
+
+/*
+ * Given some contiguous physical clusters, calculate what we need
+ * for modifying their refcount: the extra metadata blocks needed are
+ * added to *meta_add and the journal credits to *credits.
+ * rl_used and rl_count are 16-bit on disk, hence le16_to_cpu() below.
+ */
+static int ocfs2_calc_refcount_meta_credits(struct super_block *sb,
+					    struct ocfs2_caching_info *ci,
+					    struct buffer_head *ref_root_bh,
+					    u64 start_cpos, u32 clusters,
+					    int *meta_add, int *credits)
+{
+	int ret = 0, index, ref_blocks = 0, recs_add = 0;
+	u64 cpos = start_cpos;
+	struct ocfs2_refcount_block *rb;
+	struct ocfs2_refcount_rec rec;
+	struct buffer_head *ref_leaf_bh = NULL, *prev_bh = NULL;
+	u32 len;
+
+	mlog(0, "start_cpos %llu, clusters %u\n",
+	     (unsigned long long)start_cpos, clusters);
+	while (clusters) {
+		ret = ocfs2_get_refcount_rec(ci, ref_root_bh,
+					     cpos, clusters, &rec,
+					     &index, &ref_leaf_bh);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+
+		if (ref_leaf_bh != prev_bh) {
+			/*
+			 * Now we encounter a new leaf block, so calculate
+			 * whether we need to extend the old leaf.
+			 */
+			if (prev_bh) {
+				rb = (struct ocfs2_refcount_block *)
+							prev_bh->b_data;
+
+				if (le16_to_cpu(rb->rf_records.rl_used) +
+				    recs_add >
+				    le16_to_cpu(rb->rf_records.rl_count))
+					ref_blocks++;
+			}
+
+			recs_add = 0;
+			*credits += 1;
+			brelse(prev_bh);
+			prev_bh = ref_leaf_bh;
+			get_bh(prev_bh);
+		}
+
+		rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
+
+		mlog(0, "recs_add %d,cpos %llu, clusters %u, rec->r_cpos %llu,"
+		     "rec->r_clusters %u, rec->r_refcount %u, index %d\n",
+		     recs_add, (unsigned long long)cpos, clusters,
+		     (unsigned long long)le64_to_cpu(rec.r_cpos),
+		     le32_to_cpu(rec.r_clusters),
+		     le32_to_cpu(rec.r_refcount), index);
+
+		len = min((u64)cpos + clusters, le64_to_cpu(rec.r_cpos) +
+			  le32_to_cpu(rec.r_clusters)) - cpos;
+		/*
+		 * If the refcount rec already exist, cool. We just need
+		 * to check whether there is a split. Otherwise we just need
+		 * to increase the refcount.
+		 * If we will insert one, increases recs_add.
+		 *
+		 * We record all the records which will be inserted to the
+		 * same refcount block, so that we can tell exactly whether
+		 * we need a new refcount block or not.
+		 */
+		if (rec.r_refcount) {
+			/* Check whether we need a split at the beginning. */
+			if (cpos == start_cpos &&
+			    cpos != le64_to_cpu(rec.r_cpos))
+				recs_add++;
+
+			/* Check whether we need a split in the end. */
+			if (cpos + clusters < le64_to_cpu(rec.r_cpos) +
+			    le32_to_cpu(rec.r_clusters))
+				recs_add++;
+		} else
+			recs_add++;
+
+		brelse(ref_leaf_bh);
+		ref_leaf_bh = NULL;
+		clusters -= len;
+		cpos += len;
+	}
+
+	if (prev_bh) {
+		rb = (struct ocfs2_refcount_block *)prev_bh->b_data;
+
+		if (le16_to_cpu(rb->rf_records.rl_used) + recs_add >
+		    le16_to_cpu(rb->rf_records.rl_count))
+			ref_blocks++;
+
+		*credits += 1;
+	}
+
+	if (!ref_blocks)
+		goto out;
+
+	mlog(0, "we need ref_blocks %d\n", ref_blocks);
+	*meta_add += ref_blocks;
+	*credits += ref_blocks;
+
+	/*
+	 * So we may need ref_blocks to insert into the tree.
+	 * That also means we need to change the b-tree and add that number
+	 * of records since we never merge them.
+	 * We need one more block for expansion since the new created leaf
+	 * block is also full and needs split.
+	 */
+	rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
+	if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL) {
+		struct ocfs2_extent_tree et;
+
+		ocfs2_init_refcount_extent_tree(&et, ci, ref_root_bh);
+		*meta_add += ocfs2_extend_meta_needed(et.et_root_el);
+		*credits += ocfs2_calc_extend_credits(sb,
+						      et.et_root_el,
+						      ref_blocks);
+	} else {
+		*credits += OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
+		*meta_add += 1;
+	}
+
+out:
+	brelse(ref_leaf_bh);
+	brelse(prev_bh);
+	return ret;
+}
+
+/*
+ * We are about to decrease the refcount of some contiguous clusters,
+ * so walk the refcount tree to see how many blocks we are going to
+ * touch and whether we need to create new blocks.
+ *
+ * Normally the refcount blocks that store these refcounts should be
+ * contiguous as well, so that we can get the number easily.
+ * As for meta_ac, we will at most split 2 refcount records and add
+ * 2 more refcount blocks, so just check it in a rough way.
+ *
+ * Caller must hold refcount tree lock.
+ */
+int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
+					  struct buffer_head *di_bh,
+					  u64 phys_blkno,
+					  u32 clusters,
+					  int *credits,
+					  struct ocfs2_alloc_context **meta_ac)
+{
+	int ret, ref_blocks = 0;
+	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
+	struct ocfs2_inode_info *oi = OCFS2_I(inode);
+	struct buffer_head *ref_root_bh = NULL;
+	struct ocfs2_refcount_tree *tree;
+	u64 start_cpos = ocfs2_blocks_to_clusters(inode->i_sb, phys_blkno);
+
+	if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) {
+		ocfs2_error(inode->i_sb, "Inode %lu want to use refcount "
+			    "tree, but the feature bit is not set in the "
+			    "super block.", inode->i_ino);
+		ret = -EROFS;
+		goto out;
+	}
+
+	BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
+
+	ret = ocfs2_get_refcount_tree(OCFS2_SB(inode->i_sb),
+				      le64_to_cpu(di->i_refcount_loc), &tree);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_read_refcount_block(&tree->rf_ci,
+					le64_to_cpu(di->i_refcount_loc),
+					&ref_root_bh);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_calc_refcount_meta_credits(inode->i_sb,
+					       &tree->rf_ci,
+					       ref_root_bh,
+					       start_cpos, clusters,
+					       &ref_blocks, credits);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	mlog(0, "reserve new metadata %d, credits = %d\n",
+	     ref_blocks, *credits);
+
+	if (ref_blocks) {
+		ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb),
+							ref_blocks, meta_ac);
+		if (ret)
+			mlog_errno(ret);
+	}
+
+out:
+	brelse(ref_root_bh);
+	return ret;
+}
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h
index ad4b483ec5c7..b8c9ed7dc383 100644
--- a/fs/ocfs2/refcounttree.h
+++ b/fs/ocfs2/refcounttree.h
@@ -45,4 +45,10 @@ int ocfs2_decrease_refcount(struct inode *inode,
 			    handle_t *handle, u32 cpos, u32 len,
 			    struct ocfs2_alloc_context *meta_ac,
 			    struct ocfs2_cached_dealloc_ctxt *dealloc);
+int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
+					  struct buffer_head *di_bh,
+					  u64 phys_blkno,
+					  u32 clusters,
+					  int *credits,
+					  struct ocfs2_alloc_context **meta_ac);
 #endif /* OCFS2_REFCOUNTTREE_H */
-- 
cgit v1.2.3


From 6f70fa519976a379d72781d927cf8e5f5b05ec86 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 25 Aug 2009 08:05:12 +0800
Subject: ocfs2: Add CoW support.

This patch adds CoW support for refcounted records.

The whole process is:
1. Calculate how many clusters we need to CoW and where we start.
   Extents that are not completely encompassed by the write will
   be broken on 1MB boundaries.
2. Do CoW for the clusters with the help of page cache.
3. Change the b-tree structure with the new allocated clusters.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/ocfs2/alloc.c        |  25 +-
 fs/ocfs2/alloc.h        |   5 +
 fs/ocfs2/aops.c         |   4 +-
 fs/ocfs2/aops.h         |   2 +
 fs/ocfs2/refcounttree.c | 814 ++++++++++++++++++++++++++++++++++++++++++++++++
 fs/ocfs2/refcounttree.h |   2 +
 6 files changed, 841 insertions(+), 11 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 03438a677933..b8fc95d10630 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -6998,9 +6998,9 @@ static int ocfs2_zero_func(handle_t *handle, struct buffer_head *bh)
 	return 0;
 }
 
-static void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle,
-				     unsigned int from, unsigned int to,
-				     struct page *page, int zero, u64 *phys)
+void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle,
+			      unsigned int from, unsigned int to,
+			      struct page *page, int zero, u64 *phys)
 {
 	int ret, partial = 0;
 
@@ -7068,20 +7068,16 @@ out:
 		ocfs2_unlock_and_free_pages(pages, numpages);
 }
 
-static int ocfs2_grab_eof_pages(struct inode *inode, loff_t start, loff_t end,
-				struct page **pages, int *num)
+int ocfs2_grab_pages(struct inode *inode, loff_t start, loff_t end,
+		     struct page **pages, int *num)
 {
 	int numpages, ret = 0;
-	struct super_block *sb = inode->i_sb;
 	struct address_space *mapping = inode->i_mapping;
 	unsigned long index;
 	loff_t last_page_bytes;
 
 	BUG_ON(start > end);
 
-	BUG_ON(start >> OCFS2_SB(sb)->s_clustersize_bits !=
-	       (end - 1) >> OCFS2_SB(sb)->s_clustersize_bits);
-
 	numpages = 0;
 	last_page_bytes = PAGE_ALIGN(end);
 	index = start >> PAGE_CACHE_SHIFT;
@@ -7109,6 +7105,17 @@ out:
 	return ret;
 }
 
+static int ocfs2_grab_eof_pages(struct inode *inode, loff_t start, loff_t end,
+				struct page **pages, int *num)
+{
+	struct super_block *sb = inode->i_sb;
+
+	BUG_ON(start >> OCFS2_SB(sb)->s_clustersize_bits !=
+	       (end - 1) >> OCFS2_SB(sb)->s_clustersize_bits);
+
+	return ocfs2_grab_pages(inode, start, end, pages, num);
+}
+
 /*
  * Zero the area past i_size but still within an allocated
  * cluster. This avoids exposing nonzero data on subsequent file
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index 19d5b88a93df..9c122d574464 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -271,6 +271,11 @@ static inline int ocfs2_is_empty_extent(struct ocfs2_extent_rec *rec)
 	return !rec->e_leaf_clusters;
 }
 
+int ocfs2_grab_pages(struct inode *inode, loff_t start, loff_t end,
+		     struct page **pages, int *num);
+void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle,
+			      unsigned int from, unsigned int to,
+			      struct page *page, int zero, u64 *phys);
 /*
  * Structures which describe a path through a btree, and functions to
  * manipulate them.
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 15c594dfd951..fdad075fed61 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -126,8 +126,8 @@ bail:
 	return err;
 }
 
-static int ocfs2_get_block(struct inode *inode, sector_t iblock,
-			   struct buffer_head *bh_result, int create)
+int ocfs2_get_block(struct inode *inode, sector_t iblock,
+		    struct buffer_head *bh_result, int create)
 {
 	int err = 0;
 	unsigned int ext_flags;
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h
index 503e49232e11..c48e93ffc513 100644
--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h
@@ -57,6 +57,8 @@ int ocfs2_read_inline_data(struct inode *inode, struct page *page,
 			   struct buffer_head *di_bh);
 int ocfs2_size_fits_inline_data(struct buffer_head *di_bh, u64 new_size);
 
+int ocfs2_get_block(struct inode *inode, sector_t iblock,
+		    struct buffer_head *bh_result, int create);
 /* all ocfs2_dio_end_io()'s fault */
 #define ocfs2_iocb_is_rw_locked(iocb) \
 	test_bit(0, (unsigned long *)&iocb->private)
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index e72dbdd3b6e8..4e7df8b8fd4f 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -31,6 +31,27 @@
 #include "sysfile.h"
 #include "dlmglue.h"
 #include "extent_map.h"
+#include "aops.h"
+
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/gfp.h>
+#include <linux/slab.h>
+#include <linux/writeback.h>
+#include <linux/pagevec.h>
+#include <linux/swap.h>
+
+struct ocfs2_cow_context {
+	struct inode *inode;
+	u32 cow_start;
+	u32 cow_len;
+	struct ocfs2_extent_tree di_et;
+	struct ocfs2_caching_info *ref_ci;
+	struct buffer_head *ref_root_bh;
+	struct ocfs2_alloc_context *meta_ac;
+	struct ocfs2_alloc_context *data_ac;
+	struct ocfs2_cached_dealloc_ctxt dealloc;
+};
 
 static inline struct ocfs2_refcount_tree *
 cache_info_to_refcount(struct ocfs2_caching_info *ci)
@@ -2404,3 +2425,796 @@ out:
 	brelse(ref_root_bh);
 	return ret;
 }
+
+#define	MAX_CONTIG_BYTES	1048576
+
+static inline unsigned int ocfs2_cow_contig_clusters(struct super_block *sb)
+{
+	return ocfs2_clusters_for_bytes(sb, MAX_CONTIG_BYTES);
+}
+
+static inline unsigned int ocfs2_cow_contig_mask(struct super_block *sb)
+{
+	return ~(ocfs2_cow_contig_clusters(sb) - 1);
+}
+
+/*
+ * Given an extent that starts at 'start' and an I/O that starts at 'cpos',
+ * find an offset (start + (n * contig_clusters)) that is closest to cpos
+ * while still being less than or equal to it.
+ *
+ * The goal is to break the extent at a multiple of contig_clusters.
+ */
+static inline unsigned int ocfs2_cow_align_start(struct super_block *sb,
+						 unsigned int start,
+						 unsigned int cpos)
+{
+	BUG_ON(start > cpos);
+
+	return start + ((cpos - start) & ocfs2_cow_contig_mask(sb));
+}
+
+/*
+ * Given a cluster count of len, pad it out so that it is a multiple
+ * of contig_clusters.
+ */
+static inline unsigned int ocfs2_cow_align_length(struct super_block *sb,
+						  unsigned int len)
+{
+	unsigned int padded =
+		(len + (ocfs2_cow_contig_clusters(sb) - 1)) &
+		ocfs2_cow_contig_mask(sb);
+
+	/* Did we wrap? */
+	if (padded < len)
+		padded = UINT_MAX;
+
+	return padded;
+}
+
+/*
+ * Calculate the start and the number of virtual clusters we need to CoW.
+ *
+ * cpos is the virtual start cluster of the write in the file and write_len
+ * is its length in clusters.  The result goes to cow_start and cow_len.
+ *
+ * Normally we start CoW at the beginning of the extent record containing cpos.
+ * We try to break up extents on boundaries of MAX_CONTIG_BYTES so that we
+ * get good I/O from the resulting extent tree.
+ */
+static int ocfs2_refcount_cal_cow_clusters(struct inode *inode,
+					   struct buffer_head *di_bh,
+					   u32 cpos,
+					   u32 write_len,
+					   u32 *cow_start,
+					   u32 *cow_len)
+{
+	int ret = 0;
+	struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
+	struct ocfs2_extent_list *el = &di->id2.i_list;
+	int tree_height = le16_to_cpu(el->l_tree_depth), i;
+	struct buffer_head *eb_bh = NULL;
+	struct ocfs2_extent_block *eb = NULL;
+	struct ocfs2_extent_rec *rec;
+	unsigned int want_clusters, rec_end = 0;
+	int contig_clusters = ocfs2_cow_contig_clusters(inode->i_sb);
+	int leaf_clusters;
+
+	if (tree_height > 0) {
+		ret = ocfs2_find_leaf(INODE_CACHE(inode), el, cpos, &eb_bh);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+
+		eb = (struct ocfs2_extent_block *) eb_bh->b_data;
+		el = &eb->h_list;
+
+		if (el->l_tree_depth) {
+			ocfs2_error(inode->i_sb,
+				    "Inode %lu has non zero tree depth in "
+				    "leaf block %llu\n", inode->i_ino,
+				    (unsigned long long)eb_bh->b_blocknr);
+			ret = -EROFS;
+			goto out;
+		}
+	}
+
+	*cow_len = 0;
+	for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
+		rec = &el->l_recs[i];
+
+		if (ocfs2_is_empty_extent(rec)) {
+			mlog_bug_on_msg(i != 0, "Inode %lu has empty record in "
+					"index %d\n", inode->i_ino, i);
+			continue;
+		}
+
+		if (le32_to_cpu(rec->e_cpos) +
+		    le16_to_cpu(rec->e_leaf_clusters) <= cpos)
+			continue;
+
+		if (*cow_len == 0) {
+			/*
+			 * We should find a refcounted record in the
+			 * first pass.
+			 */
+			BUG_ON(!(rec->e_flags & OCFS2_EXT_REFCOUNTED));
+			*cow_start = le32_to_cpu(rec->e_cpos);
+		}
+
+		/*
+		 * If we encounter a hole or a non-refcounted record,
+		 * stop the search.
+		 */
+		if ((!(rec->e_flags & OCFS2_EXT_REFCOUNTED)) ||
+		    (*cow_len && rec_end != le32_to_cpu(rec->e_cpos)))
+			break;
+
+		leaf_clusters = le16_to_cpu(rec->e_leaf_clusters);
+		rec_end = le32_to_cpu(rec->e_cpos) + leaf_clusters;
+
+		/*
+		 * How many clusters do we actually need from
+		 * this extent?  First we see how many we actually
+		 * need to complete the write.  If that's smaller
+		 * than contig_clusters, we try for contig_clusters.
+		 */
+		if (!*cow_len)
+			want_clusters = write_len;
+		else
+			want_clusters = (cpos + write_len) -
+				(*cow_start + *cow_len);
+		if (want_clusters < contig_clusters)
+			want_clusters = contig_clusters;
+
+		/*
+		 * If the write does not cover the whole extent, we
+		 * need to calculate how we're going to split the extent.
+		 * We try to do it on contig_clusters boundaries.
+		 *
+		 * Any extent smaller than contig_clusters will be
+		 * CoWed in its entirety.
+		 */
+		if (leaf_clusters <= contig_clusters)
+			*cow_len += leaf_clusters;
+		else if (*cow_len || (*cow_start == cpos)) {
+			/*
+			 * This extent needs to be CoW'd from its
+			 * beginning, so all we have to do is compute
+			 * how many clusters to grab.  We align
+			 * want_clusters to the edge of contig_clusters
+			 * to get better I/O.
+			 */
+			want_clusters = ocfs2_cow_align_length(inode->i_sb,
+							       want_clusters);
+
+			if (leaf_clusters < want_clusters)
+				*cow_len += leaf_clusters;
+			else
+				*cow_len += want_clusters;
+		} else if ((*cow_start + contig_clusters) >=
+			   (cpos + write_len)) {
+			/*
+			 * Breaking off contig_clusters at the front
+			 * of the extent will cover our write.  That's
+			 * easy.
+			 */
+			*cow_len = contig_clusters;
+		} else if ((rec_end - cpos) <= contig_clusters) {
+			/*
+			 * Breaking off contig_clusters at the tail of
+			 * this extent will cover cpos.
+			 */
+			*cow_start = rec_end - contig_clusters;
+			*cow_len = contig_clusters;
+		} else if ((rec_end - cpos) <= want_clusters) {
+			/*
+			 * While we can't fit the entire write in this
+			 * extent, we know that the write goes from cpos
+			 * to the end of the extent.  Break that off.
+			 * We try to break it at some multiple of
+			 * contig_clusters from the front of the extent.
+			 * Failing that (ie, cpos is within
+			 * contig_clusters of the front), we'll CoW the
+			 * entire extent.
+			 */
+			*cow_start = ocfs2_cow_align_start(inode->i_sb,
+							   *cow_start, cpos);
+			*cow_len = rec_end - *cow_start;
+		} else {
+			/*
+			 * Ok, the entire write lives in the middle of
+			 * this extent.  Let's try to slice the extent up
+			 * nicely.  Optimally, our CoW region starts at
+			 * m*contig_clusters from the beginning of the
+			 * extent and goes for n*contig_clusters,
+			 * covering the entire write.
+			 */
+			*cow_start = ocfs2_cow_align_start(inode->i_sb,
+							   *cow_start, cpos);
+
+			want_clusters = (cpos + write_len) - *cow_start;
+			want_clusters = ocfs2_cow_align_length(inode->i_sb,
+							       want_clusters);
+			if (*cow_start + want_clusters <= rec_end)
+				*cow_len = want_clusters;
+			else
+				*cow_len = rec_end - *cow_start;
+		}
+
+		/* Have we covered our entire write yet? */
+		if ((*cow_start + *cow_len) >= (cpos + write_len))
+			break;
+
+		/*
+		 * End of this leaf without covering the write: continue with
+		 * the next leaf.  Fetch its block number before eb_bh is put.
+		 */
+		if (i + 1 == le16_to_cpu(el->l_next_free_rec) &&
+		    eb && eb->h_next_leaf_blk) {
+			u64 next_blk = le64_to_cpu(eb->h_next_leaf_blk);
+
+			brelse(eb_bh);
+			eb_bh = NULL;
+			ret = ocfs2_read_extent_block(INODE_CACHE(inode),
+						      next_blk, &eb_bh);
+			if (ret) {
+				mlog_errno(ret);
+				goto out;
+			}
+
+			eb = (struct ocfs2_extent_block *) eb_bh->b_data;
+			el = &eb->h_list;
+			i = -1;
+		}
+	}
+
+out:
+	brelse(eb_bh);
+	return ret;
+}
+
+/*
+ * Prepare meta_ac and data_ac, and calculate the credits needed to add
+ * num_clusters to the data tree "et" and change the refcount of the old
+ * clusters (starting from p_cluster) in the refcount tree.
+ *
+ * Note:
+ * 1. Since we may split the old tree, we will need at most num_clusters + 2
+ *    new leaf records.
+ * 2. In some cases we may not need to reserve new clusters (e.g. reflink);
+ *    then just pass data_ac = NULL.
+ */
+static int ocfs2_lock_refcount_allocators(struct super_block *sb,
+					u32 p_cluster, u32 num_clusters,
+					struct ocfs2_extent_tree *et,
+					struct ocfs2_caching_info *ref_ci,
+					struct buffer_head *ref_root_bh,
+					struct ocfs2_alloc_context **meta_ac,
+					struct ocfs2_alloc_context **data_ac,
+					int *credits)
+{
+	int ret = 0, meta_add = 0;
+	int num_free_extents = ocfs2_num_free_extents(OCFS2_SB(sb), et);
+
+	if (num_free_extents < 0) {
+		ret = num_free_extents;
+		mlog_errno(ret);
+		goto out;
+	}
+
+	if (num_free_extents < num_clusters + 2)
+		meta_add =
+			ocfs2_extend_meta_needed(et->et_root_el);
+
+	*credits += ocfs2_calc_extend_credits(sb, et->et_root_el,
+					      num_clusters + 2);
+
+	ret = ocfs2_calc_refcount_meta_credits(sb, ref_ci, ref_root_bh,
+					       p_cluster, num_clusters,
+					       &meta_add, credits);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	mlog(0, "reserve new metadata %d, clusters %u, credits = %d\n",
+	     meta_add, num_clusters, *credits);
+	ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(sb), meta_add,
+						meta_ac);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	if (data_ac) {
+		ret = ocfs2_reserve_clusters(OCFS2_SB(sb), num_clusters,
+					     data_ac);
+		if (ret)
+			mlog_errno(ret);
+	}
+
+out:
+	if (ret) {
+		if (*meta_ac) {
+			ocfs2_free_alloc_context(*meta_ac);
+			*meta_ac = NULL;
+		}
+	}
+
+	return ret;
+}
+
+static int ocfs2_clear_cow_buffer(handle_t *handle, struct buffer_head *bh)
+{
+	BUG_ON(buffer_dirty(bh));
+
+	clear_buffer_mapped(bh);
+
+	return 0;
+}
+
+static int ocfs2_duplicate_clusters(handle_t *handle,
+				    struct ocfs2_cow_context *context,
+				    u32 cpos, u32 old_cluster,
+				    u32 new_cluster, u32 new_len)
+{
+	int ret = 0, partial;
+	struct ocfs2_caching_info *ci = context->di_et.et_ci;
+	struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
+	u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster);
+	struct page *page;
+	pgoff_t page_index;
+	unsigned int from, to;
+	loff_t offset, end, map_end;
+	struct address_space *mapping = context->inode->i_mapping;
+
+	mlog(0, "old_cluster %u, new %u, len %u at offset %u\n", old_cluster,
+	     new_cluster, new_len, cpos);
+
+	offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits;
+	end = offset + (new_len << OCFS2_SB(sb)->s_clustersize_bits);
+
+	while (offset < end) {
+		page_index = offset >> PAGE_CACHE_SHIFT;
+		map_end = (page_index + 1) << PAGE_CACHE_SHIFT;
+		if (map_end > end)
+			map_end = end;
+
+		/* from, to are the byte offsets within this page. */
+		from = offset & (PAGE_CACHE_SIZE - 1);
+		to = PAGE_CACHE_SIZE;
+		if (map_end & (PAGE_CACHE_SIZE - 1))
+			to = map_end & (PAGE_CACHE_SIZE - 1);
+
+		page = grab_cache_page(mapping, page_index);
+		if (!page)	/* no page obtained; nothing is held yet */
+			return -ENOMEM;
+		/* This page can't be dirtied before we CoW it out. */
+		BUG_ON(PageDirty(page));
+		if (!PageUptodate(page)) {
+			ret = block_read_full_page(page, ocfs2_get_block);
+			if (ret) {
+				mlog_errno(ret);
+				goto unlock;
+			}
+			lock_page(page);
+		}
+
+		if (page_has_buffers(page)) {
+			ret = walk_page_buffers(handle, page_buffers(page),
+						from, to, &partial,
+						ocfs2_clear_cow_buffer);
+			if (ret) {
+				mlog_errno(ret);
+				goto unlock;
+			}
+		}
+
+		ocfs2_map_and_dirty_page(context->inode,
+					 handle, from, to,
+					 page, 0, &new_block);
+		mark_page_accessed(page);
+unlock:
+		unlock_page(page);
+		page_cache_release(page);
+		page = NULL;
+		offset = map_end;
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
+
+static int ocfs2_clear_ext_refcount(handle_t *handle,
+				    struct ocfs2_extent_tree *et,
+				    u32 cpos, u32 p_cluster, u32 len,
+				    unsigned int ext_flags,
+				    struct ocfs2_alloc_context *meta_ac,
+				    struct ocfs2_cached_dealloc_ctxt *dealloc)
+{
+	int ret, index;
+	struct ocfs2_extent_rec replace_rec;
+	struct ocfs2_path *path = NULL;
+	struct ocfs2_extent_list *el;
+	struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);
+	u64 ino = ocfs2_metadata_cache_owner(et->et_ci);
+
+	mlog(0, "inode %llu cpos %u, len %u, p_cluster %u, ext_flags %u\n",
+	     (unsigned long long)ino, cpos, len, p_cluster, ext_flags);
+
+	memset(&replace_rec, 0, sizeof(replace_rec));
+	replace_rec.e_cpos = cpu_to_le32(cpos);
+	replace_rec.e_leaf_clusters = cpu_to_le16(len);
+	replace_rec.e_blkno = cpu_to_le64(ocfs2_clusters_to_blocks(sb,
+								   p_cluster));
+	replace_rec.e_flags = ext_flags;
+	replace_rec.e_flags &= ~OCFS2_EXT_REFCOUNTED;
+
+	path = ocfs2_new_path_from_et(et);
+	if (!path) {
+		ret = -ENOMEM;
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_find_path(et->et_ci, path, cpos);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	el = path_leaf_el(path);
+
+	index = ocfs2_search_extent_list(el, cpos);
+	if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
+		ocfs2_error(sb,
+			    "Inode %llu has an extent at cpos %u which can no "
+			    "longer be found.\n",
+			    (unsigned long long)ino, cpos);
+		ret = -EROFS;
+		goto out;
+	}
+
+	ret = ocfs2_split_extent(handle, et, path, index,
+				 &replace_rec, meta_ac, dealloc);
+	if (ret)
+		mlog_errno(ret);
+
+out:
+	ocfs2_free_path(path);
+	return ret;
+}
+
+static int ocfs2_replace_clusters(handle_t *handle,
+				  struct ocfs2_cow_context *context,
+				  u32 cpos, u32 old,
+				  u32 new, u32 len,
+				  unsigned int ext_flags)
+{
+	int ret;
+	struct ocfs2_caching_info *ci = context->di_et.et_ci;
+	u64 ino = ocfs2_metadata_cache_owner(ci);
+
+	mlog(0, "inode %llu, cpos %u, old %u, new %u, len %u, ext_flags %u\n",
+	     (unsigned long long)ino, cpos, old, new, len, ext_flags);
+
+	/*If the old clusters is unwritten, no need to duplicate. */
+	if (!(ext_flags & OCFS2_EXT_UNWRITTEN)) {
+		ret = ocfs2_duplicate_clusters(handle, context, cpos,
+					       old, new, len);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+	}
+
+	ret = ocfs2_clear_ext_refcount(handle, &context->di_et,
+				       cpos, new, len, ext_flags,
+				       context->meta_ac, &context->dealloc);
+	if (ret)
+		mlog_errno(ret);
+out:
+	return ret;
+}
+
+static int ocfs2_cow_sync_writeback(struct super_block *sb,
+				    struct ocfs2_cow_context *context,
+				    u32 cpos, u32 num_clusters)
+{
+	int ret = 0;
+	loff_t offset, end, map_end;
+	pgoff_t page_index;
+	struct page *page;
+
+	if (ocfs2_should_order_data(context->inode))
+		return 0;
+
+	offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits;
+	end = offset + (num_clusters << OCFS2_SB(sb)->s_clustersize_bits);
+
+	ret = filemap_fdatawrite_range(context->inode->i_mapping,
+				       offset, end - 1);
+	if (ret < 0) {
+		mlog_errno(ret);
+		return ret;
+	}
+
+	while (offset < end) {
+		page_index = offset >> PAGE_CACHE_SHIFT;
+		map_end = (page_index + 1) << PAGE_CACHE_SHIFT;
+		if (map_end > end)
+			map_end = end;
+
+		page = grab_cache_page(context->inode->i_mapping, page_index);
+		BUG_ON(!page);
+
+		wait_on_page_writeback(page);
+		if (PageError(page)) {
+			ret = -EIO;
+			mlog_errno(ret);
+		} else
+			mark_page_accessed(page);
+
+		unlock_page(page);
+		page_cache_release(page);
+		page = NULL;
+		offset = map_end;
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
+
+static int ocfs2_make_clusters_writable(struct super_block *sb,
+					struct ocfs2_cow_context *context,
+					u32 cpos, u32 p_cluster,
+					u32 num_clusters, unsigned int e_flags)
+{
+	int ret, credits =  0;
+	u32 new_bit, new_len;
+	struct ocfs2_super *osb = OCFS2_SB(sb);
+	handle_t *handle;
+
+	ret = ocfs2_lock_refcount_allocators(sb, p_cluster, num_clusters,
+					     &context->di_et,
+					     context->ref_ci,
+					     context->ref_root_bh,
+					     &context->meta_ac,
+					     &context->data_ac, &credits);
+	if (ret) {
+		mlog_errno(ret);
+		return ret;
+	}
+
+	handle = ocfs2_start_trans(osb, credits);
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		mlog_errno(ret);
+		goto out;
+	}
+
+	while (num_clusters) {
+		ret = __ocfs2_claim_clusters(osb, handle, context->data_ac,
+					     1, num_clusters,
+					     &new_bit, &new_len);
+		if (ret) {
+			mlog_errno(ret);
+			goto out_commit;
+		}
+
+		ret = ocfs2_replace_clusters(handle, context,
+					     cpos, p_cluster, new_bit,
+					     new_len, e_flags);
+		if (ret) {
+			mlog_errno(ret);
+			goto out_commit;
+		}
+
+		cpos += new_len;
+		p_cluster += new_len;
+		num_clusters -= new_len;
+	}
+
+	ret = __ocfs2_decrease_refcount(handle, context->ref_ci,
+					context->ref_root_bh,
+					p_cluster, num_clusters,
+					context->meta_ac,
+					&context->dealloc);
+	if (ret) {
+		mlog_errno(ret);
+		goto out_commit;
+	}
+
+	/*
+	 * Here we should write the new page out first if we are
+	 * in write-back mode.
+	 */
+	ret = ocfs2_cow_sync_writeback(sb, context, cpos, num_clusters);
+	if (ret)
+		mlog_errno(ret);
+
+out_commit:
+	ocfs2_commit_trans(osb, handle);
+
+out:
+	if (context->data_ac) {
+		ocfs2_free_alloc_context(context->data_ac);
+		context->data_ac = NULL;
+	}
+	if (context->meta_ac) {
+		ocfs2_free_alloc_context(context->meta_ac);
+		context->meta_ac = NULL;
+	}
+
+	return ret;
+}
+
+static int ocfs2_replace_cow(struct inode *inode,
+			     struct buffer_head *di_bh,
+			     struct buffer_head *ref_root_bh,
+			     struct ocfs2_caching_info *ref_ci,
+			     u32 cow_start, u32 cow_len)
+{
+	int ret = 0;
+	u32 p_cluster, num_clusters, start = cow_start;
+	unsigned int ext_flags;
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct ocfs2_cow_context *context;
+
+	if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) {
+		ocfs2_error(inode->i_sb, "Inode %lu want to use refcount "
+			    "tree, but the feature bit is not set in the "
+			    "super block.", inode->i_ino);
+		return -EROFS;
+	}
+
+	context = kzalloc(sizeof(struct ocfs2_cow_context), GFP_NOFS);
+	if (!context) {
+		ret = -ENOMEM;
+		mlog_errno(ret);
+		return ret;
+	}
+
+	context->inode = inode;
+	context->cow_start = cow_start;
+	context->cow_len = cow_len;
+	context->ref_ci = ref_ci;
+	context->ref_root_bh = ref_root_bh;
+
+	ocfs2_init_dealloc_ctxt(&context->dealloc);
+	ocfs2_init_dinode_extent_tree(&context->di_et,
+				      INODE_CACHE(inode), di_bh);
+
+	while (cow_len) {
+		ret = ocfs2_get_clusters(inode, cow_start, &p_cluster,
+					 &num_clusters, &ext_flags);
+		if (ret) {
+			mlog_errno(ret);
+			break;
+		}
+
+		BUG_ON(!(ext_flags & OCFS2_EXT_REFCOUNTED));
+
+		if (cow_len < num_clusters)
+			num_clusters = cow_len;
+
+		ret = ocfs2_make_clusters_writable(inode->i_sb, context,
+						   cow_start, p_cluster,
+						   num_clusters, ext_flags);
+		if (ret) {
+			mlog_errno(ret);
+			break;
+		}
+
+		cow_len -= num_clusters;
+		cow_start += num_clusters;
+	}
+
+
+	/*
+	 * truncate the extent map here since no matter whether we meet with
+	 * any error during the action, we shouldn't trust cached extent map
+	 * any more.
+	 */
+	ocfs2_extent_map_trunc(inode, start);
+
+	if (ocfs2_dealloc_has_cluster(&context->dealloc)) {
+		ocfs2_schedule_truncate_log_flush(osb, 1);
+		ocfs2_run_deallocs(osb, &context->dealloc);
+	}
+
+	kfree(context);
+	return ret;
+}
+
+/*
+ * Starting at cpos, try to CoW write_len clusters.
+ * This will stop when it runs into a hole or an unrefcounted extent.
+ */
+static int ocfs2_refcount_cow_hunk(struct inode *inode,
+				   struct buffer_head *di_bh,
+				   u32 cpos, u32 write_len)
+{
+	int ret;
+	u32 cow_start = 0, cow_len = 0;
+	struct ocfs2_inode_info *oi = OCFS2_I(inode);
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
+	struct buffer_head *ref_root_bh = NULL;
+	struct ocfs2_refcount_tree *ref_tree;
+
+	BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
+
+	ret = ocfs2_refcount_cal_cow_clusters(inode, di_bh, cpos, write_len,
+					      &cow_start, &cow_len);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+	mlog(0, "CoW inode %lu, cpos %u, write_len %u, cow_start %u, "
+	     "cow_len %u\n", inode->i_ino,
+	     cpos, write_len, cow_start, cow_len);
+
+	BUG_ON(cow_len == 0);
+
+	ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc),
+				       1, &ref_tree, &ref_root_bh);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_replace_cow(inode, di_bh, ref_root_bh, &ref_tree->rf_ci,
+				cow_start, cow_len);
+	if (ret)
+		mlog_errno(ret);
+
+	ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
+	brelse(ref_root_bh);
+out:
+	return ret;
+}
+
+/*
+ * CoW any and all clusters between cpos and cpos+write_len.
+ * If this returns successfully, all clusters between cpos and
+ * cpos+write_len are safe to modify.
+ */
+int ocfs2_refcount_cow(struct inode *inode,
+		       struct buffer_head *di_bh,
+		       u32 cpos, u32 write_len)
+{
+	int ret = 0;
+	u32 p_cluster, num_clusters;
+	unsigned int ext_flags;
+
+	while (write_len) {
+		ret = ocfs2_get_clusters(inode, cpos, &p_cluster,
+					 &num_clusters, &ext_flags);
+		if (ret) {
+			mlog_errno(ret);
+			break;
+		}
+
+		if (write_len < num_clusters)
+			num_clusters = write_len;
+
+		if (ext_flags & OCFS2_EXT_REFCOUNTED) {
+			ret = ocfs2_refcount_cow_hunk(inode, di_bh, cpos,
+						      num_clusters);
+			if (ret) {
+				mlog_errno(ret);
+				break;
+			}
+		}
+
+		write_len -= num_clusters;
+		cpos += num_clusters;
+	}
+
+	return ret;
+}
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h
index b8c9ed7dc383..9960878134df 100644
--- a/fs/ocfs2/refcounttree.h
+++ b/fs/ocfs2/refcounttree.h
@@ -51,4 +51,6 @@ int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
 					  u32 clusters,
 					  int *credits,
 					  struct ocfs2_alloc_context **meta_ac);
+int ocfs2_refcount_cow(struct inode *inode, struct buffer_head *di_bh,
+		       u32 cpos, u32 write_len);
 #endif /* OCFS2_REFCOUNTTREE_H */
-- 
cgit v1.2.3


From 6ae23c5555176c5b23480c9c578ff27437085ba5 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:30:55 +0800
Subject: ocfs2: CoW refcount tree improvement.

During CoW, if the old extent record is refcounted, we allocate
some new clusters and do CoW. We can improve on this. If the old
extent has refcount=1, it is now used only by this file, so we
don't need to allocate new clusters; removing the refcounted flag
is enough. We also have to remove the record from the refcount
tree, but without freeing the clusters it describes.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/ocfs2/alloc.c        |   2 +-
 fs/ocfs2/refcounttree.c | 104 ++++++++++++++++++++++++++++++++++++------------
 fs/ocfs2/refcounttree.h |   3 +-
 3 files changed, 81 insertions(+), 28 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index b8fc95d10630..7c879fc7834f 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -6974,7 +6974,7 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb,
 					ocfs2_blocks_to_clusters(osb->sb,
 								 delete_blk),
 					clusters_to_del, meta_ac,
-					&tc->tc_dealloc);
+					&tc->tc_dealloc, 1);
 		else
 			status = ocfs2_truncate_log_append(osb, handle,
 							   delete_blk,
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 4e7df8b8fd4f..0a92436557e3 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -2076,7 +2076,8 @@ static int __ocfs2_decrease_refcount(handle_t *handle,
 				     struct buffer_head *ref_root_bh,
 				     u64 cpos, u32 len,
 				     struct ocfs2_alloc_context *meta_ac,
-				     struct ocfs2_cached_dealloc_ctxt *dealloc)
+				     struct ocfs2_cached_dealloc_ctxt *dealloc,
+				     int delete)
 {
 	int ret = 0, index = 0;
 	struct ocfs2_refcount_rec rec;
@@ -2084,9 +2085,10 @@ static int __ocfs2_decrease_refcount(handle_t *handle,
 	struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
 	struct buffer_head *ref_leaf_bh = NULL;
 
-	mlog(0, "Tree owner %llu, decrease refcount start %llu, len %u\n",
+	mlog(0, "Tree owner %llu, decrease refcount start %llu, "
+	     "len %u, delete %u\n",
 	     (unsigned long long)ocfs2_metadata_cache_owner(ci),
-	     (unsigned long long)cpos, len);
+	     (unsigned long long)cpos, len, delete);
 
 	while (len) {
 		ret = ocfs2_get_refcount_rec(ci, ref_root_bh,
@@ -2099,6 +2101,8 @@ static int __ocfs2_decrease_refcount(handle_t *handle,
 
 		r_count = le32_to_cpu(rec.r_refcount);
 		BUG_ON(r_count == 0);
+		if (!delete)
+			BUG_ON(r_count > 1);
 
 		r_len = min((u64)(cpos + len), le64_to_cpu(rec.r_cpos) +
 			      le32_to_cpu(rec.r_clusters)) - cpos;
@@ -2112,7 +2116,7 @@ static int __ocfs2_decrease_refcount(handle_t *handle,
 			goto out;
 		}
 
-		if (le32_to_cpu(rec.r_refcount) == 1) {
+		if (le32_to_cpu(rec.r_refcount) == 1 && delete) {
 			ret = ocfs2_cache_cluster_dealloc(dealloc,
 					  ocfs2_clusters_to_blocks(sb, cpos),
 							  r_len);
@@ -2137,7 +2141,8 @@ out:
 int ocfs2_decrease_refcount(struct inode *inode,
 			    handle_t *handle, u32 cpos, u32 len,
 			    struct ocfs2_alloc_context *meta_ac,
-			    struct ocfs2_cached_dealloc_ctxt *dealloc)
+			    struct ocfs2_cached_dealloc_ctxt *dealloc,
+			    int delete)
 {
 	int ret;
 	u64 ref_blkno;
@@ -2167,7 +2172,7 @@ int ocfs2_decrease_refcount(struct inode *inode,
 	}
 
 	ret = __ocfs2_decrease_refcount(handle, &tree->rf_ci, ref_root_bh,
-					cpos, len, meta_ac, dealloc);
+					cpos, len, meta_ac, dealloc, delete);
 	if (ret)
 		mlog_errno(ret);
 out:
@@ -2974,10 +2979,16 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
 					u32 cpos, u32 p_cluster,
 					u32 num_clusters, unsigned int e_flags)
 {
-	int ret, credits =  0;
+	int ret, delete, index, credits =  0;
 	u32 new_bit, new_len;
+	unsigned int set_len;
 	struct ocfs2_super *osb = OCFS2_SB(sb);
 	handle_t *handle;
+	struct buffer_head *ref_leaf_bh = NULL;
+	struct ocfs2_refcount_rec rec;
+
+	mlog(0, "cpos %u, p_cluster %u, num_clusters %u, e_flags %u\n",
+	     cpos, p_cluster, num_clusters, e_flags);
 
 	ret = ocfs2_lock_refcount_allocators(sb, p_cluster, num_clusters,
 					     &context->di_et,
@@ -2998,35 +3009,75 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
 	}
 
 	while (num_clusters) {
-		ret = __ocfs2_claim_clusters(osb, handle, context->data_ac,
-					     1, num_clusters,
-					     &new_bit, &new_len);
+		ret = ocfs2_get_refcount_rec(context->ref_ci,
+					     context->ref_root_bh,
+					     p_cluster, num_clusters,
+					     &rec, &index, &ref_leaf_bh);
 		if (ret) {
 			mlog_errno(ret);
 			goto out_commit;
 		}
 
-		ret = ocfs2_replace_clusters(handle, context,
-					     cpos, p_cluster, new_bit,
-					     new_len, e_flags);
+		BUG_ON(!rec.r_refcount);
+		set_len = min((u64)p_cluster + num_clusters,
+			      le64_to_cpu(rec.r_cpos) +
+			      le32_to_cpu(rec.r_clusters)) - p_cluster;
+
+		/*
+		 * There are many different situation here.
+		 * 1. If refcount == 1, remove the flag and don't COW.
+		 * 2. If refcount > 1, allocate clusters.
+		 *    Here we may not allocate r_len once at a time, so continue
+		 *    until we reach num_clusters.
+		 */
+		if (le32_to_cpu(rec.r_refcount) == 1) {
+			delete = 0;
+			ret = ocfs2_clear_ext_refcount(handle, &context->di_et,
+						       cpos, p_cluster,
+						       set_len, e_flags,
+						       context->meta_ac,
+						       &context->dealloc);
+			if (ret) {
+				mlog_errno(ret);
+				goto out_commit;
+			}
+		} else {
+			delete = 1;
+
+			ret = __ocfs2_claim_clusters(osb, handle,
+						     context->data_ac,
+						     1, set_len,
+						     &new_bit, &new_len);
+			if (ret) {
+				mlog_errno(ret);
+				goto out_commit;
+			}
+
+			ret = ocfs2_replace_clusters(handle, context,
+						     cpos, p_cluster, new_bit,
+						     new_len, e_flags);
+			if (ret) {
+				mlog_errno(ret);
+				goto out_commit;
+			}
+			set_len = new_len;
+		}
+
+		ret = __ocfs2_decrease_refcount(handle, context->ref_ci,
+						context->ref_root_bh,
+						p_cluster, set_len,
+						context->meta_ac,
+						&context->dealloc, delete);
 		if (ret) {
 			mlog_errno(ret);
 			goto out_commit;
 		}
 
-		cpos += new_len;
-		p_cluster += new_len;
-		num_clusters -= new_len;
-	}
-
-	ret = __ocfs2_decrease_refcount(handle, context->ref_ci,
-					context->ref_root_bh,
-					p_cluster, num_clusters,
-					context->meta_ac,
-					&context->dealloc);
-	if (ret) {
-		mlog_errno(ret);
-		goto out_commit;
+		cpos += set_len;
+		p_cluster += set_len;
+		num_clusters -= set_len;
+		brelse(ref_leaf_bh);
+		ref_leaf_bh = NULL;
 	}
 
 	/*
@@ -3049,6 +3100,7 @@ out:
 		ocfs2_free_alloc_context(context->meta_ac);
 		context->meta_ac = NULL;
 	}
+	brelse(ref_leaf_bh);
 
 	return ret;
 }
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h
index 9960878134df..a8c15b0b2307 100644
--- a/fs/ocfs2/refcounttree.h
+++ b/fs/ocfs2/refcounttree.h
@@ -44,7 +44,8 @@ void ocfs2_unlock_refcount_tree(struct ocfs2_super *osb,
 int ocfs2_decrease_refcount(struct inode *inode,
 			    handle_t *handle, u32 cpos, u32 len,
 			    struct ocfs2_alloc_context *meta_ac,
-			    struct ocfs2_cached_dealloc_ctxt *dealloc);
+			    struct ocfs2_cached_dealloc_ctxt *dealloc,
+			    int delete);
 int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
 					  struct buffer_head *di_bh,
 					  u64 phys_blkno,
-- 
cgit v1.2.3


From 293b2f70b4a16a1ca91efd28ef3d6634262c6887 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 25 Aug 2009 08:02:48 +0800
Subject: ocfs2: Integrate CoW in file write.

When we use mmap, we CoW the refcounted clusters in
ocfs2_write_begin_nolock. For normal file I/O
(including direct I/O), we do the CoW in
ocfs2_prepare_inode_for_write.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/ocfs2/aops.c | 19 +++++++++++++
 fs/ocfs2/file.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 fs/ocfs2/file.h |  2 ++
 3 files changed, 104 insertions(+), 1 deletion(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index fdad075fed61..9db9d64ca475 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -44,6 +44,7 @@
 #include "suballoc.h"
 #include "super.h"
 #include "symlink.h"
+#include "refcounttree.h"
 
 #include "buffer_head_io.h"
 
@@ -590,6 +591,8 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
 		goto bail;
 	}
 
+	/* We should already CoW the refcounted extent. */
+	BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED);
 	/*
 	 * get_more_blocks() expects us to describe a hole by clearing
 	 * the mapped bit on bh_result().
@@ -1449,6 +1452,9 @@ static int ocfs2_populate_write_desc(struct inode *inode,
 				goto out;
 			}
 
+			/* We should already CoW the refcounted extent. */
+			BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED);
+
 			/*
 			 * Assume worst case - that we're writing in
 			 * the middle of the extent.
@@ -1700,6 +1706,19 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
 		goto out;
 	}
 
+	ret = ocfs2_check_range_for_refcount(inode, pos, len);
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto out;
+	} else if (ret == 1) {
+		ret = ocfs2_refcount_cow(inode, di_bh,
+					 wc->w_cpos, wc->w_clen);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+	}
+
 	ret = ocfs2_populate_write_desc(inode, wc, &clusters_to_alloc,
 					&extents_to_split);
 	if (ret) {
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 4921b4ee9431..6ee20e82bcc5 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -59,6 +59,7 @@
 #include "xattr.h"
 #include "acl.h"
 #include "quota.h"
+#include "refcounttree.h"
 
 #include "buffer_head_io.h"
 
@@ -1656,6 +1657,70 @@ static long ocfs2_fallocate(struct inode *inode, int mode, loff_t offset,
 					 OCFS2_IOC_RESVSP64, &sr, change_size);
 }
 
+int ocfs2_check_range_for_refcount(struct inode *inode, loff_t pos,
+				   size_t count)
+{
+	int ret = 0;
+	unsigned int extent_flags;
+	u32 cpos, clusters, extent_len, phys_cpos;
+	struct super_block *sb = inode->i_sb;
+
+	if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb)) ||
+	    !(OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL))
+		return 0;
+
+	cpos = pos >> OCFS2_SB(sb)->s_clustersize_bits;
+	clusters = ocfs2_clusters_for_bytes(sb, pos + count) - cpos;
+
+	while (clusters) {
+		ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, &extent_len,
+					 &extent_flags);
+		if (ret < 0) {
+			mlog_errno(ret);
+			goto out;
+		}
+
+		if (phys_cpos && (extent_flags & OCFS2_EXT_REFCOUNTED)) {
+			ret = 1;
+			break;
+		}
+
+		if (extent_len > clusters)
+			extent_len = clusters;
+
+		clusters -= extent_len;
+		cpos += extent_len;
+	}
+out:
+	return ret;
+}
+
+static int ocfs2_prepare_inode_for_refcount(struct inode *inode,
+					    loff_t pos, size_t count,
+					    int *meta_level)
+{
+	int ret;
+	struct buffer_head *di_bh = NULL;
+	u32 cpos = pos >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
+	u32 clusters =
+		ocfs2_clusters_for_bytes(inode->i_sb, pos + count) - cpos;
+
+	ret = ocfs2_inode_lock(inode, &di_bh, 1);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	*meta_level = 1;
+
+	ret = ocfs2_refcount_cow(inode, di_bh, cpos, clusters);
+	if (ret)
+		mlog_errno(ret);
+out:
+	brelse(di_bh);
+	return ret;
+}
+
 static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
 					 loff_t *ppos,
 					 size_t count,
@@ -1712,6 +1777,22 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
 
 		end = saved_pos + count;
 
+		ret = ocfs2_check_range_for_refcount(inode, saved_pos, count);
+		if (ret == 1) {
+			ocfs2_inode_unlock(inode, meta_level);
+			meta_level = -1;
+
+			ret = ocfs2_prepare_inode_for_refcount(inode,
+							       saved_pos,
+							       count,
+							       &meta_level);
+		}
+
+		if (ret < 0) {
+			mlog_errno(ret);
+			goto out_unlock;
+		}
+
 		/*
 		 * Skip the O_DIRECT checks if we don't need
 		 * them.
@@ -1758,7 +1839,8 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
 		*ppos = saved_pos;
 
 out_unlock:
-	ocfs2_inode_unlock(inode, meta_level);
+	if (meta_level >= 0)
+		ocfs2_inode_unlock(inode, meta_level);
 
 out:
 	return ret;
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index 172f9fbc9fc7..d66cf4f7c70e 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -69,4 +69,6 @@ int ocfs2_update_inode_atime(struct inode *inode,
 int ocfs2_change_file_space(struct file *file, unsigned int cmd,
 			    struct ocfs2_space_resv *sr);
 
+int ocfs2_check_range_for_refcount(struct inode *inode, loff_t pos,
+				   size_t count);
 #endif /* OCFS2_FILE_H */
-- 
cgit v1.2.3


From 37f8a2bfaa8364dd3644cccee8824bb8f5e409a5 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Wed, 26 Aug 2009 09:47:28 +0800
Subject: ocfs2: CoW a reflinked cluster when it is truncated.

When we truncate a file to a specific size which resides in a reflinked
cluster, we need to CoW it since ocfs2_zero_range_for_truncate will
zero the space after the size (just another type of write).

So we add a "max_cpos" in ocfs2_refcount_cow so that it will stop when
it hits the max cluster offset.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/ocfs2/aops.c         |  2 +-
 fs/ocfs2/file.c         | 46 +++++++++++++++++++++++++++++++++++++++++++++-
 fs/ocfs2/refcounttree.c | 34 +++++++++++++++++++++++-----------
 fs/ocfs2/refcounttree.h |  2 +-
 4 files changed, 70 insertions(+), 14 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 9db9d64ca475..33e03c551127 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1712,7 +1712,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
 		goto out;
 	} else if (ret == 1) {
 		ret = ocfs2_refcount_cow(inode, di_bh,
-					 wc->w_cpos, wc->w_clen);
+					 wc->w_cpos, wc->w_clen, UINT_MAX);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 6ee20e82bcc5..75f5b81805b5 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -335,6 +335,39 @@ out:
 	return ret;
 }
 
+static int ocfs2_cow_file_pos(struct inode *inode,
+			      struct buffer_head *fe_bh,
+			      u64 offset)
+{
+	int status;
+	u32 phys, cpos = offset >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
+	unsigned int num_clusters = 0;
+	unsigned int ext_flags = 0;
+
+	/*
+	 * If the new offset is aligned to the range of the cluster, there is
+	 * no space for ocfs2_zero_range_for_truncate to fill, so no need to
+	 * CoW either.
+	 */
+	if ((offset & (OCFS2_SB(inode->i_sb)->s_clustersize - 1)) == 0)
+		return 0;
+
+	status = ocfs2_get_clusters(inode, cpos, &phys,
+				    &num_clusters, &ext_flags);
+	if (status) {
+		mlog_errno(status);
+		goto out;
+	}
+
+	if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
+		goto out;
+
+	return ocfs2_refcount_cow(inode, fe_bh, cpos, 1, cpos+1);
+
+out:
+	return status;
+}
+
 static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
 				     struct inode *inode,
 				     struct buffer_head *fe_bh,
@@ -347,6 +380,17 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
 
 	mlog_entry_void();
 
+	/*
+	 * We need to CoW the cluster contains the offset if it is reflinked
+	 * since we will call ocfs2_zero_range_for_truncate later which will
+	 * write "0" from offset to the end of the cluster.
+	 */
+	status = ocfs2_cow_file_pos(inode, fe_bh, new_i_size);
+	if (status) {
+		mlog_errno(status);
+		return status;
+	}
+
 	/* TODO: This needs to actually orphan the inode in this
 	 * transaction. */
 
@@ -1713,7 +1757,7 @@ static int ocfs2_prepare_inode_for_refcount(struct inode *inode,
 
 	*meta_level = 1;
 
-	ret = ocfs2_refcount_cow(inode, di_bh, cpos, clusters);
+	ret = ocfs2_refcount_cow(inode, di_bh, cpos, clusters, UINT_MAX);
 	if (ret)
 		mlog_errno(ret);
 out:
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 0a92436557e3..37aa0c8696d6 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -2482,6 +2482,7 @@ static inline unsigned int ocfs2_cow_align_length(struct super_block *sb,
  *
  * cpos is vitual start cluster position we want to do CoW in a
  * file and write_len is the cluster length.
+ * max_cpos is the place where we want to stop CoW intentionally.
  *
  * Normal we will start CoW from the beginning of extent record cotaining cpos.
  * We try to break up extents on boundaries of MAX_CONTIG_BYTES so that we
@@ -2491,6 +2492,7 @@ static int ocfs2_refcount_cal_cow_clusters(struct inode *inode,
 					   struct buffer_head *di_bh,
 					   u32 cpos,
 					   u32 write_len,
+					   u32 max_cpos,
 					   u32 *cow_start,
 					   u32 *cow_len)
 {
@@ -2505,6 +2507,8 @@ static int ocfs2_refcount_cal_cow_clusters(struct inode *inode,
 	int contig_clusters = ocfs2_cow_contig_clusters(inode->i_sb);
 	int leaf_clusters;
 
+	BUG_ON(cpos + write_len > max_cpos);
+
 	if (tree_height > 0) {
 		ret = ocfs2_find_leaf(INODE_CACHE(inode), el, cpos, &eb_bh);
 		if (ret) {
@@ -2549,15 +2553,20 @@ static int ocfs2_refcount_cal_cow_clusters(struct inode *inode,
 		}
 
 		/*
-		 * If we encounter a hole or a non-refcounted record,
-		 * stop the search.
+		 * If we encounter a hole, a non-refcounted record or
+		 * pass the max_cpos, stop the search.
 		 */
 		if ((!(rec->e_flags & OCFS2_EXT_REFCOUNTED)) ||
-		    (*cow_len && rec_end != le32_to_cpu(rec->e_cpos)))
+		    (*cow_len && rec_end != le32_to_cpu(rec->e_cpos)) ||
+		    (max_cpos <= le32_to_cpu(rec->e_cpos)))
 			break;
 
 		leaf_clusters = le16_to_cpu(rec->e_leaf_clusters);
 		rec_end = le32_to_cpu(rec->e_cpos) + leaf_clusters;
+		if (rec_end > max_cpos) {
+			rec_end = max_cpos;
+			leaf_clusters = rec_end - le32_to_cpu(rec->e_cpos);
+		}
 
 		/*
 		 * How many clusters do we actually need from
@@ -3184,12 +3193,13 @@ static int ocfs2_replace_cow(struct inode *inode,
 }
 
 /*
- * Starting at cpos, try to CoW write_len clusters.
- * This will stop when it runs into a hole or an unrefcounted extent.
+ * Starting at cpos, try to CoW write_len clusters.  Don't CoW
+ * past max_cpos.  This will stop when it runs into a hole or an
+ * unrefcounted extent.
  */
 static int ocfs2_refcount_cow_hunk(struct inode *inode,
 				   struct buffer_head *di_bh,
-				   u32 cpos, u32 write_len)
+				   u32 cpos, u32 write_len, u32 max_cpos)
 {
 	int ret;
 	u32 cow_start = 0, cow_len = 0;
@@ -3201,12 +3211,14 @@ static int ocfs2_refcount_cow_hunk(struct inode *inode,
 
 	BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
 
-	ret = ocfs2_refcount_cal_cow_clusters(inode, di_bh, cpos, write_len,
+	ret = ocfs2_refcount_cal_cow_clusters(inode, di_bh,
+					      cpos, write_len, max_cpos,
 					      &cow_start, &cow_len);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
 	}
+
 	mlog(0, "CoW inode %lu, cpos %u, write_len %u, cow_start %u, "
 	     "cow_len %u\n", inode->i_ino,
 	     cpos, write_len, cow_start, cow_len);
@@ -3233,12 +3245,12 @@ out:
 
 /*
  * CoW any and all clusters between cpos and cpos+write_len.
- * If this returns successfully, all clusters between cpos and
- * cpos+write_len are safe to modify.
+ * Don't CoW past max_cpos.  If this returns successfully, all
+ * clusters between cpos and cpos+write_len are safe to modify.
  */
 int ocfs2_refcount_cow(struct inode *inode,
 		       struct buffer_head *di_bh,
-		       u32 cpos, u32 write_len)
+		       u32 cpos, u32 write_len, u32 max_cpos)
 {
 	int ret = 0;
 	u32 p_cluster, num_clusters;
@@ -3257,7 +3269,7 @@ int ocfs2_refcount_cow(struct inode *inode,
 
 		if (ext_flags & OCFS2_EXT_REFCOUNTED) {
 			ret = ocfs2_refcount_cow_hunk(inode, di_bh, cpos,
-						      num_clusters);
+						      num_clusters, max_cpos);
 			if (ret) {
 				mlog_errno(ret);
 				break;
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h
index a8c15b0b2307..356f99c85635 100644
--- a/fs/ocfs2/refcounttree.h
+++ b/fs/ocfs2/refcounttree.h
@@ -53,5 +53,5 @@ int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
 					  int *credits,
 					  struct ocfs2_alloc_context **meta_ac);
 int ocfs2_refcount_cow(struct inode *inode, struct buffer_head *di_bh,
-		       u32 cpos, u32 write_len);
+		       u32 cpos, u32 write_len, u32 max_cpos);
 #endif /* OCFS2_REFCOUNTTREE_H */
-- 
cgit v1.2.3


From 110a045aca62f6f564e3b68f89af2a3a5a6ecff2 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Sat, 22 Aug 2009 23:54:27 +0800
Subject: ocfs2: Add normal functions for reflink a normal file's extents.

2 major functions are added in this patch.

ocfs2_attach_refcount_tree will create a new refcount tree for the
old file if it doesn't have one and insert all the extent records
into the tree if they are not refcounted.

ocfs2_create_reflink_node will:
1. set the refcount tree to the new file.
2. call ocfs2_duplicate_extent_list which will iterate over all the
   extents of the old file, insert them into the new file and increase
   the corresponding reference count.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/ocfs2/refcounttree.c | 286 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 286 insertions(+)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 37aa0c8696d6..e3171c483685 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -3282,3 +3282,289 @@ int ocfs2_refcount_cow(struct inode *inode,
 
 	return ret;
 }
+
+/*
+ * Insert a new extent into refcount tree and mark a extent rec
+ * as refcounted in the dinode tree.
+ */
+int ocfs2_add_refcount_flag(struct inode *inode,
+			    struct ocfs2_extent_tree *data_et,
+			    struct ocfs2_caching_info *ref_ci,
+			    struct buffer_head *ref_root_bh,
+			    u32 cpos, u32 p_cluster, u32 num_clusters,
+			    struct ocfs2_cached_dealloc_ctxt *dealloc)
+{
+	int ret;
+	handle_t *handle;
+	int credits = 1, ref_blocks = 0;
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct ocfs2_alloc_context *meta_ac = NULL;
+
+	ret = ocfs2_calc_refcount_meta_credits(inode->i_sb,
+					       ref_ci, ref_root_bh,
+					       p_cluster, num_clusters,
+					       &ref_blocks, &credits);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	mlog(0, "reserve new metadata %d, credits = %d\n",
+	     ref_blocks, credits);
+
+	if (ref_blocks) {
+		ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb),
+							ref_blocks, &meta_ac);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+	}
+
+	handle = ocfs2_start_trans(osb, credits);
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_mark_extent_refcounted(inode, data_et, handle,
+					   cpos, num_clusters, p_cluster,
+					   meta_ac, dealloc);
+	if (ret) {
+		mlog_errno(ret);
+		goto out_commit;
+	}
+
+	ret = __ocfs2_increase_refcount(handle, ref_ci, ref_root_bh,
+					p_cluster, num_clusters,
+					meta_ac, dealloc);
+	if (ret)
+		mlog_errno(ret);
+
+out_commit:
+	ocfs2_commit_trans(osb, handle);
+out:
+	if (meta_ac)
+		ocfs2_free_alloc_context(meta_ac);
+	return ret;
+}
+
+static int ocfs2_attach_refcount_tree(struct inode *inode,
+				      struct buffer_head *di_bh)
+{
+	int ret;
+	struct buffer_head *ref_root_bh = NULL;
+	struct ocfs2_inode_info *oi = OCFS2_I(inode);
+	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct ocfs2_refcount_tree *ref_tree;
+	unsigned int ext_flags;
+	loff_t size;
+	u32 cpos, num_clusters, clusters, p_cluster;
+	struct ocfs2_cached_dealloc_ctxt dealloc;
+	struct ocfs2_extent_tree di_et;
+
+	ocfs2_init_dealloc_ctxt(&dealloc);
+
+	if (!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)) {
+		ret = ocfs2_create_refcount_tree(inode, di_bh);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+	}
+
+	BUG_ON(!di->i_refcount_loc);
+	ret = ocfs2_lock_refcount_tree(osb,
+				       le64_to_cpu(di->i_refcount_loc), 1,
+				       &ref_tree, &ref_root_bh);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ocfs2_init_dinode_extent_tree(&di_et, INODE_CACHE(inode), di_bh);
+
+	size = i_size_read(inode);
+	clusters = ocfs2_clusters_for_bytes(inode->i_sb, size);
+
+	cpos = 0;
+	while (cpos < clusters) {
+		ret = ocfs2_get_clusters(inode, cpos, &p_cluster,
+					 &num_clusters, &ext_flags);
+
+		if (p_cluster && !(ext_flags & OCFS2_EXT_REFCOUNTED)) {
+			ret = ocfs2_add_refcount_flag(inode, &di_et,
+						      &ref_tree->rf_ci,
+						      ref_root_bh, cpos,
+						      p_cluster, num_clusters,
+						      &dealloc);
+			if (ret) {
+				mlog_errno(ret);
+				break;
+			}
+		}
+		cpos += num_clusters;
+	}
+
+	ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
+	brelse(ref_root_bh);
+
+	if (!ret && ocfs2_dealloc_has_cluster(&dealloc)) {
+		ocfs2_schedule_truncate_log_flush(osb, 1);
+		ocfs2_run_deallocs(osb, &dealloc);
+	}
+out:
+	/*
+	 * Empty the extent map so that we may get the right extent
+	 * record from the disk.
+	 */
+	ocfs2_extent_map_trunc(inode, 0);
+
+	return ret;
+}
+
+static int ocfs2_add_refcounted_extent(struct inode *inode,
+				   struct ocfs2_extent_tree *et,
+				   struct ocfs2_caching_info *ref_ci,
+				   struct buffer_head *ref_root_bh,
+				   u32 cpos, u32 p_cluster, u32 num_clusters,
+				   unsigned int ext_flags,
+				   struct ocfs2_cached_dealloc_ctxt *dealloc)
+{
+	int ret;
+	handle_t *handle;
+	int credits = 0;
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct ocfs2_alloc_context *meta_ac = NULL;
+
+	ret = ocfs2_lock_refcount_allocators(inode->i_sb,
+					     p_cluster, num_clusters,
+					     et, ref_ci,
+					     ref_root_bh, &meta_ac,
+					     NULL, &credits);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	handle = ocfs2_start_trans(osb, credits);
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_insert_extent(handle, et, cpos,
+			cpu_to_le64(ocfs2_clusters_to_blocks(inode->i_sb,
+							     p_cluster)),
+			num_clusters, ext_flags, meta_ac);
+	if (ret) {
+		mlog_errno(ret);
+		goto out_commit;
+	}
+
+	ret = __ocfs2_increase_refcount(handle, ref_ci, ref_root_bh,
+					p_cluster, num_clusters,
+					meta_ac, dealloc);
+	if (ret)
+		mlog_errno(ret);
+
+out_commit:
+	ocfs2_commit_trans(osb, handle);
+out:
+	if (meta_ac)
+		ocfs2_free_alloc_context(meta_ac);
+	return ret;
+}
+
+static int ocfs2_duplicate_extent_list(struct inode *s_inode,
+				struct inode *t_inode,
+				struct buffer_head *t_bh,
+				struct ocfs2_caching_info *ref_ci,
+				struct buffer_head *ref_root_bh,
+				struct ocfs2_cached_dealloc_ctxt *dealloc)
+{
+	int ret = 0;
+	u32 p_cluster, num_clusters, clusters, cpos;
+	loff_t size;
+	unsigned int ext_flags;
+	struct ocfs2_extent_tree et;
+
+	ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(t_inode), t_bh);
+
+	size = i_size_read(s_inode);
+	clusters = ocfs2_clusters_for_bytes(s_inode->i_sb, size);
+
+	cpos = 0;
+	while (cpos < clusters) {
+		ret = ocfs2_get_clusters(s_inode, cpos, &p_cluster,
+					 &num_clusters, &ext_flags);
+
+		if (p_cluster) {
+			ret = ocfs2_add_refcounted_extent(t_inode, &et,
+							  ref_ci, ref_root_bh,
+							  cpos, p_cluster,
+							  num_clusters,
+							  ext_flags,
+							  dealloc);
+			if (ret) {
+				mlog_errno(ret);
+				goto out;
+			}
+		}
+
+		cpos += num_clusters;
+	}
+
+out:
+	return ret;
+}
+
+static int ocfs2_create_reflink_node(struct inode *s_inode,
+				     struct buffer_head *s_bh,
+				     struct inode *t_inode,
+				     struct buffer_head *t_bh)
+{
+	int ret;
+	struct buffer_head *ref_root_bh = NULL;
+	struct ocfs2_cached_dealloc_ctxt dealloc;
+	struct ocfs2_super *osb = OCFS2_SB(s_inode->i_sb);
+	struct ocfs2_refcount_block *rb;
+	struct ocfs2_dinode *di = (struct ocfs2_dinode *)s_bh->b_data;
+	struct ocfs2_refcount_tree *ref_tree;
+
+	ocfs2_init_dealloc_ctxt(&dealloc);
+
+	ret = ocfs2_set_refcount_tree(t_inode, t_bh,
+				      le64_to_cpu(di->i_refcount_loc));
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc),
+				       1, &ref_tree, &ref_root_bh);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+	rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
+
+	ret = ocfs2_duplicate_extent_list(s_inode, t_inode, t_bh,
+					  &ref_tree->rf_ci, ref_root_bh,
+					  &dealloc);
+	if (ret)
+		mlog_errno(ret);
+
+	ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
+	brelse(ref_root_bh);
+out:
+	if (ocfs2_dealloc_has_cluster(&dealloc)) {
+		ocfs2_schedule_truncate_log_flush(osb, 1);
+		ocfs2_run_deallocs(osb, &dealloc);
+	}
+
+	return ret;
+}
-- 
cgit v1.2.3


From a9063ab9a3827483007124bdb6f9877f0ab4c3f5 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:40:59 +0800
Subject: ocfs2: handle file attributes issue for reflink.

A reflink creates a snapshot of a file, that means the attributes
must be identical except for three exceptions - nlink, ino, and ctime.

As for time changes, here is a brief description:

1. Source file:
   1) atime: Ignore. Let the lazy atime code handle that.
   2) mtime: don't touch.
   3) ctime: If we change the tree (adding REFCOUNTED to at least one
             extent), update it.
2. Destination file:
   1) atime: ignore.
   2) mtime: we want it to appear identical to the source.
   3) ctime: update.

The idea here is that an ls -l will show the same time for the
src and target - it shows mtime.  Backup software like rsync and tar
will treat the new file correctly too.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/ocfs2/refcounttree.c | 122 +++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 120 insertions(+), 2 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index e3171c483685..62d21c6ce1d9 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -3350,10 +3350,44 @@ out:
 	return ret;
 }
 
+static int ocfs2_change_ctime(struct inode *inode,
+			      struct buffer_head *di_bh)
+{
+	int ret;
+	handle_t *handle;
+	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
+
+	handle = ocfs2_start_trans(OCFS2_SB(inode->i_sb),
+				   OCFS2_INODE_UPDATE_CREDITS);
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
+	if (ret) {
+		mlog_errno(ret);
+		goto out_commit;
+	}
+
+	inode->i_ctime = CURRENT_TIME;
+	di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
+	di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
+
+	ocfs2_journal_dirty(handle, di_bh);
+
+out_commit:
+	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
+out:
+	return ret;
+}
+
 static int ocfs2_attach_refcount_tree(struct inode *inode,
 				      struct buffer_head *di_bh)
 {
-	int ret;
+	int ret, data_changed = 0;
 	struct buffer_head *ref_root_bh = NULL;
 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
@@ -3402,12 +3436,21 @@ static int ocfs2_attach_refcount_tree(struct inode *inode,
 						      &dealloc);
 			if (ret) {
 				mlog_errno(ret);
-				break;
+				goto unlock;
 			}
+
+			data_changed = 1;
 		}
 		cpos += num_clusters;
 	}
 
+	if (data_changed) {
+		ret = ocfs2_change_ctime(inode, di_bh);
+		if (ret)
+			mlog_errno(ret);
+	}
+
+unlock:
 	ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
 	brelse(ref_root_bh);
 
@@ -3522,6 +3565,74 @@ out:
 	return ret;
 }
 
+/*
+ * change the new file's attributes to the src.
+ *
+ * reflink creates a snapshot of a file, that means the attributes
+ * must be identical except for three exceptions - nlink, ino, and ctime.
+ */
+static int ocfs2_complete_reflink(struct inode *s_inode,
+				  struct buffer_head *s_bh,
+				  struct inode *t_inode,
+				  struct buffer_head *t_bh)
+{
+	int ret;
+	handle_t *handle;
+	struct ocfs2_dinode *s_di = (struct ocfs2_dinode *)s_bh->b_data;
+	struct ocfs2_dinode *di = (struct ocfs2_dinode *)t_bh->b_data;
+	loff_t size = i_size_read(s_inode);
+
+	handle = ocfs2_start_trans(OCFS2_SB(t_inode->i_sb),
+				   OCFS2_INODE_UPDATE_CREDITS);
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		mlog_errno(ret);
+		return ret;
+	}
+
+	ret = ocfs2_journal_access_di(handle, INODE_CACHE(t_inode), t_bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
+	if (ret) {
+		mlog_errno(ret);
+		goto out_commit;
+	}
+
+	spin_lock(&OCFS2_I(t_inode)->ip_lock);
+	OCFS2_I(t_inode)->ip_clusters = OCFS2_I(s_inode)->ip_clusters;
+	OCFS2_I(t_inode)->ip_attr = OCFS2_I(s_inode)->ip_attr;
+	OCFS2_I(t_inode)->ip_dyn_features = OCFS2_I(s_inode)->ip_dyn_features;
+	spin_unlock(&OCFS2_I(t_inode)->ip_lock);
+	i_size_write(t_inode, size);
+
+	di->i_xattr_inline_size = s_di->i_xattr_inline_size;
+	di->i_clusters = s_di->i_clusters;
+	di->i_size = s_di->i_size;
+	di->i_dyn_features = s_di->i_dyn_features;
+	di->i_attr = s_di->i_attr;
+	di->i_uid = s_di->i_uid;
+	di->i_gid = s_di->i_gid;
+	di->i_mode = s_di->i_mode;
+
+	/*
+	 * update time.
+	 * we want mtime to appear identical to the source and update ctime.
+	 */
+	t_inode->i_ctime = CURRENT_TIME;
+
+	di->i_ctime = cpu_to_le64(t_inode->i_ctime.tv_sec);
+	di->i_ctime_nsec = cpu_to_le32(t_inode->i_ctime.tv_nsec);
+
+	t_inode->i_mtime = s_inode->i_mtime;
+	di->i_mtime = s_di->i_mtime;
+	di->i_mtime_nsec = s_di->i_mtime_nsec;
+
+	ocfs2_journal_dirty(handle, t_bh);
+
+out_commit:
+	ocfs2_commit_trans(OCFS2_SB(t_inode->i_sb), handle);
+	return ret;
+}
+
 static int ocfs2_create_reflink_node(struct inode *s_inode,
 				     struct buffer_head *s_bh,
 				     struct inode *t_inode,
@@ -3555,9 +3666,16 @@ static int ocfs2_create_reflink_node(struct inode *s_inode,
 	ret = ocfs2_duplicate_extent_list(s_inode, t_inode, t_bh,
 					  &ref_tree->rf_ci, ref_root_bh,
 					  &dealloc);
+	if (ret) {
+		mlog_errno(ret);
+		goto out_unlock_refcount;
+	}
+
+	ret = ocfs2_complete_reflink(s_inode, s_bh, t_inode, t_bh);
 	if (ret)
 		mlog_errno(ret);
 
+out_unlock_refcount:
 	ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
 	brelse(ref_root_bh);
 out:
-- 
cgit v1.2.3


From 1061f9c1c9f81ed88b5d268a95d8e3ace80da63a Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:41:57 +0800
Subject: ocfs2: Return extent flags for xattr value tree.

With the new refcount tree, an xattr value can also be refcounted
among multiple files. So return the appropriate extent flags
so that CoW can use them later.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/ocfs2/extent_map.c | 6 +++++-
 fs/ocfs2/extent_map.h | 3 ++-
 fs/ocfs2/xattr.c      | 7 ++++---
 3 files changed, 11 insertions(+), 5 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index 40b51056bb32..843db64e9d4a 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -541,7 +541,8 @@ static void ocfs2_relative_extent_offsets(struct super_block *sb,
 
 int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
 			     u32 *p_cluster, u32 *num_clusters,
-			     struct ocfs2_extent_list *el)
+			     struct ocfs2_extent_list *el,
+			     unsigned int *extent_flags)
 {
 	int ret = 0, i;
 	struct buffer_head *eb_bh = NULL;
@@ -593,6 +594,9 @@ int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
 		*p_cluster = *p_cluster + coff;
 		if (num_clusters)
 			*num_clusters = ocfs2_rec_clusters(el, rec) - coff;
+
+		if (extent_flags)
+			*extent_flags = rec->e_flags;
 	}
 out:
 	if (eb_bh)
diff --git a/fs/ocfs2/extent_map.h b/fs/ocfs2/extent_map.h
index 9942f47efda7..e79d41c2c909 100644
--- a/fs/ocfs2/extent_map.h
+++ b/fs/ocfs2/extent_map.h
@@ -55,7 +55,8 @@ int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 
 int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
 			     u32 *p_cluster, u32 *num_clusters,
-			     struct ocfs2_extent_list *el);
+			     struct ocfs2_extent_list *el,
+			     unsigned int *extent_flags);
 
 int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr,
 			   struct buffer_head *bhs[], int flags,
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 1bf12c453f99..dda49c00362a 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -704,7 +704,7 @@ static int ocfs2_xattr_shrink_size(struct inode *inode,
 	while (trunc_len) {
 		ret = ocfs2_xattr_get_clusters(inode, cpos, &phys_cpos,
 					       &alloc_size,
-					       &vb->vb_xv->xr_list);
+					       &vb->vb_xv->xr_list, NULL);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -959,7 +959,7 @@ static int ocfs2_xattr_get_value_outside(struct inode *inode,
 	cpos = 0;
 	while (cpos < clusters) {
 		ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
-					       &num_clusters, el);
+					       &num_clusters, el, NULL);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -1198,7 +1198,8 @@ static int __ocfs2_xattr_set_value_outside(struct inode *inode,
 
 	while (cpos < clusters) {
 		ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
-					       &num_clusters, &xv->xr_list);
+					       &num_clusters, &xv->xr_list,
+					       NULL);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
-- 
cgit v1.2.3


From 913580b4cd445c4fb25d7cf167911a8cf6bdb1eb Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Mon, 24 Aug 2009 14:31:03 +0800
Subject: ocfs2: Abstract duplicate clusters process in CoW.

We currently use the page cache to duplicate clusters in CoW,
but it isn't suitable for the xattr case. So abstract it out
so that the caller can decide which method it uses.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/ocfs2/refcounttree.c | 127 +++++++++++++++++++++++++++---------------------
 1 file changed, 71 insertions(+), 56 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 62d21c6ce1d9..40de7bb9e9a6 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -45,12 +45,20 @@ struct ocfs2_cow_context {
 	struct inode *inode;
 	u32 cow_start;
 	u32 cow_len;
-	struct ocfs2_extent_tree di_et;
-	struct ocfs2_caching_info *ref_ci;
+	struct ocfs2_extent_tree data_et;
+	struct ocfs2_refcount_tree *ref_tree;
 	struct buffer_head *ref_root_bh;
 	struct ocfs2_alloc_context *meta_ac;
 	struct ocfs2_alloc_context *data_ac;
 	struct ocfs2_cached_dealloc_ctxt dealloc;
+	int (*get_clusters)(struct ocfs2_cow_context *context,
+			    u32 v_cluster, u32 *p_cluster,
+			    u32 *num_clusters,
+			    unsigned int *extent_flags);
+	int (*cow_duplicate_clusters)(handle_t *handle,
+				      struct ocfs2_cow_context *context,
+				      u32 cpos, u32 old_cluster,
+				      u32 new_cluster, u32 new_len);
 };
 
 static inline struct ocfs2_refcount_tree *
@@ -2489,7 +2497,7 @@ static inline unsigned int ocfs2_cow_align_length(struct super_block *sb,
  * get good I/O from the resulting extent tree.
  */
 static int ocfs2_refcount_cal_cow_clusters(struct inode *inode,
-					   struct buffer_head *di_bh,
+					   struct ocfs2_extent_list *el,
 					   u32 cpos,
 					   u32 write_len,
 					   u32 max_cpos,
@@ -2497,8 +2505,6 @@ static int ocfs2_refcount_cal_cow_clusters(struct inode *inode,
 					   u32 *cow_len)
 {
 	int ret = 0;
-	struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
-	struct ocfs2_extent_list *el = &di->id2.i_list;
 	int tree_height = le16_to_cpu(el->l_tree_depth), i;
 	struct buffer_head *eb_bh = NULL;
 	struct ocfs2_extent_block *eb = NULL;
@@ -2769,13 +2775,13 @@ static int ocfs2_clear_cow_buffer(handle_t *handle, struct buffer_head *bh)
 	return 0;
 }
 
-static int ocfs2_duplicate_clusters(handle_t *handle,
-				    struct ocfs2_cow_context *context,
-				    u32 cpos, u32 old_cluster,
-				    u32 new_cluster, u32 new_len)
+static int ocfs2_duplicate_clusters_by_page(handle_t *handle,
+					    struct ocfs2_cow_context *context,
+					    u32 cpos, u32 old_cluster,
+					    u32 new_cluster, u32 new_len)
 {
 	int ret = 0, partial;
-	struct ocfs2_caching_info *ci = context->di_et.et_ci;
+	struct ocfs2_caching_info *ci = context->data_et.et_ci;
 	struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
 	u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster);
 	struct page *page;
@@ -2909,7 +2915,7 @@ static int ocfs2_replace_clusters(handle_t *handle,
 				  unsigned int ext_flags)
 {
 	int ret;
-	struct ocfs2_caching_info *ci = context->di_et.et_ci;
+	struct ocfs2_caching_info *ci = context->data_et.et_ci;
 	u64 ino = ocfs2_metadata_cache_owner(ci);
 
 	mlog(0, "inode %llu, cpos %u, old %u, new %u, len %u, ext_flags %u\n",
@@ -2917,15 +2923,15 @@ static int ocfs2_replace_clusters(handle_t *handle,
 
 	/*If the old clusters is unwritten, no need to duplicate. */
 	if (!(ext_flags & OCFS2_EXT_UNWRITTEN)) {
-		ret = ocfs2_duplicate_clusters(handle, context, cpos,
-					       old, new, len);
+		ret = context->cow_duplicate_clusters(handle, context, cpos,
+						      old, new, len);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
 		}
 	}
 
-	ret = ocfs2_clear_ext_refcount(handle, &context->di_et,
+	ret = ocfs2_clear_ext_refcount(handle, &context->data_et,
 				       cpos, new, len, ext_flags,
 				       context->meta_ac, &context->dealloc);
 	if (ret)
@@ -2983,6 +2989,15 @@ static int ocfs2_cow_sync_writeback(struct super_block *sb,
 	return ret;
 }
 
+static int ocfs2_di_get_clusters(struct ocfs2_cow_context *context,
+				 u32 v_cluster, u32 *p_cluster,
+				 u32 *num_clusters,
+				 unsigned int *extent_flags)
+{
+	return ocfs2_get_clusters(context->inode, v_cluster, p_cluster,
+				  num_clusters, extent_flags);
+}
+
 static int ocfs2_make_clusters_writable(struct super_block *sb,
 					struct ocfs2_cow_context *context,
 					u32 cpos, u32 p_cluster,
@@ -2994,14 +3009,15 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
 	struct ocfs2_super *osb = OCFS2_SB(sb);
 	handle_t *handle;
 	struct buffer_head *ref_leaf_bh = NULL;
+	struct ocfs2_caching_info *ref_ci = &context->ref_tree->rf_ci;
 	struct ocfs2_refcount_rec rec;
 
 	mlog(0, "cpos %u, p_cluster %u, num_clusters %u, e_flags %u\n",
 	     cpos, p_cluster, num_clusters, e_flags);
 
 	ret = ocfs2_lock_refcount_allocators(sb, p_cluster, num_clusters,
-					     &context->di_et,
-					     context->ref_ci,
+					     &context->data_et,
+					     ref_ci,
 					     context->ref_root_bh,
 					     &context->meta_ac,
 					     &context->data_ac, &credits);
@@ -3018,8 +3034,7 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
 	}
 
 	while (num_clusters) {
-		ret = ocfs2_get_refcount_rec(context->ref_ci,
-					     context->ref_root_bh,
+		ret = ocfs2_get_refcount_rec(ref_ci, context->ref_root_bh,
 					     p_cluster, num_clusters,
 					     &rec, &index, &ref_leaf_bh);
 		if (ret) {
@@ -3041,7 +3056,8 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
 		 */
 		if (le32_to_cpu(rec.r_refcount) == 1) {
 			delete = 0;
-			ret = ocfs2_clear_ext_refcount(handle, &context->di_et,
+			ret = ocfs2_clear_ext_refcount(handle,
+						       &context->data_et,
 						       cpos, p_cluster,
 						       set_len, e_flags,
 						       context->meta_ac,
@@ -3072,7 +3088,7 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
 			set_len = new_len;
 		}
 
-		ret = __ocfs2_decrease_refcount(handle, context->ref_ci,
+		ret = __ocfs2_decrease_refcount(handle, ref_ci,
 						context->ref_root_bh,
 						p_cluster, set_len,
 						context->meta_ac,
@@ -3114,17 +3130,14 @@ out:
 	return ret;
 }
 
-static int ocfs2_replace_cow(struct inode *inode,
-			     struct buffer_head *di_bh,
-			     struct buffer_head *ref_root_bh,
-			     struct ocfs2_caching_info *ref_ci,
-			     u32 cow_start, u32 cow_len)
+static int ocfs2_replace_cow(struct ocfs2_cow_context *context)
 {
 	int ret = 0;
-	u32 p_cluster, num_clusters, start = cow_start;
+	struct inode *inode = context->inode;
+	u32 cow_start = context->cow_start, cow_len = context->cow_len;
+	u32 p_cluster, num_clusters;
 	unsigned int ext_flags;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
-	struct ocfs2_cow_context *context;
 
 	if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) {
 		ocfs2_error(inode->i_sb, "Inode %lu want to use refcount "
@@ -3133,26 +3146,11 @@ static int ocfs2_replace_cow(struct inode *inode,
 		return -EROFS;
 	}
 
-	context = kzalloc(sizeof(struct ocfs2_cow_context), GFP_NOFS);
-	if (!context) {
-		ret = -ENOMEM;
-		mlog_errno(ret);
-		return ret;
-	}
-
-	context->inode = inode;
-	context->cow_start = cow_start;
-	context->cow_len = cow_len;
-	context->ref_ci = ref_ci;
-	context->ref_root_bh = ref_root_bh;
-
 	ocfs2_init_dealloc_ctxt(&context->dealloc);
-	ocfs2_init_dinode_extent_tree(&context->di_et,
-				      INODE_CACHE(inode), di_bh);
 
 	while (cow_len) {
-		ret = ocfs2_get_clusters(inode, cow_start, &p_cluster,
-					 &num_clusters, &ext_flags);
+		ret = context->get_clusters(context, cow_start, &p_cluster,
+					    &num_clusters, &ext_flags);
 		if (ret) {
 			mlog_errno(ret);
 			break;
@@ -3175,20 +3173,11 @@ static int ocfs2_replace_cow(struct inode *inode,
 		cow_start += num_clusters;
 	}
 
-
-	/*
-	 * truncate the extent map here since no matter whether we meet with
-	 * any error during the action, we shouldn't trust cached extent map
-	 * any more.
-	 */
-	ocfs2_extent_map_trunc(inode, start);
-
 	if (ocfs2_dealloc_has_cluster(&context->dealloc)) {
 		ocfs2_schedule_truncate_log_flush(osb, 1);
 		ocfs2_run_deallocs(osb, &context->dealloc);
 	}
 
-	kfree(context);
 	return ret;
 }
 
@@ -3208,10 +3197,11 @@ static int ocfs2_refcount_cow_hunk(struct inode *inode,
 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
 	struct buffer_head *ref_root_bh = NULL;
 	struct ocfs2_refcount_tree *ref_tree;
+	struct ocfs2_cow_context *context = NULL;
 
 	BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
 
-	ret = ocfs2_refcount_cal_cow_clusters(inode, di_bh,
+	ret = ocfs2_refcount_cal_cow_clusters(inode, &di->id2.i_list,
 					      cpos, write_len, max_cpos,
 					      &cow_start, &cow_len);
 	if (ret) {
@@ -3225,6 +3215,13 @@ static int ocfs2_refcount_cow_hunk(struct inode *inode,
 
 	BUG_ON(cow_len == 0);
 
+	context = kzalloc(sizeof(struct ocfs2_cow_context), GFP_NOFS);
+	if (!context) {
+		ret = -ENOMEM;
+		mlog_errno(ret);
+		goto out;
+	}
+
 	ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc),
 				       1, &ref_tree, &ref_root_bh);
 	if (ret) {
@@ -3232,14 +3229,32 @@ static int ocfs2_refcount_cow_hunk(struct inode *inode,
 		goto out;
 	}
 
-	ret = ocfs2_replace_cow(inode, di_bh, ref_root_bh, &ref_tree->rf_ci,
-				cow_start, cow_len);
+	context->inode = inode;
+	context->cow_start = cow_start;
+	context->cow_len = cow_len;
+	context->ref_tree = ref_tree;
+	context->ref_root_bh = ref_root_bh;
+	context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_page;
+	context->get_clusters = ocfs2_di_get_clusters;
+
+	ocfs2_init_dinode_extent_tree(&context->data_et,
+				      INODE_CACHE(inode), di_bh);
+
+	ret = ocfs2_replace_cow(context);
 	if (ret)
 		mlog_errno(ret);
 
+	/*
+	 * truncate the extent map here since no matter whether we meet with
+	 * any error during the action, we shouldn't trust cached extent map
+	 * any more.
+	 */
+	ocfs2_extent_map_trunc(inode, cow_start);
+
 	ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
 	brelse(ref_root_bh);
 out:
+	kfree(context);
 	return ret;
 }
 
-- 
cgit v1.2.3


From 492a8a33e1cb966fa0b5756c5fc11d30c8f8848e Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:43:17 +0800
Subject: ocfs2: Add CoW support for xattr.

In order to make the 2 transactions (xattr and CoW) independent of each other,
we CoW the whole xattr value out when we are setting it.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/ocfs2/refcounttree.c | 246 +++++++++++++++++++++++++++++++++++++++++++++++-
 fs/ocfs2/refcounttree.h |  29 ++++++
 fs/ocfs2/xattr.c        | 234 ++++++++++++++++++++++++++++++++++++++++++---
 3 files changed, 494 insertions(+), 15 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 40de7bb9e9a6..a5b5bef054a7 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -32,6 +32,7 @@
 #include "dlmglue.h"
 #include "extent_map.h"
 #include "aops.h"
+#include "xattr.h"
 
 #include <linux/bio.h>
 #include <linux/blkdev.h>
@@ -51,6 +52,9 @@ struct ocfs2_cow_context {
 	struct ocfs2_alloc_context *meta_ac;
 	struct ocfs2_alloc_context *data_ac;
 	struct ocfs2_cached_dealloc_ctxt dealloc;
+	void *cow_object;
+	struct ocfs2_post_refcount *post_refcount;
+	int extra_credits;
 	int (*get_clusters)(struct ocfs2_cow_context *context,
 			    u32 v_cluster, u32 *p_cluster,
 			    u32 *num_clusters,
@@ -2848,6 +2852,65 @@ unlock:
 	return ret;
 }
 
+static int ocfs2_duplicate_clusters_by_jbd(handle_t *handle,
+					   struct ocfs2_cow_context *context,
+					   u32 cpos, u32 old_cluster,
+					   u32 new_cluster, u32 new_len)
+{
+	int ret = 0;
+	struct super_block *sb = context->inode->i_sb;
+	struct ocfs2_caching_info *ci = context->data_et.et_ci;
+	int i, blocks = ocfs2_clusters_to_blocks(sb, new_len);
+	u64 old_block = ocfs2_clusters_to_blocks(sb, old_cluster);
+	u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster);
+	struct ocfs2_super *osb = OCFS2_SB(sb);
+	struct buffer_head *old_bh = NULL;
+	struct buffer_head *new_bh = NULL;
+
+	mlog(0, "old_cluster %u, new %u, len %u\n", old_cluster,
+	     new_cluster, new_len);
+
+	for (i = 0; i < blocks; i++, old_block++, new_block++) {
+		new_bh = sb_getblk(osb->sb, new_block);
+		if (new_bh == NULL) {
+			ret = -EIO;
+			mlog_errno(ret);
+			break;
+		}
+
+		ocfs2_set_new_buffer_uptodate(ci, new_bh);
+
+		ret = ocfs2_read_block(ci, old_block, &old_bh, NULL);
+		if (ret) {
+			mlog_errno(ret);
+			break;
+		}
+
+		ret = ocfs2_journal_access(handle, ci, new_bh,
+					   OCFS2_JOURNAL_ACCESS_CREATE);
+		if (ret) {
+			mlog_errno(ret);
+			break;
+		}
+
+		memcpy(new_bh->b_data, old_bh->b_data, sb->s_blocksize);
+		ret = ocfs2_journal_dirty(handle, new_bh);
+		if (ret) {
+			mlog_errno(ret);
+			break;
+		}
+
+		brelse(new_bh);
+		brelse(old_bh);
+		new_bh = NULL;
+		old_bh = NULL;
+	}
+
+	brelse(new_bh);
+	brelse(old_bh);
+	return ret;
+}
+
 static int ocfs2_clear_ext_refcount(handle_t *handle,
 				    struct ocfs2_extent_tree *et,
 				    u32 cpos, u32 p_cluster, u32 len,
@@ -3026,6 +3089,10 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
 		return ret;
 	}
 
+	if (context->post_refcount)
+		credits += context->post_refcount->credits;
+
+	credits += context->extra_credits;
 	handle = ocfs2_start_trans(osb, credits);
 	if (IS_ERR(handle)) {
 		ret = PTR_ERR(handle);
@@ -3105,13 +3172,25 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
 		ref_leaf_bh = NULL;
 	}
 
+	/* handle any post_cow action. */
+	if (context->post_refcount && context->post_refcount->func) {
+		ret = context->post_refcount->func(context->inode, handle,
+						context->post_refcount->para);
+		if (ret) {
+			mlog_errno(ret);
+			goto out_commit;
+		}
+	}
+
 	/*
 	 * Here we should write the new page out first if we are
 	 * in write-back mode.
 	 */
-	ret = ocfs2_cow_sync_writeback(sb, context, cpos, num_clusters);
-	if (ret)
-		mlog_errno(ret);
+	if (context->get_clusters == ocfs2_di_get_clusters) {
+		ret = ocfs2_cow_sync_writeback(sb, context, cpos, num_clusters);
+		if (ret)
+			mlog_errno(ret);
+	}
 
 out_commit:
 	ocfs2_commit_trans(osb, handle);
@@ -3298,6 +3377,167 @@ int ocfs2_refcount_cow(struct inode *inode,
 	return ret;
 }
 
+static int ocfs2_xattr_value_get_clusters(struct ocfs2_cow_context *context,
+					  u32 v_cluster, u32 *p_cluster,
+					  u32 *num_clusters,
+					  unsigned int *extent_flags)
+{
+	struct inode *inode = context->inode;
+	struct ocfs2_xattr_value_root *xv = context->cow_object;
+
+	return ocfs2_xattr_get_clusters(inode, v_cluster, p_cluster,
+					num_clusters, &xv->xr_list,
+					extent_flags);
+}
+
+/*
+ * Given a xattr value root, calculate the most meta/credits we need for
+ * refcount tree change if we truncate it to 0.
+ */
+int ocfs2_refcounted_xattr_delete_need(struct inode *inode,
+				       struct ocfs2_caching_info *ref_ci,
+				       struct buffer_head *ref_root_bh,
+				       struct ocfs2_xattr_value_root *xv,
+				       int *meta_add, int *credits)
+{
+	int ret = 0, index, ref_blocks = 0;
+	u32 p_cluster, num_clusters;
+	u32 cpos = 0, clusters = le32_to_cpu(xv->xr_clusters);
+	struct ocfs2_refcount_block *rb;
+	struct ocfs2_refcount_rec rec;
+	struct buffer_head *ref_leaf_bh = NULL;
+
+	while (cpos < clusters) {
+		ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
+					       &num_clusters, &xv->xr_list,
+					       NULL);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+
+		cpos += num_clusters;
+
+		while (num_clusters) {
+			ret = ocfs2_get_refcount_rec(ref_ci, ref_root_bh,
+						     p_cluster, num_clusters,
+						     &rec, &index,
+						     &ref_leaf_bh);
+			if (ret) {
+				mlog_errno(ret);
+				goto out;
+			}
+
+			BUG_ON(!rec.r_refcount);
+
+			rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
+
+			/*
+			 * We really don't know whether the other clusters is in
+			 * this refcount block or not, so just take the worst
+			 * case that all the clusters are in this block and each
+			 * one will split a refcount rec, so totally we need
+			 * clusters * 2 new refcount rec.
+			 */
+			if (le64_to_cpu(rb->rf_records.rl_used) + clusters * 2 >
+			    le16_to_cpu(rb->rf_records.rl_count))
+				ref_blocks++;
+
+			*credits += 1;
+			brelse(ref_leaf_bh);
+			ref_leaf_bh = NULL;
+
+			if (num_clusters <= le32_to_cpu(rec.r_clusters))
+				break;
+			else
+				num_clusters -= le32_to_cpu(rec.r_clusters);
+			p_cluster += num_clusters;
+		}
+	}
+
+	*meta_add += ref_blocks;
+	if (!ref_blocks)
+		goto out;
+
+	rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
+	if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)
+		*credits += OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
+	else {
+		struct ocfs2_extent_tree et;
+
+		ocfs2_init_refcount_extent_tree(&et, ref_ci, ref_root_bh);
+		*credits += ocfs2_calc_extend_credits(inode->i_sb,
+						      et.et_root_el,
+						      ref_blocks);
+	}
+
+out:
+	brelse(ref_leaf_bh);
+	return ret;
+}
+
+/*
+ * Do CoW for xattr.
+ */
+int ocfs2_refcount_cow_xattr(struct inode *inode,
+			     struct ocfs2_dinode *di,
+			     struct ocfs2_xattr_value_buf *vb,
+			     struct ocfs2_refcount_tree *ref_tree,
+			     struct buffer_head *ref_root_bh,
+			     u32 cpos, u32 write_len,
+			     struct ocfs2_post_refcount *post)
+{
+	int ret;
+	struct ocfs2_xattr_value_root *xv = vb->vb_xv;
+	struct ocfs2_inode_info *oi = OCFS2_I(inode);
+	struct ocfs2_cow_context *context = NULL;
+	u32 cow_start, cow_len;
+
+	BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
+
+	ret = ocfs2_refcount_cal_cow_clusters(inode, &xv->xr_list,
+					      cpos, write_len, UINT_MAX,
+					      &cow_start, &cow_len);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	BUG_ON(cow_len == 0);
+
+	context = kzalloc(sizeof(struct ocfs2_cow_context), GFP_NOFS);
+	if (!context) {
+		ret = -ENOMEM;
+		mlog_errno(ret);
+		goto out;
+	}
+
+	context->inode = inode;
+	context->cow_start = cow_start;
+	context->cow_len = cow_len;
+	context->ref_tree = ref_tree;
+	context->ref_root_bh = ref_root_bh;;
+	context->cow_object = xv;
+
+	context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_jbd;
+	/* We need the extra credits for duplicate_clusters by jbd. */
+	context->extra_credits =
+		ocfs2_clusters_to_blocks(inode->i_sb, 1) * cow_len;
+	context->get_clusters = ocfs2_xattr_value_get_clusters;
+	context->post_refcount = post;
+
+	ocfs2_init_xattr_value_extent_tree(&context->data_et,
+					   INODE_CACHE(inode), vb);
+
+	ret = ocfs2_replace_cow(context);
+	if (ret)
+		mlog_errno(ret);
+
+out:
+	kfree(context);
+	return ret;
+}
+
 /*
  * Insert a new extent into refcount tree and mark a extent rec
  * as refcounted in the dinode tree.
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h
index 356f99c85635..d09d64b29810 100644
--- a/fs/ocfs2/refcounttree.h
+++ b/fs/ocfs2/refcounttree.h
@@ -54,4 +54,33 @@ int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
 					  struct ocfs2_alloc_context **meta_ac);
 int ocfs2_refcount_cow(struct inode *inode, struct buffer_head *di_bh,
 		       u32 cpos, u32 write_len, u32 max_cpos);
+
+typedef int (ocfs2_post_refcount_func)(struct inode *inode,
+				       handle_t *handle,
+				       void *para);
+/*
+ * Some refcount caller need to do more work after we modify the data b-tree
+ * during refcount operation(including CoW and add refcount flag), and make the
+ * transaction complete. So it must give us this structure so that we can do it
+ * within our transaction.
+ *
+ */
+struct ocfs2_post_refcount {
+	int credits;			/* credits it need for journal. */
+	ocfs2_post_refcount_func *func;	/* real function. */
+	void *para;
+};
+
+int ocfs2_refcounted_xattr_delete_need(struct inode *inode,
+				       struct ocfs2_caching_info *ref_ci,
+				       struct buffer_head *ref_root_bh,
+				       struct ocfs2_xattr_value_root *xv,
+				       int *meta_add, int *credits);
+int ocfs2_refcount_cow_xattr(struct inode *inode,
+			     struct ocfs2_dinode *di,
+			     struct ocfs2_xattr_value_buf *vb,
+			     struct ocfs2_refcount_tree *ref_tree,
+			     struct buffer_head *ref_root_bh,
+			     u32 cpos, u32 write_len,
+			     struct ocfs2_post_refcount *post);
 #endif /* OCFS2_REFCOUNTTREE_H */
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index dda49c00362a..a538cebbe9c5 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -55,7 +55,7 @@
 #include "buffer_head_io.h"
 #include "super.h"
 #include "xattr.h"
-
+#include "refcounttree.h"
 
 struct ocfs2_xattr_def_value_root {
 	struct ocfs2_xattr_value_root	xv;
@@ -176,6 +176,14 @@ static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
 				  u64 src_blk, u64 last_blk, u64 to_blk,
 				  unsigned int start_bucket,
 				  u32 *first_hash);
+static int ocfs2_prepare_refcount_xattr(struct inode *inode,
+					struct ocfs2_dinode *di,
+					struct ocfs2_xattr_info *xi,
+					struct ocfs2_xattr_search *xis,
+					struct ocfs2_xattr_search *xbs,
+					struct ocfs2_refcount_tree **ref_tree,
+					int *meta_need,
+					int *credits);
 
 static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb)
 {
@@ -647,6 +655,7 @@ leave:
 static int __ocfs2_remove_xattr_range(struct inode *inode,
 				      struct ocfs2_xattr_value_buf *vb,
 				      u32 cpos, u32 phys_cpos, u32 len,
+				      unsigned int ext_flags,
 				      struct ocfs2_xattr_set_ctxt *ctxt)
 {
 	int ret;
@@ -678,7 +687,14 @@ static int __ocfs2_remove_xattr_range(struct inode *inode,
 		goto out;
 	}
 
-	ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc, phys_blkno, len);
+	if (ext_flags & OCFS2_EXT_REFCOUNTED)
+		ret = ocfs2_decrease_refcount(inode, handle,
+					ocfs2_blocks_to_clusters(inode->i_sb,
+								 phys_blkno),
+					len, ctxt->meta_ac, &ctxt->dealloc, 1);
+	else
+		ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc,
+						  phys_blkno, len);
 	if (ret)
 		mlog_errno(ret);
 
@@ -693,6 +709,7 @@ static int ocfs2_xattr_shrink_size(struct inode *inode,
 				   struct ocfs2_xattr_set_ctxt *ctxt)
 {
 	int ret = 0;
+	unsigned int ext_flags;
 	u32 trunc_len, cpos, phys_cpos, alloc_size;
 	u64 block;
 
@@ -704,7 +721,7 @@ static int ocfs2_xattr_shrink_size(struct inode *inode,
 	while (trunc_len) {
 		ret = ocfs2_xattr_get_clusters(inode, cpos, &phys_cpos,
 					       &alloc_size,
-					       &vb->vb_xv->xr_list, NULL);
+					       &vb->vb_xv->xr_list, &ext_flags);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -715,7 +732,7 @@ static int ocfs2_xattr_shrink_size(struct inode *inode,
 
 		ret = __ocfs2_remove_xattr_range(inode, vb, cpos,
 						 phys_cpos, alloc_size,
-						 ctxt);
+						 ext_flags, ctxt);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -1182,7 +1199,7 @@ static int ocfs2_xattr_get(struct inode *inode,
 
 static int __ocfs2_xattr_set_value_outside(struct inode *inode,
 					   handle_t *handle,
-					   struct ocfs2_xattr_value_root *xv,
+					   struct ocfs2_xattr_value_buf *vb,
 					   const void *value,
 					   int value_len)
 {
@@ -1193,18 +1210,22 @@ static int __ocfs2_xattr_set_value_outside(struct inode *inode,
 	u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len);
 	u64 blkno;
 	struct buffer_head *bh = NULL;
+	unsigned int ext_flags;
+	struct ocfs2_xattr_value_root *xv = vb->vb_xv;
 
 	BUG_ON(clusters > le32_to_cpu(xv->xr_clusters));
 
 	while (cpos < clusters) {
 		ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
 					       &num_clusters, &xv->xr_list,
-					       NULL);
+					       &ext_flags);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
 		}
 
+		BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED);
+
 		blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
 
 		for (i = 0; i < num_clusters * bpc; i++, blkno++) {
@@ -1356,7 +1377,7 @@ static int ocfs2_xattr_set_value_outside(struct inode *inode,
 		mlog_errno(ret);
 		return ret;
 	}
-	ret = __ocfs2_xattr_set_value_outside(inode, ctxt->handle, vb->vb_xv,
+	ret = __ocfs2_xattr_set_value_outside(inode, ctxt->handle, vb,
 					      xi->value, xi->value_len);
 	if (ret < 0)
 		mlog_errno(ret);
@@ -1595,7 +1616,7 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
 
 				ret = __ocfs2_xattr_set_value_outside(inode,
 								handle,
-								vb.vb_xv,
+								&vb,
 								xi->value,
 								xi->value_len);
 				if (ret < 0)
@@ -2431,6 +2452,7 @@ static int ocfs2_init_xattr_set_ctxt(struct inode *inode,
 				     struct ocfs2_xattr_search *xis,
 				     struct ocfs2_xattr_search *xbs,
 				     struct ocfs2_xattr_set_ctxt *ctxt,
+				     int extra_meta,
 				     int *credits)
 {
 	int clusters_add, meta_add, ret;
@@ -2447,6 +2469,7 @@ static int ocfs2_init_xattr_set_ctxt(struct inode *inode,
 		return ret;
 	}
 
+	meta_add += extra_meta;
 	mlog(0, "Set xattr %s, reserve meta blocks = %d, clusters = %d, "
 	     "credits = %d\n", xi->name, meta_add, clusters_add, *credits);
 
@@ -2714,10 +2737,11 @@ int ocfs2_xattr_set(struct inode *inode,
 {
 	struct buffer_head *di_bh = NULL;
 	struct ocfs2_dinode *di;
-	int ret, credits;
+	int ret, credits, ref_meta = 0, ref_credits = 0;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	struct inode *tl_inode = osb->osb_tl_inode;
 	struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
+	struct ocfs2_refcount_tree *ref_tree = NULL;
 
 	struct ocfs2_xattr_info xi = {
 		.name_index = name_index,
@@ -2782,6 +2806,17 @@ int ocfs2_xattr_set(struct inode *inode,
 			goto cleanup;
 	}
 
+	/* Check whether the value is refcounted and do some prepartion. */
+	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL &&
+	    (!xis.not_found || !xbs.not_found)) {
+		ret = ocfs2_prepare_refcount_xattr(inode, di, &xi,
+						   &xis, &xbs, &ref_tree,
+						   &ref_meta, &ref_credits);
+		if (ret) {
+			mlog_errno(ret);
+			goto cleanup;
+		}
+	}
 
 	mutex_lock(&tl_inode->i_mutex);
 
@@ -2796,7 +2831,7 @@ int ocfs2_xattr_set(struct inode *inode,
 	mutex_unlock(&tl_inode->i_mutex);
 
 	ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis,
-					&xbs, &ctxt, &credits);
+					&xbs, &ctxt, ref_meta, &credits);
 	if (ret) {
 		mlog_errno(ret);
 		goto cleanup;
@@ -2804,7 +2839,7 @@ int ocfs2_xattr_set(struct inode *inode,
 
 	/* we need to update inode's ctime field, so add credit for it. */
 	credits += OCFS2_INODE_UPDATE_CREDITS;
-	ctxt.handle = ocfs2_start_trans(osb, credits);
+	ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits);
 	if (IS_ERR(ctxt.handle)) {
 		ret = PTR_ERR(ctxt.handle);
 		mlog_errno(ret);
@@ -2823,6 +2858,8 @@ int ocfs2_xattr_set(struct inode *inode,
 		ocfs2_schedule_truncate_log_flush(osb, 1);
 	ocfs2_run_deallocs(osb, &ctxt.dealloc);
 cleanup:
+	if (ref_tree)
+		ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
 	up_write(&OCFS2_I(inode)->ip_xattr_sem);
 	ocfs2_inode_unlock(inode, 1);
 cleanup_nolock:
@@ -4802,6 +4839,9 @@ static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode,
 	struct ocfs2_xattr_entry *xe = xs->here;
 	struct ocfs2_xattr_header *xh = bucket_xh(xs->bucket);
 	void *base;
+	struct ocfs2_xattr_value_buf vb = {
+		.vb_access = ocfs2_journal_access,
+	};
 
 	BUG_ON(!xs->base || !xe || ocfs2_xattr_is_local(xe));
 
@@ -4818,8 +4858,10 @@ static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode,
 	xv = (struct ocfs2_xattr_value_root *)(base + offset +
 		 OCFS2_XATTR_SIZE(xe->xe_name_len));
 
+	vb.vb_xv = xv;
+	vb.vb_bh = xs->bucket->bu_bhs[block_off];
 	ret = __ocfs2_xattr_set_value_outside(inode, handle,
-					      xv, val, value_len);
+					      &vb, val, value_len);
 	if (ret)
 		mlog_errno(ret);
 out:
@@ -5310,6 +5352,174 @@ out:
 	return ret;
 }
 
+/*
+ * Whenever we modify a xattr value root in the bucket(e.g, CoW
+ * or change the extent record flag), we need to recalculate
+ * the metaecc for the whole bucket. So it is done here.
+ *
+ * Note:
+ * We have to give the extra credits for the caller.
+ */
+static int ocfs2_xattr_bucket_post_refcount(struct inode *inode,
+					    handle_t *handle,
+					    void *para)
+{
+	int ret;
+	struct ocfs2_xattr_bucket *bucket =
+			(struct ocfs2_xattr_bucket *)para;
+
+	ret = ocfs2_xattr_bucket_journal_access(handle, bucket,
+						OCFS2_JOURNAL_ACCESS_WRITE);
+	if (ret) {
+		mlog_errno(ret);
+		return ret;
+	}
+
+	ocfs2_xattr_bucket_journal_dirty(handle, bucket);
+
+	return 0;
+}
+
+/*
+ * Special action we need if the xattr value is refcounted.
+ *
+ * 1. If the xattr is refcounted, lock the tree.
+ * 2. CoW the xattr if we are setting the new value and the value
+ *    will be stored outside.
+ * 3. In other case, decrease_refcount will work for us, so just
+ *    lock the refcount tree, calculate the meta and credits is OK.
+ *
+ * We have to do CoW before ocfs2_init_xattr_set_ctxt since
+ * currently CoW is a completed transaction, while this function
+ * will also lock the allocators and let us deadlock. So we will
+ * CoW the whole xattr value.
+ */
+static int ocfs2_prepare_refcount_xattr(struct inode *inode,
+					struct ocfs2_dinode *di,
+					struct ocfs2_xattr_info *xi,
+					struct ocfs2_xattr_search *xis,
+					struct ocfs2_xattr_search *xbs,
+					struct ocfs2_refcount_tree **ref_tree,
+					int *meta_add,
+					int *credits)
+{
+	int ret = 0;
+	struct ocfs2_xattr_block *xb;
+	struct ocfs2_xattr_entry *xe;
+	char *base;
+	u32 p_cluster, num_clusters;
+	unsigned int ext_flags;
+	int name_offset, name_len;
+	struct ocfs2_xattr_value_buf vb;
+	struct ocfs2_xattr_bucket *bucket = NULL;
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct ocfs2_post_refcount refcount;
+	struct ocfs2_post_refcount *p = NULL;
+	struct buffer_head *ref_root_bh = NULL;
+
+	if (!xis->not_found) {
+		xe = xis->here;
+		name_offset = le16_to_cpu(xe->xe_name_offset);
+		name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
+		base = xis->base;
+		vb.vb_bh = xis->inode_bh;
+		vb.vb_access = ocfs2_journal_access_di;
+	} else {
+		int i, block_off = 0;
+		xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
+		xe = xbs->here;
+		name_offset = le16_to_cpu(xe->xe_name_offset);
+		name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
+		i = xbs->here - xbs->header->xh_entries;
+
+		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
+			ret = ocfs2_xattr_bucket_get_name_value(inode,
+							bucket_xh(xbs->bucket),
+							i, &block_off,
+							&name_offset);
+			if (ret) {
+				mlog_errno(ret);
+				goto out;
+			}
+			base = bucket_block(xbs->bucket, block_off);
+			vb.vb_bh = xbs->bucket->bu_bhs[block_off];
+			vb.vb_access = ocfs2_journal_access;
+
+			if (ocfs2_meta_ecc(osb)) {
+				/*create parameters for ocfs2_post_refcount. */
+				bucket = xbs->bucket;
+				refcount.credits = bucket->bu_blocks;
+				refcount.para = bucket;
+				refcount.func =
+					ocfs2_xattr_bucket_post_refcount;
+				p = &refcount;
+			}
+		} else {
+			base = xbs->base;
+			vb.vb_bh = xbs->xattr_bh;
+			vb.vb_access = ocfs2_journal_access_xb;
+		}
+	}
+
+	if (ocfs2_xattr_is_local(xe))
+		goto out;
+
+	vb.vb_xv = (struct ocfs2_xattr_value_root *)
+				(base + name_offset + name_len);
+
+	ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster,
+				       &num_clusters, &vb.vb_xv->xr_list,
+				       &ext_flags);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	/*
+	 * We just need to check the 1st extent record, since we always
+	 * CoW the whole xattr. So there shouldn't be a xattr with
+	 * some REFCOUNT extent recs after the 1st one.
+	 */
+	if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
+		goto out;
+
+	ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc),
+				       1, ref_tree, &ref_root_bh);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	/*
+	 * If we are deleting the xattr or the new size will be stored inside,
+	 * cool, leave it there, the xattr truncate process will remove them
+	 * for us(it still needs the refcount tree lock and the meta, credits).
+	 * And the worse case is that every cluster truncate will split the
+	 * refcount tree, and make the original extent become 3. So we will need
+	 * 2 * cluster more extent recs at most.
+	 */
+	if (!xi->value || xi->value_len <= OCFS2_XATTR_INLINE_SIZE) {
+
+		ret = ocfs2_refcounted_xattr_delete_need(inode,
+							 &(*ref_tree)->rf_ci,
+							 ref_root_bh, vb.vb_xv,
+							 meta_add, credits);
+		if (ret)
+			mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_refcount_cow_xattr(inode, di, &vb,
+				       *ref_tree, ref_root_bh, 0,
+				       le32_to_cpu(vb.vb_xv->xr_clusters), p);
+	if (ret)
+		mlog_errno(ret);
+
+out:
+	brelse(ref_root_bh);
+	return ret;
+}
+
 /*
  * 'security' attributes support
  */
-- 
cgit v1.2.3


From fd68a894fc9641f816d9cffa58e853ba91cbc1a1 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:43:21 +0800
Subject: ocfs2: Remove inode from ocfs2_xattr_bucket_get_name_value.

ocfs2_xattr_bucket_get_name_value actually only uses the
super_block, so pass that instead of the inode.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/ocfs2/xattr.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index a538cebbe9c5..eeb5b7caf195 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -140,7 +140,7 @@ struct ocfs2_xattr_search {
 	int not_found;
 };
 
-static int ocfs2_xattr_bucket_get_name_value(struct inode *inode,
+static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb,
 					     struct ocfs2_xattr_header *xh,
 					     int index,
 					     int *block_off,
@@ -1101,7 +1101,7 @@ static int ocfs2_xattr_block_get(struct inode *inode,
 		i = xs->here - xs->header->xh_entries;
 
 		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
-			ret = ocfs2_xattr_bucket_get_name_value(inode,
+			ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
 								bucket_xh(xs->bucket),
 								i,
 								&block_off,
@@ -2297,7 +2297,7 @@ static int ocfs2_calc_xattr_set_need(struct inode *inode,
 		old_in_xb = 1;
 
 		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
-			ret = ocfs2_xattr_bucket_get_name_value(inode,
+			ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
 							bucket_xh(xbs->bucket),
 							i, &block_off,
 							&name_offset);
@@ -2972,7 +2972,7 @@ static int ocfs2_find_xe_in_bucket(struct inode *inode,
 		if (cmp)
 			continue;
 
-		ret = ocfs2_xattr_bucket_get_name_value(inode,
+		ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
 							xh,
 							i,
 							&block_off,
@@ -3216,7 +3216,7 @@ struct ocfs2_xattr_tree_list {
 	size_t result;
 };
 
-static int ocfs2_xattr_bucket_get_name_value(struct inode *inode,
+static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb,
 					     struct ocfs2_xattr_header *xh,
 					     int index,
 					     int *block_off,
@@ -3229,8 +3229,8 @@ static int ocfs2_xattr_bucket_get_name_value(struct inode *inode,
 
 	name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset);
 
-	*block_off = name_offset >> inode->i_sb->s_blocksize_bits;
-	*new_offset = name_offset % inode->i_sb->s_blocksize;
+	*block_off = name_offset >> sb->s_blocksize_bits;
+	*new_offset = name_offset % sb->s_blocksize;
 
 	return 0;
 }
@@ -3250,7 +3250,7 @@ static int ocfs2_list_xattr_bucket(struct inode *inode,
 		prefix = ocfs2_xattr_prefix(type);
 
 		if (prefix) {
-			ret = ocfs2_xattr_bucket_get_name_value(inode,
+			ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
 								bucket_xh(bucket),
 								i,
 								&block_off,
@@ -4845,7 +4845,7 @@ static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode,
 
 	BUG_ON(!xs->base || !xe || ocfs2_xattr_is_local(xe));
 
-	ret = ocfs2_xattr_bucket_get_name_value(inode, xh,
+	ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, xh,
 						xe - xh->xh_entries,
 						&block_off,
 						&offset);
@@ -5433,7 +5433,7 @@ static int ocfs2_prepare_refcount_xattr(struct inode *inode,
 		i = xbs->here - xbs->header->xh_entries;
 
 		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
-			ret = ocfs2_xattr_bucket_get_name_value(inode,
+			ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
 							bucket_xh(xbs->bucket),
 							i, &block_off,
 							&name_offset);
-- 
cgit v1.2.3


From 5aea1f0ef4024ba28213c10181e1b16ec678c82d Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:43:24 +0800
Subject: ocfs2: Abstract the creation of xattr block.

In xattr reflink, we also need to create an xattr block, so
abstract the process out.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/ocfs2/xattr.c | 115 +++++++++++++++++++++++++++++++++----------------------
 1 file changed, 70 insertions(+), 45 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index eeb5b7caf195..a9339eb94a2e 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -2105,6 +2105,72 @@ cleanup:
 	return ret;
 }
 
+static int ocfs2_create_xattr_block(handle_t *handle,
+				    struct inode *inode,
+				    struct buffer_head *inode_bh,
+				    struct ocfs2_alloc_context *meta_ac,
+				    struct buffer_head **ret_bh)
+{
+	int ret;
+	u16 suballoc_bit_start;
+	u32 num_got;
+	u64 first_blkno;
+	struct ocfs2_dinode *di =  (struct ocfs2_dinode *)inode_bh->b_data;
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct buffer_head *new_bh = NULL;
+	struct ocfs2_xattr_block *xblk;
+
+	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), inode_bh,
+				      OCFS2_JOURNAL_ACCESS_CREATE);
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto end;
+	}
+
+	ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1,
+				   &suballoc_bit_start, &num_got,
+				   &first_blkno);
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto end;
+	}
+
+	new_bh = sb_getblk(inode->i_sb, first_blkno);
+	ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh);
+
+	ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode),
+				      new_bh,
+				      OCFS2_JOURNAL_ACCESS_CREATE);
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto end;
+	}
+
+	/* Initialize ocfs2_xattr_block */
+	xblk = (struct ocfs2_xattr_block *)new_bh->b_data;
+	memset(xblk, 0, inode->i_sb->s_blocksize);
+	strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE);
+	xblk->xb_suballoc_slot = cpu_to_le16(osb->slot_num);
+	xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start);
+	xblk->xb_fs_generation = cpu_to_le32(osb->fs_generation);
+	xblk->xb_blkno = cpu_to_le64(first_blkno);
+
+	ret = ocfs2_journal_dirty(handle, new_bh);
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto end;
+	}
+	di->i_xattr_loc = cpu_to_le64(first_blkno);
+	ocfs2_journal_dirty(handle, inode_bh);
+
+	*ret_bh = new_bh;
+	new_bh = NULL;
+
+end:
+	brelse(new_bh);
+	return ret;
+}
+
 /*
  * ocfs2_xattr_block_set()
  *
@@ -2117,65 +2183,24 @@ static int ocfs2_xattr_block_set(struct inode *inode,
 				 struct ocfs2_xattr_set_ctxt *ctxt)
 {
 	struct buffer_head *new_bh = NULL;
-	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
-	struct ocfs2_dinode *di =  (struct ocfs2_dinode *)xs->inode_bh->b_data;
 	handle_t *handle = ctxt->handle;
 	struct ocfs2_xattr_block *xblk = NULL;
-	u16 suballoc_bit_start;
-	u32 num_got;
-	u64 first_blkno;
 	int ret;
 
 	if (!xs->xattr_bh) {
-		ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode),
-					      xs->inode_bh,
-					      OCFS2_JOURNAL_ACCESS_CREATE);
-		if (ret < 0) {
-			mlog_errno(ret);
-			goto end;
-		}
-
-		ret = ocfs2_claim_metadata(osb, handle, ctxt->meta_ac, 1,
-					   &suballoc_bit_start, &num_got,
-					   &first_blkno);
-		if (ret < 0) {
-			mlog_errno(ret);
-			goto end;
-		}
-
-		new_bh = sb_getblk(inode->i_sb, first_blkno);
-		ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh);
-
-		ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode),
-					      new_bh,
-					      OCFS2_JOURNAL_ACCESS_CREATE);
-		if (ret < 0) {
+		ret = ocfs2_create_xattr_block(handle, inode, xs->inode_bh,
+					       ctxt->meta_ac, &new_bh);
+		if (ret) {
 			mlog_errno(ret);
 			goto end;
 		}
 
-		/* Initialize ocfs2_xattr_block */
 		xs->xattr_bh = new_bh;
-		xblk = (struct ocfs2_xattr_block *)new_bh->b_data;
-		memset(xblk, 0, inode->i_sb->s_blocksize);
-		strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE);
-		xblk->xb_suballoc_slot = cpu_to_le16(osb->slot_num);
-		xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start);
-		xblk->xb_fs_generation = cpu_to_le32(osb->fs_generation);
-		xblk->xb_blkno = cpu_to_le64(first_blkno);
-
+		xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
 		xs->header = &xblk->xb_attrs.xb_header;
 		xs->base = (void *)xs->header;
 		xs->end = (void *)xblk + inode->i_sb->s_blocksize;
 		xs->here = xs->header->xh_entries;
-
-		ret = ocfs2_journal_dirty(handle, new_bh);
-		if (ret < 0) {
-			mlog_errno(ret);
-			goto end;
-		}
-		di->i_xattr_loc = cpu_to_le64(first_blkno);
-		ocfs2_journal_dirty(handle, xs->inode_bh);
 	} else
 		xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
 
-- 
cgit v1.2.3


From 47bca4950bc40fb54e9d41cbbc8b06cd653d2ae2 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:43:42 +0800
Subject: ocfs2: Abstract ocfs2 xattr tree extend rec iteration process.

Currently we have ocfs2_iterate_xattr_buckets, which takes a
parameter and a callback to iterate over a series of buckets. That is
good. But the 2 callers, ocfs2_xattr_tree_list_index_block and
ocfs2_delete_xattr_index_block, are almost the same. The only
difference is that the latter also needs to handle the extent record.
So add a new function named ocfs2_iterate_xattr_index_block.
It can be given a callback which is invoked for each extent record.
So now we only have one iteration function for the xattr index
block. And what's more, it is useful for our future reflink
operations.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/ocfs2/xattr.c | 147 ++++++++++++++++++++++++++++---------------------------
 1 file changed, 76 insertions(+), 71 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index a9339eb94a2e..bfa7ee208855 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -157,7 +157,7 @@ static int ocfs2_xattr_index_block_find(struct inode *inode,
 					struct ocfs2_xattr_search *xs);
 
 static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
-					struct ocfs2_xattr_tree_root *xt,
+					struct buffer_head *blk_bh,
 					char *buffer,
 					size_t buffer_size);
 
@@ -170,8 +170,23 @@ static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
 					     struct ocfs2_xattr_search *xs,
 					     struct ocfs2_xattr_set_ctxt *ctxt);
 
-static int ocfs2_delete_xattr_index_block(struct inode *inode,
-					  struct buffer_head *xb_bh);
+typedef int (xattr_tree_rec_func)(struct inode *inode,
+				  struct buffer_head *root_bh,
+				  u64 blkno, u32 cpos, u32 len, void *para);
+static int ocfs2_iterate_xattr_index_block(struct inode *inode,
+					   struct buffer_head *root_bh,
+					   xattr_tree_rec_func *rec_func,
+					   void *para);
+static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
+					struct ocfs2_xattr_bucket *bucket,
+					void *para);
+static int ocfs2_rm_xattr_cluster(struct inode *inode,
+				  struct buffer_head *root_bh,
+				  u64 blkno,
+				  u32 cpos,
+				  u32 len,
+				  void *para);
+
 static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
 				  u64 src_blk, u64 last_blk, u64 to_blk,
 				  unsigned int start_bucket,
@@ -870,11 +885,9 @@ static int ocfs2_xattr_block_list(struct inode *inode,
 		struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
 		ret = ocfs2_xattr_list_entries(inode, header,
 					       buffer, buffer_size);
-	} else {
-		struct ocfs2_xattr_tree_root *xt = &xb->xb_attrs.xb_root;
-		ret = ocfs2_xattr_tree_list_index_block(inode, xt,
+	} else
+		ret = ocfs2_xattr_tree_list_index_block(inode, blk_bh,
 						   buffer, buffer_size);
-	}
 
 	brelse(blk_bh);
 
@@ -1801,7 +1814,10 @@ static int ocfs2_xattr_block_remove(struct inode *inode,
 		struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header);
 		ret = ocfs2_remove_value_outside(inode, &vb, header);
 	} else
-		ret = ocfs2_delete_xattr_index_block(inode, blk_bh);
+		ret = ocfs2_iterate_xattr_index_block(inode,
+						blk_bh,
+						ocfs2_rm_xattr_cluster,
+						NULL);
 
 	return ret;
 }
@@ -3298,22 +3314,19 @@ static int ocfs2_list_xattr_bucket(struct inode *inode,
 	return ret;
 }
 
-static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
-					     struct ocfs2_xattr_tree_root *xt,
-					     char *buffer,
-					     size_t buffer_size)
+static int ocfs2_iterate_xattr_index_block(struct inode *inode,
+					   struct buffer_head *blk_bh,
+					   xattr_tree_rec_func *rec_func,
+					   void *para)
 {
-	struct ocfs2_extent_list *el = &xt->xt_list;
+	struct ocfs2_xattr_block *xb =
+			(struct ocfs2_xattr_block *)blk_bh->b_data;
+	struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list;
 	int ret = 0;
 	u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0;
 	u64 p_blkno = 0;
-	struct ocfs2_xattr_tree_list xl = {
-		.buffer = buffer,
-		.buffer_size = buffer_size,
-		.result = 0,
-	};
 
-	if (le16_to_cpu(el->l_next_free_rec) == 0)
+	if (!el->l_next_free_rec || !rec_func)
 		return 0;
 
 	while (name_hash > 0) {
@@ -3321,16 +3334,15 @@ static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
 					  &e_cpos, &num_clusters, el);
 		if (ret) {
 			mlog_errno(ret);
-			goto out;
+			break;
 		}
 
-		ret = ocfs2_iterate_xattr_buckets(inode, p_blkno, num_clusters,
-						  ocfs2_list_xattr_bucket,
-						  &xl);
+		ret = rec_func(inode, blk_bh, p_blkno, e_cpos,
+			       num_clusters, para);
 		if (ret) {
 			if (ret != -ERANGE)
 				mlog_errno(ret);
-			goto out;
+			break;
 		}
 
 		if (e_cpos == 0)
@@ -3339,6 +3351,37 @@ static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
 		name_hash = e_cpos - 1;
 	}
 
+	return ret;
+
+}
+
+static int ocfs2_list_xattr_tree_rec(struct inode *inode,
+				     struct buffer_head *root_bh,
+				     u64 blkno, u32 cpos, u32 len, void *para)
+{
+	return ocfs2_iterate_xattr_buckets(inode, blkno, len,
+					   ocfs2_list_xattr_bucket, para);
+}
+
+static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
+					     struct buffer_head *blk_bh,
+					     char *buffer,
+					     size_t buffer_size)
+{
+	int ret;
+	struct ocfs2_xattr_tree_list xl = {
+		.buffer = buffer,
+		.buffer_size = buffer_size,
+		.result = 0,
+	};
+
+	ret = ocfs2_iterate_xattr_index_block(inode, blk_bh,
+					      ocfs2_list_xattr_tree_rec, &xl);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
 	ret = xl.result;
 out:
 	return ret;
@@ -4897,7 +4940,8 @@ static int ocfs2_rm_xattr_cluster(struct inode *inode,
 				  struct buffer_head *root_bh,
 				  u64 blkno,
 				  u32 cpos,
-				  u32 len)
+				  u32 len,
+				  void *para)
 {
 	int ret;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
@@ -4909,6 +4953,13 @@ static int ocfs2_rm_xattr_cluster(struct inode *inode,
 	struct ocfs2_cached_dealloc_ctxt dealloc;
 	struct ocfs2_extent_tree et;
 
+	ret = ocfs2_iterate_xattr_buckets(inode, blkno, len,
+					  ocfs2_delete_xattr_in_bucket, NULL);
+	if (ret) {
+		mlog_errno(ret);
+		return ret;
+	}
+
 	ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh);
 
 	ocfs2_init_dealloc_ctxt(&dealloc);
@@ -5331,52 +5382,6 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
 	return ret;
 }
 
-static int ocfs2_delete_xattr_index_block(struct inode *inode,
-					  struct buffer_head *xb_bh)
-{
-	struct ocfs2_xattr_block *xb =
-			(struct ocfs2_xattr_block *)xb_bh->b_data;
-	struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list;
-	int ret = 0;
-	u32 name_hash = UINT_MAX, e_cpos, num_clusters;
-	u64 p_blkno;
-
-	if (le16_to_cpu(el->l_next_free_rec) == 0)
-		return 0;
-
-	while (name_hash > 0) {
-		ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
-					  &e_cpos, &num_clusters, el);
-		if (ret) {
-			mlog_errno(ret);
-			goto out;
-		}
-
-		ret = ocfs2_iterate_xattr_buckets(inode, p_blkno, num_clusters,
-						  ocfs2_delete_xattr_in_bucket,
-						  NULL);
-		if (ret) {
-			mlog_errno(ret);
-			goto out;
-		}
-
-		ret = ocfs2_rm_xattr_cluster(inode, xb_bh,
-					     p_blkno, e_cpos, num_clusters);
-		if (ret) {
-			mlog_errno(ret);
-			break;
-		}
-
-		if (e_cpos == 0)
-			break;
-
-		name_hash = e_cpos - 1;
-	}
-
-out:
-	return ret;
-}
-
 /*
  * Whenever we modify a xattr value root in the bucket(e.g, CoW
  * or change the extent record flag), we need to recalculate
-- 
cgit v1.2.3


From 0129241e2b3b90ff83a8c774353e5612d84bd493 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Mon, 21 Sep 2009 13:04:19 +0800
Subject: ocfs2: Attach xattr clusters to refcount tree.

In ocfs2, when an xattr's value is larger than OCFS2_XATTR_INLINE_SIZE,
it is kept outside of the blocks where we store the xattr entry, and
those value clusters are also stored in a b-tree. So this patch
attaches all these clusters to the refcount tree as well.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/ocfs2/refcounttree.c |  29 ++++-
 fs/ocfs2/refcounttree.h |   7 ++
 fs/ocfs2/xattr.c        | 291 ++++++++++++++++++++++++++++++++++++++++++++++++
 fs/ocfs2/xattr.h        |   6 +-
 4 files changed, 329 insertions(+), 4 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index a5b5bef054a7..a85c01c6629d 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -3547,7 +3547,8 @@ int ocfs2_add_refcount_flag(struct inode *inode,
 			    struct ocfs2_caching_info *ref_ci,
 			    struct buffer_head *ref_root_bh,
 			    u32 cpos, u32 p_cluster, u32 num_clusters,
-			    struct ocfs2_cached_dealloc_ctxt *dealloc)
+			    struct ocfs2_cached_dealloc_ctxt *dealloc,
+			    struct ocfs2_post_refcount *post)
 {
 	int ret;
 	handle_t *handle;
@@ -3576,6 +3577,9 @@ int ocfs2_add_refcount_flag(struct inode *inode,
 		}
 	}
 
+	if (post)
+		credits += post->credits;
+
 	handle = ocfs2_start_trans(osb, credits);
 	if (IS_ERR(handle)) {
 		ret = PTR_ERR(handle);
@@ -3594,8 +3598,16 @@ int ocfs2_add_refcount_flag(struct inode *inode,
 	ret = __ocfs2_increase_refcount(handle, ref_ci, ref_root_bh,
 					p_cluster, num_clusters,
 					meta_ac, dealloc);
-	if (ret)
+	if (ret) {
 		mlog_errno(ret);
+		goto out_commit;
+	}
+
+	if (post && post->func) {
+		ret = post->func(inode, handle, post->para);
+		if (ret)
+			mlog_errno(ret);
+	}
 
 out_commit:
 	ocfs2_commit_trans(osb, handle);
@@ -3688,7 +3700,7 @@ static int ocfs2_attach_refcount_tree(struct inode *inode,
 						      &ref_tree->rf_ci,
 						      ref_root_bh, cpos,
 						      p_cluster, num_clusters,
-						      &dealloc);
+						      &dealloc, NULL);
 			if (ret) {
 				mlog_errno(ret);
 				goto unlock;
@@ -3699,6 +3711,17 @@ static int ocfs2_attach_refcount_tree(struct inode *inode,
 		cpos += num_clusters;
 	}
 
+	if (oi->ip_dyn_features & OCFS2_HAS_XATTR_FL) {
+		ret = ocfs2_xattr_attach_refcount_tree(inode, di_bh,
+						       &ref_tree->rf_ci,
+						       ref_root_bh,
+						       &dealloc);
+		if (ret) {
+			mlog_errno(ret);
+			goto unlock;
+		}
+	}
+
 	if (data_changed) {
 		ret = ocfs2_change_ctime(inode, di_bh);
 		if (ret)
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h
index d09d64b29810..7d6900c904d4 100644
--- a/fs/ocfs2/refcounttree.h
+++ b/fs/ocfs2/refcounttree.h
@@ -83,4 +83,11 @@ int ocfs2_refcount_cow_xattr(struct inode *inode,
 			     struct buffer_head *ref_root_bh,
 			     u32 cpos, u32 write_len,
 			     struct ocfs2_post_refcount *post);
+int ocfs2_add_refcount_flag(struct inode *inode,
+			    struct ocfs2_extent_tree *data_et,
+			    struct ocfs2_caching_info *ref_ci,
+			    struct buffer_head *ref_root_bh,
+			    u32 cpos, u32 p_cluster, u32 num_clusters,
+			    struct ocfs2_cached_dealloc_ctxt *dealloc,
+			    struct ocfs2_post_refcount *post);
 #endif /* OCFS2_REFCOUNTTREE_H */
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index bfa7ee208855..501539a733f4 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -5550,6 +5550,297 @@ out:
 	return ret;
 }
 
+/*
+ * Add the REFCOUNTED flags for all the extent rec in ocfs2_xattr_value_root.
+ * The physical clusters will be added to refcount tree.
+ */
+static int ocfs2_xattr_value_attach_refcount(struct inode *inode,
+				struct ocfs2_xattr_value_root *xv,
+				struct ocfs2_extent_tree *value_et,
+				struct ocfs2_caching_info *ref_ci,
+				struct buffer_head *ref_root_bh,
+				struct ocfs2_cached_dealloc_ctxt *dealloc,
+				struct ocfs2_post_refcount *refcount)
+{
+	int ret = 0;
+	u32 clusters = le32_to_cpu(xv->xr_clusters);
+	u32 cpos, p_cluster, num_clusters;
+	struct ocfs2_extent_list *el = &xv->xr_list;
+	unsigned int ext_flags;
+
+	cpos = 0;
+	while (cpos < clusters) {
+		ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
+					       &num_clusters, el, &ext_flags);
+
+		cpos += num_clusters;
+		if ((ext_flags & OCFS2_EXT_REFCOUNTED))
+			continue;
+
+		BUG_ON(!p_cluster);
+
+		ret = ocfs2_add_refcount_flag(inode, value_et,
+					      ref_ci, ref_root_bh,
+					      cpos - num_clusters,
+					      p_cluster, num_clusters,
+					      dealloc, refcount);
+		if (ret) {
+			mlog_errno(ret);
+			break;
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * Given a normal ocfs2_xattr_header, refcount all the entries which
+ * have value stored outside.
+ * Used for xattrs stored in inode and ocfs2_xattr_block.
+ */
+static int ocfs2_xattr_attach_refcount_normal(struct inode *inode,
+				struct ocfs2_xattr_value_buf *vb,
+				struct ocfs2_xattr_header *header,
+				struct ocfs2_caching_info *ref_ci,
+				struct buffer_head *ref_root_bh,
+				struct ocfs2_cached_dealloc_ctxt *dealloc)
+{
+
+	struct ocfs2_xattr_entry *xe;
+	struct ocfs2_xattr_value_root *xv;
+	struct ocfs2_extent_tree et;
+	int i, ret = 0;
+
+	for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
+		xe = &header->xh_entries[i];
+
+		if (ocfs2_xattr_is_local(xe))
+			continue;
+
+		xv = (struct ocfs2_xattr_value_root *)((void *)header +
+			le16_to_cpu(xe->xe_name_offset) +
+			OCFS2_XATTR_SIZE(xe->xe_name_len));
+
+		vb->vb_xv = xv;
+		ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
+
+		ret = ocfs2_xattr_value_attach_refcount(inode, xv, &et,
+							ref_ci, ref_root_bh,
+							dealloc, NULL);
+		if (ret) {
+			mlog_errno(ret);
+			break;
+		}
+	}
+
+	return ret;
+}
+
+static int ocfs2_xattr_inline_attach_refcount(struct inode *inode,
+				struct buffer_head *fe_bh,
+				struct ocfs2_caching_info *ref_ci,
+				struct buffer_head *ref_root_bh,
+				struct ocfs2_cached_dealloc_ctxt *dealloc)
+{
+	struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data;
+	struct ocfs2_xattr_header *header = (struct ocfs2_xattr_header *)
+				(fe_bh->b_data + inode->i_sb->s_blocksize -
+				le16_to_cpu(di->i_xattr_inline_size));
+	struct ocfs2_xattr_value_buf vb = {
+		.vb_bh = fe_bh,
+		.vb_access = ocfs2_journal_access_di,
+	};
+
+	return ocfs2_xattr_attach_refcount_normal(inode, &vb, header,
+						  ref_ci, ref_root_bh, dealloc);
+}
+
+struct ocfs2_xattr_tree_value_refcount_para {
+	struct ocfs2_caching_info *ref_ci;
+	struct buffer_head *ref_root_bh;
+	struct ocfs2_cached_dealloc_ctxt *dealloc;
+};
+
+static int ocfs2_get_xattr_tree_value_root(struct super_block *sb,
+					   struct ocfs2_xattr_bucket *bucket,
+					   int offset,
+					   struct ocfs2_xattr_value_root **xv,
+					   struct buffer_head **bh)
+{
+	int ret, block_off, name_offset;
+	struct ocfs2_xattr_header *xh = bucket_xh(bucket);
+	struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset];
+	void *base;
+
+	ret = ocfs2_xattr_bucket_get_name_value(sb,
+						bucket_xh(bucket),
+						offset,
+						&block_off,
+						&name_offset);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	base = bucket_block(bucket, block_off);
+
+	*xv = (struct ocfs2_xattr_value_root *)(base + name_offset +
+			 OCFS2_XATTR_SIZE(xe->xe_name_len));
+
+	if (bh)
+		*bh = bucket->bu_bhs[block_off];
+out:
+	return ret;
+}
+
+/*
+ * For a given xattr bucket, refcount all the entries which
+ * have value stored outside.
+ */
+static int ocfs2_xattr_bucket_value_refcount(struct inode *inode,
+					     struct ocfs2_xattr_bucket *bucket,
+					     void *para)
+{
+	int i, ret = 0;
+	struct ocfs2_extent_tree et;
+	struct ocfs2_xattr_tree_value_refcount_para *ref =
+			(struct ocfs2_xattr_tree_value_refcount_para *)para;
+	struct ocfs2_xattr_header *xh =
+			(struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data;
+	struct ocfs2_xattr_entry *xe;
+	struct ocfs2_xattr_value_buf vb = {
+		.vb_access = ocfs2_journal_access,
+	};
+	struct ocfs2_post_refcount refcount = {
+		.credits = bucket->bu_blocks,
+		.para = bucket,
+		.func = ocfs2_xattr_bucket_post_refcount,
+	};
+	struct ocfs2_post_refcount *p = NULL;
+
+	/* We only need post_refcount if we support metaecc. */
+	if (ocfs2_meta_ecc(OCFS2_SB(inode->i_sb)))
+		p = &refcount;
+
+	mlog(0, "refcount bucket %llu, count = %u\n",
+	     (unsigned long long)bucket_blkno(bucket),
+	     le16_to_cpu(xh->xh_count));
+	for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
+		xe = &xh->xh_entries[i];
+
+		if (ocfs2_xattr_is_local(xe))
+			continue;
+
+		ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, i,
+						      &vb.vb_xv, &vb.vb_bh);
+		if (ret) {
+			mlog_errno(ret);
+			break;
+		}
+
+		ocfs2_init_xattr_value_extent_tree(&et,
+						   INODE_CACHE(inode), &vb);
+
+		ret = ocfs2_xattr_value_attach_refcount(inode, vb.vb_xv,
+							&et, ref->ref_ci,
+							ref->ref_root_bh,
+							ref->dealloc, p);
+		if (ret) {
+			mlog_errno(ret);
+			break;
+		}
+	}
+
+	return ret;
+
+}
+
+static int ocfs2_refcount_xattr_tree_rec(struct inode *inode,
+				     struct buffer_head *root_bh,
+				     u64 blkno, u32 cpos, u32 len, void *para)
+{
+	return ocfs2_iterate_xattr_buckets(inode, blkno, len,
+					   ocfs2_xattr_bucket_value_refcount,
+					   para);
+}
+
+static int ocfs2_xattr_block_attach_refcount(struct inode *inode,
+				struct buffer_head *blk_bh,
+				struct ocfs2_caching_info *ref_ci,
+				struct buffer_head *ref_root_bh,
+				struct ocfs2_cached_dealloc_ctxt *dealloc)
+{
+	int ret = 0;
+	struct ocfs2_xattr_block *xb =
+				(struct ocfs2_xattr_block *)blk_bh->b_data;
+
+	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
+		struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
+		struct ocfs2_xattr_value_buf vb = {
+			.vb_bh = blk_bh,
+			.vb_access = ocfs2_journal_access_xb,
+		};
+
+		ret = ocfs2_xattr_attach_refcount_normal(inode, &vb, header,
+							 ref_ci, ref_root_bh,
+							 dealloc);
+	} else {
+		struct ocfs2_xattr_tree_value_refcount_para para = {
+			.ref_ci = ref_ci,
+			.ref_root_bh = ref_root_bh,
+			.dealloc = dealloc,
+		};
+
+		ret = ocfs2_iterate_xattr_index_block(inode, blk_bh,
+						ocfs2_refcount_xattr_tree_rec,
+						&para);
+	}
+
+	return ret;
+}
+
+int ocfs2_xattr_attach_refcount_tree(struct inode *inode,
+				     struct buffer_head *fe_bh,
+				     struct ocfs2_caching_info *ref_ci,
+				     struct buffer_head *ref_root_bh,
+				     struct ocfs2_cached_dealloc_ctxt *dealloc)
+{
+	int ret = 0;
+	struct ocfs2_inode_info *oi = OCFS2_I(inode);
+	struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data;
+	struct buffer_head *blk_bh = NULL;
+
+	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
+		ret = ocfs2_xattr_inline_attach_refcount(inode, fe_bh,
+							 ref_ci, ref_root_bh,
+							 dealloc);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+	}
+
+	if (!di->i_xattr_loc)
+		goto out;
+
+	ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
+				     &blk_bh);
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_xattr_block_attach_refcount(inode, blk_bh, ref_ci,
+						ref_root_bh, dealloc);
+	if (ret)
+		mlog_errno(ret);
+
+	brelse(blk_bh);
+out:
+
+	return ret;
+}
+
 /*
  * 'security' attributes support
  */
diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h
index 1ca7e9a1b7bc..a3295d705cea 100644
--- a/fs/ocfs2/xattr.h
+++ b/fs/ocfs2/xattr.h
@@ -83,5 +83,9 @@ struct ocfs2_xattr_value_buf {
 	struct ocfs2_xattr_value_root	*vb_xv;
 };
 
-
+int ocfs2_xattr_attach_refcount_tree(struct inode *inode,
+				     struct buffer_head *fe_bh,
+				     struct ocfs2_caching_info *ref_ci,
+				     struct buffer_head *ref_root_bh,
+				     struct ocfs2_cached_dealloc_ctxt *dealloc);
 #endif /* OCFS2_XATTR_H */
-- 
cgit v1.2.3


From 8b2c0dba5159570af5721d40490f6c529d721500 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:43:49 +0800
Subject: ocfs2: Call refcount tree remove process properly.

Now with xattr refcount support, we need to check whether
we have xattr refcounted before we remove the refcount tree.

Now the mechanism is:
1) Check whether i_clusters == 0, if no, exit.
2) check whether we have i_xattr_loc in dinode. if yes, exit.
3) Check whether we have inline xattr stored outside, if yes, exit.
4) Remove the tree.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/ocfs2/file.c         |  2 ++
 fs/ocfs2/inode.c        |  7 +++++++
 fs/ocfs2/refcounttree.c | 36 ++++++++++++++++++++++++++++++++++++
 fs/ocfs2/refcounttree.h |  3 +++
 fs/ocfs2/xattr.c        | 23 +++++++++++++++++++++++
 fs/ocfs2/xattr.h        |  2 ++
 6 files changed, 73 insertions(+)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 75f5b81805b5..2effac5d030e 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -531,6 +531,8 @@ bail_unlock_sem:
 	up_write(&OCFS2_I(inode)->ip_alloc_sem);
 
 bail:
+	if (!status && OCFS2_I(inode)->ip_clusters == 0)
+		status = ocfs2_try_remove_refcount_tree(inode, di_bh);
 
 	mlog_exit(status);
 	return status;
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index e82ceb31cc83..0297fb8982b8 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -53,6 +53,7 @@
 #include "sysfile.h"
 #include "uptodate.h"
 #include "xattr.h"
+#include "refcounttree.h"
 
 #include "buffer_head_io.h"
 
@@ -782,6 +783,12 @@ static int ocfs2_wipe_inode(struct inode *inode,
 		goto bail_unlock_dir;
 	}
 
+	status = ocfs2_remove_refcount_tree(inode, di_bh);
+	if (status < 0) {
+		mlog_errno(status);
+		goto bail_unlock_dir;
+	}
+
 	status = ocfs2_remove_inode(inode, di_bh, orphan_dir_inode,
 				    orphan_dir_bh);
 	if (status < 0)
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index a85c01c6629d..5656c68a2cae 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -926,6 +926,42 @@ out:
 	*index = i;
 }
 
+/*
+ * Try to remove refcount tree. The mechanism is:
+ * 1) Check whether i_clusters == 0, if no, exit.
+ * 2) check whether we have i_xattr_loc in dinode. if yes, exit.
+ * 3) Check whether we have inline xattr stored outside, if yes, exit.
+ * 4) Remove the tree.
+ */
+int ocfs2_try_remove_refcount_tree(struct inode *inode,
+				   struct buffer_head *di_bh)
+{
+	int ret;
+	struct ocfs2_inode_info *oi = OCFS2_I(inode);
+	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
+
+	down_write(&oi->ip_xattr_sem);
+	down_write(&oi->ip_alloc_sem);
+
+	if (oi->ip_clusters)
+		goto out;
+
+	if ((oi->ip_dyn_features & OCFS2_HAS_XATTR_FL) && di->i_xattr_loc)
+		goto out;
+
+	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL &&
+	    ocfs2_has_inline_xattr_value_outside(inode, di))
+		goto out;
+
+	ret = ocfs2_remove_refcount_tree(inode, di_bh);
+	if (ret)
+		mlog_errno(ret);
+out:
+	up_write(&oi->ip_alloc_sem);
+	up_write(&oi->ip_xattr_sem);
+	return 0;
+}
+
 /*
  * Given a cpos and len, try to find the refcount record which contains cpos.
  * 1. If cpos can be found in one refcount record, return the record.
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h
index 7d6900c904d4..1e3446a655dd 100644
--- a/fs/ocfs2/refcounttree.h
+++ b/fs/ocfs2/refcounttree.h
@@ -90,4 +90,7 @@ int ocfs2_add_refcount_flag(struct inode *inode,
 			    u32 cpos, u32 p_cluster, u32 num_clusters,
 			    struct ocfs2_cached_dealloc_ctxt *dealloc,
 			    struct ocfs2_post_refcount *post);
+int ocfs2_remove_refcount_tree(struct inode *inode, struct buffer_head *di_bh);
+int ocfs2_try_remove_refcount_tree(struct inode *inode,
+				   struct buffer_head *di_bh);
 #endif /* OCFS2_REFCOUNTTREE_H */
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 501539a733f4..6660f1c6149e 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -840,6 +840,23 @@ static int ocfs2_xattr_list_entries(struct inode *inode,
 	return result;
 }
 
+int ocfs2_has_inline_xattr_value_outside(struct inode *inode,
+					 struct ocfs2_dinode *di)
+{
+	struct ocfs2_xattr_header *xh;
+	int i;
+
+	xh = (struct ocfs2_xattr_header *)
+		 ((void *)di + inode->i_sb->s_blocksize -
+		 le16_to_cpu(di->i_xattr_inline_size));
+
+	for (i = 0; i < le16_to_cpu(xh->xh_count); i++)
+		if (!ocfs2_xattr_is_local(&xh->xh_entries[i]))
+			return 1;
+
+	return 0;
+}
+
 static int ocfs2_xattr_ibody_list(struct inode *inode,
 				  struct ocfs2_dinode *di,
 				  char *buffer,
@@ -2898,10 +2915,16 @@ int ocfs2_xattr_set(struct inode *inode,
 	if (ocfs2_dealloc_has_cluster(&ctxt.dealloc))
 		ocfs2_schedule_truncate_log_flush(osb, 1);
 	ocfs2_run_deallocs(osb, &ctxt.dealloc);
+
 cleanup:
 	if (ref_tree)
 		ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
 	up_write(&OCFS2_I(inode)->ip_xattr_sem);
+	if (!value && !ret) {
+		ret = ocfs2_try_remove_refcount_tree(inode, di_bh);
+		if (ret)
+			mlog_errno(ret);
+	}
 	ocfs2_inode_unlock(inode, 1);
 cleanup_nolock:
 	brelse(di_bh);
diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h
index a3295d705cea..e74703f56dca 100644
--- a/fs/ocfs2/xattr.h
+++ b/fs/ocfs2/xattr.h
@@ -55,6 +55,8 @@ int ocfs2_xattr_set_handle(handle_t *, struct inode *, struct buffer_head *,
 			   int, const char *, const void *, size_t, int,
 			   struct ocfs2_alloc_context *,
 			   struct ocfs2_alloc_context *);
+int ocfs2_has_inline_xattr_value_outside(struct inode *inode,
+					 struct ocfs2_dinode *di);
 int ocfs2_xattr_remove(struct inode *, struct buffer_head *);
 int ocfs2_init_security_get(struct inode *, struct inode *,
 			    struct ocfs2_security_xattr_info *);
-- 
cgit v1.2.3


From a7fe7a3a1ab5dac8d81e531c060f51e12010133b Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:43:52 +0800
Subject: ocfs2: Create an xattr indexed block if needed.

With reflink, we need to be able to create a new indexed xattr
block from the very beginning. So add a new parameter to
ocfs2_create_xattr_block.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/ocfs2/xattr.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 6660f1c6149e..bb92a6d274c0 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -2142,7 +2142,8 @@ static int ocfs2_create_xattr_block(handle_t *handle,
 				    struct inode *inode,
 				    struct buffer_head *inode_bh,
 				    struct ocfs2_alloc_context *meta_ac,
-				    struct buffer_head **ret_bh)
+				    struct buffer_head **ret_bh,
+				    int indexed)
 {
 	int ret;
 	u16 suballoc_bit_start;
@@ -2188,6 +2189,17 @@ static int ocfs2_create_xattr_block(handle_t *handle,
 	xblk->xb_fs_generation = cpu_to_le32(osb->fs_generation);
 	xblk->xb_blkno = cpu_to_le64(first_blkno);
 
+	if (indexed) {
+		struct ocfs2_xattr_tree_root *xr = &xblk->xb_attrs.xb_root;
+		xr->xt_clusters = cpu_to_le32(1);
+		xr->xt_last_eb_blk = 0;
+		xr->xt_list.l_tree_depth = 0;
+		xr->xt_list.l_count = cpu_to_le16(
+					ocfs2_xattr_recs_per_xb(inode->i_sb));
+		xr->xt_list.l_next_free_rec = cpu_to_le16(1);
+		xblk->xb_flags = cpu_to_le16(OCFS2_XATTR_INDEXED);
+	}
+
 	ret = ocfs2_journal_dirty(handle, new_bh);
 	if (ret < 0) {
 		mlog_errno(ret);
@@ -2222,7 +2234,7 @@ static int ocfs2_xattr_block_set(struct inode *inode,
 
 	if (!xs->xattr_bh) {
 		ret = ocfs2_create_xattr_block(handle, inode, xs->inode_bh,
-					       ctxt->meta_ac, &new_bh);
+					       ctxt->meta_ac, &new_bh, 0);
 		if (ret) {
 			mlog_errno(ret);
 			goto end;
-- 
cgit v1.2.3


From 2999d12f4d5529b282ce201b21444590c3f9f723 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:43:55 +0800
Subject: ocfs2: Add reflink support for xattr.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/ocfs2/refcounttree.c |  24 +-
 fs/ocfs2/refcounttree.h |   6 +
 fs/ocfs2/xattr.c        | 923 ++++++++++++++++++++++++++++++++++++++++++++++++
 fs/ocfs2/xattr.h        |   4 +
 4 files changed, 945 insertions(+), 12 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 5656c68a2cae..dc57d066f794 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -1894,12 +1894,12 @@ out:
 	return ret;
 }
 
-static int __ocfs2_increase_refcount(handle_t *handle,
-				     struct ocfs2_caching_info *ci,
-				     struct buffer_head *ref_root_bh,
-				     u64 cpos, u32 len,
-				     struct ocfs2_alloc_context *meta_ac,
-				     struct ocfs2_cached_dealloc_ctxt *dealloc)
+int ocfs2_increase_refcount(handle_t *handle,
+			    struct ocfs2_caching_info *ci,
+			    struct buffer_head *ref_root_bh,
+			    u64 cpos, u32 len,
+			    struct ocfs2_alloc_context *meta_ac,
+			    struct ocfs2_cached_dealloc_ctxt *dealloc)
 {
 	int ret = 0, index;
 	struct buffer_head *ref_leaf_bh = NULL;
@@ -3631,9 +3631,9 @@ int ocfs2_add_refcount_flag(struct inode *inode,
 		goto out_commit;
 	}
 
-	ret = __ocfs2_increase_refcount(handle, ref_ci, ref_root_bh,
-					p_cluster, num_clusters,
-					meta_ac, dealloc);
+	ret = ocfs2_increase_refcount(handle, ref_ci, ref_root_bh,
+				      p_cluster, num_clusters,
+				      meta_ac, dealloc);
 	if (ret) {
 		mlog_errno(ret);
 		goto out_commit;
@@ -3822,9 +3822,9 @@ static int ocfs2_add_refcounted_extent(struct inode *inode,
 		goto out_commit;
 	}
 
-	ret = __ocfs2_increase_refcount(handle, ref_ci, ref_root_bh,
-					p_cluster, num_clusters,
-					meta_ac, dealloc);
+	ret = ocfs2_increase_refcount(handle, ref_ci, ref_root_bh,
+				      p_cluster, num_clusters,
+				      meta_ac, dealloc);
 	if (ret)
 		mlog_errno(ret);
 
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h
index 1e3446a655dd..2c238e682570 100644
--- a/fs/ocfs2/refcounttree.h
+++ b/fs/ocfs2/refcounttree.h
@@ -93,4 +93,10 @@ int ocfs2_add_refcount_flag(struct inode *inode,
 int ocfs2_remove_refcount_tree(struct inode *inode, struct buffer_head *di_bh);
 int ocfs2_try_remove_refcount_tree(struct inode *inode,
 				   struct buffer_head *di_bh);
+int ocfs2_increase_refcount(handle_t *handle,
+			    struct ocfs2_caching_info *ci,
+			    struct buffer_head *ref_root_bh,
+			    u64 cpos, u32 len,
+			    struct ocfs2_alloc_context *meta_ac,
+			    struct ocfs2_cached_dealloc_ctxt *dealloc);
 #endif /* OCFS2_REFCOUNTTREE_H */
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index bb92a6d274c0..661ed9b85dbf 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -5876,6 +5876,929 @@ out:
 	return ret;
 }
 
+/*
+ * Store the information we need in xattr reflink.
+ * old_bh and new_bh are inode bh for the old and new inode.
+ */
+struct ocfs2_xattr_reflink {
+	struct inode *old_inode;
+	struct inode *new_inode;
+	struct buffer_head *old_bh;
+	struct buffer_head *new_bh;
+	struct ocfs2_caching_info *ref_ci;
+	struct buffer_head *ref_root_bh;
+	struct ocfs2_cached_dealloc_ctxt *dealloc;
+};
+
+/*
+ * Given a xattr header and xe offset,
+ * return the proper xv and the corresponding bh.
+ * xattr in inode, block and xattr tree have different implementations.
+ */
+typedef int (get_xattr_value_root)(struct super_block *sb,
+				   struct buffer_head *bh,
+				   struct ocfs2_xattr_header *xh,
+				   int offset,
+				   struct ocfs2_xattr_value_root **xv,
+				   struct buffer_head **ret_bh,
+				   void *para);
+
+/*
+ * Add to *metas, *credits and *num_recs what is needed to create from scratch
+ * every non-local value root in this xattr header.  func locates each value
+ * root, so all types of xattr container (inode, block, bucket) can use it.
+ */
+static int ocfs2_value_metas_in_xattr_header(struct super_block *sb,
+					     struct buffer_head *bh,
+					     struct ocfs2_xattr_header *xh,
+					     int *metas, int *credits,
+					     int *num_recs,
+					     get_xattr_value_root *func,
+					     void *para)
+{
+	int i, ret = 0;
+	struct ocfs2_xattr_value_root *xv;
+	struct ocfs2_xattr_entry *xe;
+
+	for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
+		xe = &xh->xh_entries[i];
+		if (ocfs2_xattr_is_local(xe))
+			continue;
+
+		ret = func(sb, bh, xh, i, &xv, NULL, para);
+		if (ret) {
+			mlog_errno(ret);
+			break;
+		}
+
+		*metas += le16_to_cpu(xv->xr_list.l_tree_depth) *
+			  le16_to_cpu(xv->xr_list.l_next_free_rec);
+
+		*credits += ocfs2_calc_extend_credits(sb,
+						&def_xv.xv.xr_list,
+						le32_to_cpu(xv->xr_clusters));
+
+		/*
+		 * For a tree (depth > 0) we don't walk the extent blocks, so
+		 * use a maximum; l_next_free_rec is le16 and must be converted.
+		 */
+		if (!xv->xr_list.l_tree_depth)
+			*num_recs += le16_to_cpu(xv->xr_list.l_next_free_rec);
+		else
+			*num_recs += ocfs2_clusters_for_bytes(sb,
+							      XATTR_SIZE_MAX);
+	}
+
+	return ret;
+}
+
+/* Used by xattr inode and block to return the right xv and buffer_head. */
+static int ocfs2_get_xattr_value_root(struct super_block *sb,
+				      struct buffer_head *bh,
+				      struct ocfs2_xattr_header *xh,
+				      int offset,
+				      struct ocfs2_xattr_value_root **xv,
+				      struct buffer_head **ret_bh,
+				      void *para)
+{
+	struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset];
+
+	*xv = (struct ocfs2_xattr_value_root *)((void *)xh +
+		le16_to_cpu(xe->xe_name_offset) +
+		OCFS2_XATTR_SIZE(xe->xe_name_len));
+
+	if (ret_bh)
+		*ret_bh = bh;
+
+	return 0;
+}
+
+/*
+ * Lock the meta_ac and calculate how many credits we need for reflink xattrs.
+ * It is only used for inline xattr and xattr block.
+ */
+static int ocfs2_reflink_lock_xattr_allocators(struct ocfs2_super *osb,
+					struct ocfs2_xattr_header *xh,
+					struct buffer_head *ref_root_bh,
+					int *credits,
+					struct ocfs2_alloc_context **meta_ac)
+{
+	int ret, meta_add = 0, num_recs = 0;
+	struct ocfs2_refcount_block *rb =
+			(struct ocfs2_refcount_block *)ref_root_bh->b_data;
+
+	*credits = 0;
+
+	ret = ocfs2_value_metas_in_xattr_header(osb->sb, NULL, xh,
+						&meta_add, credits, &num_recs,
+						ocfs2_get_xattr_value_root,
+						NULL);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	/*
+	 * We need to add/modify num_recs in refcount tree, so just calculate
+	 * an approximate number we need for refcount tree change.
+	 * Sometimes we need to split the tree, and after a split half the recs
+	 * will be moved to the new block, and a new block can only provide
+	 * half the number of recs. So we multiply the new blocks by 2.
+	 */
+	num_recs = num_recs / ocfs2_refcount_recs_per_rb(osb->sb) * 2;
+	meta_add += num_recs;
+	*credits += num_recs + num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
+	if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)
+		*credits += le16_to_cpu(rb->rf_list.l_tree_depth) *
+			    le16_to_cpu(rb->rf_list.l_next_free_rec) + 1;
+	else
+		*credits += 1;
+
+	ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add, meta_ac);
+	if (ret)
+		mlog_errno(ret);
+
+out:
+	return ret;
+}
+
+/*
+ * Given a xattr header, reflink all the xattrs in this container.
+ * It can be used for inode, block and bucket.
+ *
+ * NOTE:
+ * Before calling this function, the caller must have memcpy'd the xattrs in
+ * old_xh to the new_xh.
+ */
+static int ocfs2_reflink_xattr_header(handle_t *handle,
+				      struct ocfs2_xattr_reflink *args,
+				      struct buffer_head *old_bh,
+				      struct ocfs2_xattr_header *xh,
+				      struct buffer_head *new_bh,
+				      struct ocfs2_xattr_header *new_xh,
+				      struct ocfs2_xattr_value_buf *vb,
+				      struct ocfs2_alloc_context *meta_ac,
+				      get_xattr_value_root *func,
+				      void *para)
+{
+	int ret = 0, i;
+	struct super_block *sb = args->old_inode->i_sb;
+	struct buffer_head *value_bh;
+	struct ocfs2_xattr_entry *xe;
+	struct ocfs2_xattr_value_root *xv, *new_xv;
+	struct ocfs2_extent_tree data_et;
+	u32 clusters, cpos, p_cluster, num_clusters;
+	unsigned int ext_flags = 0;
+
+	mlog(0, "reflink xattr in container %llu, count = %u\n",
+	     (unsigned long long)old_bh->b_blocknr, le16_to_cpu(xh->xh_count));
+	for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
+		xe = &xh->xh_entries[i];
+
+		if (ocfs2_xattr_is_local(xe))
+			continue;
+
+		ret = func(sb, old_bh, xh, i, &xv, NULL, para);
+		if (ret) {
+			mlog_errno(ret);
+			break;
+		}
+
+		ret = func(sb, new_bh, new_xh, i, &new_xv, &value_bh, para);
+		if (ret) {
+			mlog_errno(ret);
+			break;
+		}
+
+		/*
+		 * For the xattr which has l_tree_depth = 0, all the extent
+		 * recs have already been copied to the new xh with the
+		 * appropriate OCFS2_EXT_REFCOUNTED flag, so we just need to
+		 * increase the refcount in the refcount tree.
+		 *
+		 * For the xattr which has l_tree_depth > 0, we need
+		 * to initialize it to the empty default value root,
+		 * and then insert the extents one by one.
+		 */
+		if (xv->xr_list.l_tree_depth) {
+			memcpy(new_xv, &def_xv, sizeof(def_xv));
+			vb->vb_xv = new_xv;
+			vb->vb_bh = value_bh;
+			ocfs2_init_xattr_value_extent_tree(&data_et,
+					INODE_CACHE(args->new_inode), vb);
+		}
+
+		clusters = le32_to_cpu(xv->xr_clusters);
+		cpos = 0;
+		while (cpos < clusters) {
+			ret = ocfs2_xattr_get_clusters(args->old_inode,
+						       cpos,
+						       &p_cluster,
+						       &num_clusters,
+						       &xv->xr_list,
+						       &ext_flags);
+			if (ret) {
+				mlog_errno(ret);
+				goto out;
+			}
+
+			BUG_ON(!p_cluster);
+
+			if (xv->xr_list.l_tree_depth) {
+				ret = ocfs2_insert_extent(handle,
+						&data_et, cpos,
+						ocfs2_clusters_to_blocks(
+							args->old_inode->i_sb,
+							p_cluster),
+						num_clusters, ext_flags,
+						meta_ac);
+				if (ret) {
+					mlog_errno(ret);
+					goto out;
+				}
+			}
+
+			ret = ocfs2_increase_refcount(handle, args->ref_ci,
+						      args->ref_root_bh,
+						      p_cluster, num_clusters,
+						      meta_ac, args->dealloc);
+			if (ret) {
+				mlog_errno(ret);
+				goto out;
+			}
+
+			cpos += num_clusters;
+		}
+	}
+
+out:
+	return ret;
+}
+
+static int ocfs2_reflink_xattr_inline(struct ocfs2_xattr_reflink *args)
+{
+	int ret = 0, credits = 0;
+	handle_t *handle;
+	struct ocfs2_super *osb = OCFS2_SB(args->old_inode->i_sb);
+	struct ocfs2_dinode *di = (struct ocfs2_dinode *)args->old_bh->b_data;
+	int inline_size = le16_to_cpu(di->i_xattr_inline_size);
+	int header_off = osb->sb->s_blocksize - inline_size;
+	struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)
+					(args->old_bh->b_data + header_off);
+	struct ocfs2_xattr_header *new_xh = (struct ocfs2_xattr_header *)
+					(args->new_bh->b_data + header_off);
+	struct ocfs2_alloc_context *meta_ac = NULL;
+	struct ocfs2_inode_info *new_oi;
+	struct ocfs2_dinode *new_di;
+	struct ocfs2_xattr_value_buf vb = {
+		.vb_bh = args->new_bh,
+		.vb_access = ocfs2_journal_access_di,
+	};
+
+	ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh,
+						  &credits, &meta_ac);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	handle = ocfs2_start_trans(osb, credits);
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_journal_access_di(handle, INODE_CACHE(args->new_inode),
+				      args->new_bh, OCFS2_JOURNAL_ACCESS_WRITE);
+	if (ret) {
+		mlog_errno(ret);
+		goto out_commit;
+	}
+
+	memcpy(args->new_bh->b_data + header_off,
+	       args->old_bh->b_data + header_off, inline_size);
+
+	new_di = (struct ocfs2_dinode *)args->new_bh->b_data;
+	new_di->i_xattr_inline_size = cpu_to_le16(inline_size);
+
+	ret = ocfs2_reflink_xattr_header(handle, args, args->old_bh, xh,
+					 args->new_bh, new_xh, &vb, meta_ac,
+					 ocfs2_get_xattr_value_root, NULL);
+	if (ret) {
+		mlog_errno(ret);
+		goto out_commit;
+	}
+
+	new_oi = OCFS2_I(args->new_inode);
+	spin_lock(&new_oi->ip_lock);
+	new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL;
+	new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features);
+	spin_unlock(&new_oi->ip_lock);
+
+	ocfs2_journal_dirty(handle, args->new_bh);
+
+out_commit:
+	ocfs2_commit_trans(osb, handle);
+
+out:
+	if (meta_ac)
+		ocfs2_free_alloc_context(meta_ac);
+	return ret;
+}
+
+static int ocfs2_create_empty_xattr_block(struct inode *inode,
+					  struct buffer_head *fe_bh,
+					  struct buffer_head **ret_bh,
+					  int indexed)
+{
+	int ret;
+	handle_t *handle;
+	struct ocfs2_alloc_context *meta_ac;
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+
+	ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac);
+	if (ret < 0) {
+		mlog_errno(ret);
+		return ret;
+	}
+
+	handle = ocfs2_start_trans(osb, OCFS2_XATTR_BLOCK_CREATE_CREDITS);
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		mlog_errno(ret);
+		goto out;
+	}
+
+	mlog(0, "create new xattr block for inode %llu, index = %d\n",
+	     (unsigned long long)fe_bh->b_blocknr, indexed);
+	ret = ocfs2_create_xattr_block(handle, inode, fe_bh,
+				       meta_ac, ret_bh, indexed);
+	if (ret)
+		mlog_errno(ret);
+
+	ocfs2_commit_trans(osb, handle);
+out:
+	ocfs2_free_alloc_context(meta_ac);
+	return ret;
+}
+
+static int ocfs2_reflink_xattr_block(struct ocfs2_xattr_reflink *args,
+				     struct buffer_head *blk_bh,
+				     struct buffer_head *new_blk_bh)
+{
+	int ret = 0, credits = 0;
+	handle_t *handle;
+	struct ocfs2_inode_info *new_oi = OCFS2_I(args->new_inode);
+	struct ocfs2_dinode *new_di;
+	struct ocfs2_super *osb = OCFS2_SB(args->new_inode->i_sb);
+	int header_off = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
+	struct ocfs2_xattr_block *xb =
+			(struct ocfs2_xattr_block *)blk_bh->b_data;
+	struct ocfs2_xattr_header *xh = &xb->xb_attrs.xb_header;
+	struct ocfs2_xattr_block *new_xb =
+			(struct ocfs2_xattr_block *)new_blk_bh->b_data;
+	struct ocfs2_xattr_header *new_xh = &new_xb->xb_attrs.xb_header;
+	struct ocfs2_alloc_context *meta_ac;
+	struct ocfs2_xattr_value_buf vb = {
+		.vb_bh = new_blk_bh,
+		.vb_access = ocfs2_journal_access_xb,
+	};
+
+	ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh,
+						  &credits, &meta_ac);
+	if (ret) {
+		mlog_errno(ret);
+		return ret;
+	}
+
+	/* One more credit in case we need to add xattr flags in new inode. */
+	handle = ocfs2_start_trans(osb, credits + 1);
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		mlog_errno(ret);
+		goto out;
+	}
+
+	if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) {
+		ret = ocfs2_journal_access_di(handle,
+					      INODE_CACHE(args->new_inode),
+					      args->new_bh,
+					      OCFS2_JOURNAL_ACCESS_WRITE);
+		if (ret) {
+			mlog_errno(ret);
+			goto out_commit;
+		}
+	}
+
+	ret = ocfs2_journal_access_xb(handle, INODE_CACHE(args->new_inode),
+				      new_blk_bh, OCFS2_JOURNAL_ACCESS_WRITE);
+	if (ret) {
+		mlog_errno(ret);
+		goto out_commit;
+	}
+
+	memcpy(new_blk_bh->b_data + header_off, blk_bh->b_data + header_off,
+	       osb->sb->s_blocksize - header_off);
+
+	ret = ocfs2_reflink_xattr_header(handle, args, blk_bh, xh,
+					 new_blk_bh, new_xh, &vb, meta_ac,
+					 ocfs2_get_xattr_value_root, NULL);
+	if (ret) {
+		mlog_errno(ret);
+		goto out_commit;
+	}
+
+	ocfs2_journal_dirty(handle, new_blk_bh);
+
+	if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) {
+		new_di = (struct ocfs2_dinode *)args->new_bh->b_data;
+		spin_lock(&new_oi->ip_lock);
+		new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL;
+		new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features);
+		spin_unlock(&new_oi->ip_lock);
+
+		ocfs2_journal_dirty(handle, args->new_bh);
+	}
+
+out_commit:
+	ocfs2_commit_trans(osb, handle);
+
+out:
+	ocfs2_free_alloc_context(meta_ac);
+	return ret;
+}
+
+struct ocfs2_reflink_xattr_tree_args {
+	struct ocfs2_xattr_reflink *reflink;
+	struct buffer_head *old_blk_bh;
+	struct buffer_head *new_blk_bh;
+	struct ocfs2_xattr_bucket *old_bucket;
+	struct ocfs2_xattr_bucket *new_bucket;
+};
+
+/*
+ * NOTE:
+ * We have to handle the case that both old bucket and new bucket
+ * will call this function to get the right ret_bh.
+ * So the caller must give us the right bh.
+ */
+static int ocfs2_get_reflink_xattr_value_root(struct super_block *sb,
+					struct buffer_head *bh,
+					struct ocfs2_xattr_header *xh,
+					int offset,
+					struct ocfs2_xattr_value_root **xv,
+					struct buffer_head **ret_bh,
+					void *para)
+{
+	struct ocfs2_reflink_xattr_tree_args *args =
+			(struct ocfs2_reflink_xattr_tree_args *)para;
+	struct ocfs2_xattr_bucket *bucket;
+
+	if (bh == args->old_bucket->bu_bhs[0])
+		bucket = args->old_bucket;
+	else
+		bucket = args->new_bucket;
+
+	return ocfs2_get_xattr_tree_value_root(sb, bucket, offset,
+					       xv, ret_bh);
+}
+
+struct ocfs2_value_tree_metas {
+	int num_metas;
+	int credits;
+	int num_recs;
+};
+
+static int ocfs2_value_tree_metas_in_bucket(struct super_block *sb,
+					struct buffer_head *bh,
+					struct ocfs2_xattr_header *xh,
+					int offset,
+					struct ocfs2_xattr_value_root **xv,
+					struct buffer_head **ret_bh,
+					void *para)
+{
+	struct ocfs2_xattr_bucket *bucket =
+				(struct ocfs2_xattr_bucket *)para;
+
+	return ocfs2_get_xattr_tree_value_root(sb, bucket, offset,
+					       xv, ret_bh);
+}
+
+static int ocfs2_calc_value_tree_metas(struct inode *inode,
+				      struct ocfs2_xattr_bucket *bucket,
+				      void *para)
+{
+	struct ocfs2_value_tree_metas *metas =
+			(struct ocfs2_value_tree_metas *)para;
+	struct ocfs2_xattr_header *xh =
+			(struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data;
+
+	/* Add the credits for this bucket first. */
+	metas->credits += bucket->bu_blocks;
+	return ocfs2_value_metas_in_xattr_header(inode->i_sb, bucket->bu_bhs[0],
+					xh, &metas->num_metas,
+					&metas->credits, &metas->num_recs,
+					ocfs2_value_tree_metas_in_bucket,
+					bucket);
+}
+
+/*
+ * For the xattr extent rec starting at blkno with len clusters, iterate all
+ * the buckets, calculate the metadata and credits needed to reflink every
+ * value root and lock the allocators.  On error *meta_ac is freed and cleared.
+ */
+static int ocfs2_lock_reflink_xattr_rec_allocators(
+				struct ocfs2_reflink_xattr_tree_args *args,
+				struct ocfs2_extent_tree *xt_et,
+				u64 blkno, u32 len, int *credits,
+				struct ocfs2_alloc_context **meta_ac,
+				struct ocfs2_alloc_context **data_ac)
+{
+	int ret, num_free_extents;
+	struct ocfs2_value_tree_metas metas;
+	struct ocfs2_super *osb = OCFS2_SB(args->reflink->old_inode->i_sb);
+	struct ocfs2_refcount_block *rb;
+
+	memset(&metas, 0, sizeof(metas));
+
+	ret = ocfs2_iterate_xattr_buckets(args->reflink->old_inode, blkno, len,
+					  ocfs2_calc_value_tree_metas, &metas);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	*credits = metas.credits;
+
+	/*
+	 * Calculate what we need for the refcount tree change.
+	 *
+	 * We need to add/modify num_recs in the refcount tree, so just
+	 * calculate an approximate number of blocks for that change.
+	 * Sometimes we need to split the tree, and after a split half the
+	 * recs will be moved to the new block, so a new block can only provide
+	 * half the number of recs. So we multiply the new blocks by 2.
+	 * In the end, we have to add credits for modifying the already
+	 * existing refcount block.
+	 */
+	rb = (struct ocfs2_refcount_block *)args->reflink->ref_root_bh->b_data;
+	metas.num_recs =
+		(metas.num_recs + ocfs2_refcount_recs_per_rb(osb->sb) - 1) /
+		 ocfs2_refcount_recs_per_rb(osb->sb) * 2;
+	metas.num_metas += metas.num_recs;
+	*credits += metas.num_recs +
+		    metas.num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
+	if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)
+		*credits += le16_to_cpu(rb->rf_list.l_tree_depth) *
+			    le16_to_cpu(rb->rf_list.l_next_free_rec) + 1;
+	else
+		*credits += 1;
+
+	/* count in the xattr tree change. */
+	num_free_extents = ocfs2_num_free_extents(osb, xt_et);
+	if (num_free_extents < 0) {
+		ret = num_free_extents;
+		mlog_errno(ret);
+		goto out;
+	}
+
+	if (num_free_extents < len)
+		metas.num_metas += ocfs2_extend_meta_needed(xt_et->et_root_el);
+
+	*credits += ocfs2_calc_extend_credits(osb->sb,
+					      xt_et->et_root_el, len);
+
+	if (metas.num_metas) {
+		ret = ocfs2_reserve_new_metadata_blocks(osb, metas.num_metas,
+							meta_ac);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+	}
+
+	if (len) {
+		ret = ocfs2_reserve_clusters(osb, len, data_ac);
+		if (ret)
+			mlog_errno(ret);
+	}
+out:
+	if (ret) {
+		if (*meta_ac) {
+			ocfs2_free_alloc_context(*meta_ac);
+			*meta_ac = NULL;
+		}
+	}
+
+	return ret;
+}
+
+static int ocfs2_reflink_xattr_buckets(handle_t *handle,
+				u64 blkno, u64 new_blkno, u32 clusters,
+				struct ocfs2_alloc_context *meta_ac,
+				struct ocfs2_alloc_context *data_ac,
+				struct ocfs2_reflink_xattr_tree_args *args)
+{
+	int i, j, ret = 0;
+	struct super_block *sb = args->reflink->old_inode->i_sb;
+	u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb));
+	u32 num_buckets = clusters * bpc;
+	int bpb = args->old_bucket->bu_blocks;
+	struct ocfs2_xattr_value_buf vb = {
+		.vb_access = ocfs2_journal_access,
+	};
+
+	for (i = 0; i < num_buckets; i++, blkno += bpb, new_blkno += bpb) {
+		ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno);
+		if (ret) {
+			mlog_errno(ret);
+			break;
+		}
+
+		ret = ocfs2_init_xattr_bucket(args->new_bucket, new_blkno);
+		if (ret) {
+			mlog_errno(ret);
+			break;
+		}
+
+		/*
+		 * The real bucket num in this series of blocks is stored
+		 * in the 1st bucket.
+		 */
+		if (i == 0)
+			num_buckets = le16_to_cpu(
+				bucket_xh(args->old_bucket)->xh_num_buckets);
+
+		ret = ocfs2_xattr_bucket_journal_access(handle,
+						args->new_bucket,
+						OCFS2_JOURNAL_ACCESS_CREATE);
+		if (ret) {
+			mlog_errno(ret);
+			break;
+		}
+
+		for (j = 0; j < bpb; j++)
+			memcpy(bucket_block(args->new_bucket, j),
+			       bucket_block(args->old_bucket, j),
+			       sb->s_blocksize);
+
+		ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
+
+		ret = ocfs2_reflink_xattr_header(handle, args->reflink,
+					args->old_bucket->bu_bhs[0],
+					bucket_xh(args->old_bucket),
+					args->new_bucket->bu_bhs[0],
+					bucket_xh(args->new_bucket),
+					&vb, meta_ac,
+					ocfs2_get_reflink_xattr_value_root,
+					args);
+		if (ret) {
+			mlog_errno(ret);
+			break;
+		}
+
+		/*
+		 * Re-access and dirty the bucket to calculate metaecc,
+		 * because we may extend the transaction in reflink_xattr_header,
+		 * which makes the already accessed blocks go away.
+		 */
+		ret = ocfs2_xattr_bucket_journal_access(handle,
+						args->new_bucket,
+						OCFS2_JOURNAL_ACCESS_WRITE);
+		if (ret) {
+			mlog_errno(ret);
+			break;
+		}
+
+		ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
+		ocfs2_xattr_bucket_relse(args->old_bucket);
+		ocfs2_xattr_bucket_relse(args->new_bucket);
+	}
+
+	ocfs2_xattr_bucket_relse(args->old_bucket);
+	ocfs2_xattr_bucket_relse(args->new_bucket);
+	return ret;
+}
+/*
+ * Create the same xattr extent record in the new inode's xattr tree.
+ */
+static int ocfs2_reflink_xattr_rec(struct inode *inode,
+				   struct buffer_head *root_bh,
+				   u64 blkno,
+				   u32 cpos,
+				   u32 len,
+				   void *para)
+{
+	int ret, credits = 0;
+	u32 p_cluster, num_clusters;
+	u64 new_blkno;
+	handle_t *handle;
+	struct ocfs2_reflink_xattr_tree_args *args =
+			(struct ocfs2_reflink_xattr_tree_args *)para;
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct ocfs2_alloc_context *meta_ac = NULL;
+	struct ocfs2_alloc_context *data_ac = NULL;
+	struct ocfs2_extent_tree et;
+
+	ocfs2_init_xattr_tree_extent_tree(&et,
+					  INODE_CACHE(args->reflink->new_inode),
+					  args->new_blk_bh);
+
+	ret = ocfs2_lock_reflink_xattr_rec_allocators(args, &et, blkno,
+						      len, &credits,
+						      &meta_ac, &data_ac);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	handle = ocfs2_start_trans(osb, credits);
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_claim_clusters(osb, handle, data_ac,
+				   len, &p_cluster, &num_clusters);
+	if (ret) {
+		mlog_errno(ret);
+		goto out_commit;
+	}
+
+	new_blkno = ocfs2_clusters_to_blocks(osb->sb, p_cluster);
+
+	mlog(0, "reflink xattr buckets %llu to %llu, len %u\n",
+	     (unsigned long long)blkno, (unsigned long long)new_blkno, len);
+	ret = ocfs2_reflink_xattr_buckets(handle, blkno, new_blkno, len,
+					  meta_ac, data_ac, args);
+	if (ret) {
+		mlog_errno(ret);
+		goto out_commit;
+	}
+
+	mlog(0, "insert new xattr extent rec start %llu len %u to %u\n",
+	     (unsigned long long)new_blkno, len, cpos);
+	ret = ocfs2_insert_extent(handle, &et, cpos, new_blkno,
+				  len, 0, meta_ac);
+	if (ret)
+		mlog_errno(ret);
+
+out_commit:
+	ocfs2_commit_trans(osb, handle);
+
+out:
+	if (meta_ac)
+		ocfs2_free_alloc_context(meta_ac);
+	if (data_ac)
+		ocfs2_free_alloc_context(data_ac);
+	return ret;
+}
+
+/*
+ * Create reflinked xattr buckets.
+ * We will add bucket one by one, and refcount all the xattrs in the bucket
+ * if they are stored outside.
+ */
+static int ocfs2_reflink_xattr_tree(struct ocfs2_xattr_reflink *args,
+				    struct buffer_head *blk_bh,
+				    struct buffer_head *new_blk_bh)
+{
+	int ret;
+	struct ocfs2_reflink_xattr_tree_args para;
+
+	memset(&para, 0, sizeof(para));
+	para.reflink = args;
+	para.old_blk_bh = blk_bh;
+	para.new_blk_bh = new_blk_bh;
+
+	para.old_bucket = ocfs2_xattr_bucket_new(args->old_inode);
+	if (!para.old_bucket) {
+		mlog_errno(-ENOMEM);
+		return -ENOMEM;
+	}
+
+	para.new_bucket = ocfs2_xattr_bucket_new(args->new_inode);
+	if (!para.new_bucket) {
+		ret = -ENOMEM;
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_iterate_xattr_index_block(args->old_inode, blk_bh,
+					      ocfs2_reflink_xattr_rec,
+					      &para);
+	if (ret)
+		mlog_errno(ret);
+
+out:
+	ocfs2_xattr_bucket_free(para.old_bucket);
+	ocfs2_xattr_bucket_free(para.new_bucket);
+	return ret;
+}
+
+static int ocfs2_reflink_xattr_in_block(struct ocfs2_xattr_reflink *args,
+					struct buffer_head *blk_bh)
+{
+	int ret, indexed = 0;
+	struct buffer_head *new_blk_bh = NULL;
+	struct ocfs2_xattr_block *xb =
+			(struct ocfs2_xattr_block *)blk_bh->b_data;
+
+
+	if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)
+		indexed = 1;
+
+	ret = ocfs2_create_empty_xattr_block(args->new_inode, args->new_bh,
+					     &new_blk_bh, indexed);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED))
+		ret = ocfs2_reflink_xattr_block(args, blk_bh, new_blk_bh);
+	else
+		ret = ocfs2_reflink_xattr_tree(args, blk_bh, new_blk_bh);
+	if (ret)
+		mlog_errno(ret);
+
+out:
+	brelse(new_blk_bh);
+	return ret;
+}
+
+int ocfs2_reflink_xattrs(struct inode *old_inode,
+			 struct buffer_head *old_bh,
+			 struct inode *new_inode,
+			 struct buffer_head *new_bh)
+{
+	int ret;
+	struct ocfs2_xattr_reflink args;
+	struct ocfs2_inode_info *oi = OCFS2_I(old_inode);
+	struct ocfs2_dinode *di = (struct ocfs2_dinode *)old_bh->b_data;
+	struct buffer_head *blk_bh = NULL;
+	struct ocfs2_cached_dealloc_ctxt dealloc;
+	struct ocfs2_refcount_tree *ref_tree;
+	struct buffer_head *ref_root_bh = NULL;
+
+	ret = ocfs2_lock_refcount_tree(OCFS2_SB(old_inode->i_sb),
+				       le64_to_cpu(di->i_refcount_loc),
+				       1, &ref_tree, &ref_root_bh);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ocfs2_init_dealloc_ctxt(&dealloc);
+
+	args.old_inode = old_inode;
+	args.new_inode = new_inode;
+	args.old_bh = old_bh;
+	args.new_bh = new_bh;
+	args.ref_ci = &ref_tree->rf_ci;
+	args.ref_root_bh = ref_root_bh;
+	args.dealloc = &dealloc;
+
+	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
+		ret = ocfs2_reflink_xattr_inline(&args);
+		if (ret) {
+			mlog_errno(ret);
+			goto out_unlock;
+		}
+	}
+
+	if (!di->i_xattr_loc)
+		goto out_unlock;
+
+	ret = ocfs2_read_xattr_block(old_inode, le64_to_cpu(di->i_xattr_loc),
+				     &blk_bh);
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto out_unlock;
+	}
+
+	ret = ocfs2_reflink_xattr_in_block(&args, blk_bh);
+	if (ret)
+		mlog_errno(ret);
+
+	brelse(blk_bh);
+
+out_unlock:
+	ocfs2_unlock_refcount_tree(OCFS2_SB(old_inode->i_sb),
+				   ref_tree, 1);
+	brelse(ref_root_bh);
+
+	if (ocfs2_dealloc_has_cluster(&dealloc)) {
+		ocfs2_schedule_truncate_log_flush(OCFS2_SB(old_inode->i_sb), 1);
+		ocfs2_run_deallocs(OCFS2_SB(old_inode->i_sb), &dealloc);
+	}
+
+out:
+	return ret;
+}
+
 /*
  * 'security' attributes support
  */
diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h
index e74703f56dca..4f913053d5ee 100644
--- a/fs/ocfs2/xattr.h
+++ b/fs/ocfs2/xattr.h
@@ -90,4 +90,8 @@ int ocfs2_xattr_attach_refcount_tree(struct inode *inode,
 				     struct ocfs2_caching_info *ref_ci,
 				     struct buffer_head *ref_root_bh,
 				     struct ocfs2_cached_dealloc_ctxt *dealloc);
+int ocfs2_reflink_xattrs(struct inode *old_inode,
+			 struct buffer_head *old_bh,
+			 struct inode *new_inode,
+			 struct buffer_head *new_bh);
 #endif /* OCFS2_XATTR_H */
-- 
cgit v1.2.3


From ce9c5a54c0f06b0efb4db8720a0616cc6aa0e5b2 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:43:59 +0800
Subject: ocfs2: Modify removing xattr process for refcount.

The old xattr value removal is quite simple: it just erases the
tree and frees the clusters. But now that we have added refcount support,
the process is a little more complicated.

We have to lock the refcount tree at the beginning, what's more,
we may split the refcount tree in some cases, so meta/credits are
needed.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/ocfs2/xattr.c | 190 ++++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 154 insertions(+), 36 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 661ed9b85dbf..8d1a0abc105c 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -199,6 +199,11 @@ static int ocfs2_prepare_refcount_xattr(struct inode *inode,
 					struct ocfs2_refcount_tree **ref_tree,
 					int *meta_need,
 					int *credits);
+static int ocfs2_get_xattr_tree_value_root(struct super_block *sb,
+					   struct ocfs2_xattr_bucket *bucket,
+					   int offset,
+					   struct ocfs2_xattr_value_root **xv,
+					   struct buffer_head **bh);
 
 static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb)
 {
@@ -1752,51 +1757,112 @@ out:
 	return ret;
 }
 
+/*
+ * In xattr remove, if the value is stored outside and refcounted, we may
+ * have to split the refcount tree, so we need the allocators.
+ */
+static int ocfs2_lock_xattr_remove_allocators(struct inode *inode,
+					struct ocfs2_xattr_value_root *xv,
+					struct ocfs2_caching_info *ref_ci,
+					struct buffer_head *ref_root_bh,
+					struct ocfs2_alloc_context **meta_ac,
+					int *ref_credits)
+{
+	int ret, meta_add = 0;
+	u32 p_cluster, num_clusters;
+	unsigned int ext_flags;
+
+	*ref_credits = 0;
+	ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster,
+				       &num_clusters,
+				       &xv->xr_list,
+				       &ext_flags);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
+		goto out;
+
+	ret = ocfs2_refcounted_xattr_delete_need(inode, ref_ci,
+						 ref_root_bh, xv,
+						 &meta_add, ref_credits);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb),
+						meta_add, meta_ac);
+	if (ret)
+		mlog_errno(ret);
+
+out:
+	return ret;
+}
+
 static int ocfs2_remove_value_outside(struct inode*inode,
 				      struct ocfs2_xattr_value_buf *vb,
-				      struct ocfs2_xattr_header *header)
+				      struct ocfs2_xattr_header *header,
+				      struct ocfs2_caching_info *ref_ci,
+				      struct buffer_head *ref_root_bh)
 {
-	int ret = 0, i;
+	int ret = 0, i, ref_credits;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
+	void *val;
 
 	ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
 
-	ctxt.handle = ocfs2_start_trans(osb,
-					ocfs2_remove_extent_credits(osb->sb));
-	if (IS_ERR(ctxt.handle)) {
-		ret = PTR_ERR(ctxt.handle);
-		mlog_errno(ret);
-		goto out;
-	}
-
 	for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
 		struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
 
-		if (!ocfs2_xattr_is_local(entry)) {
-			void *val;
+		if (ocfs2_xattr_is_local(entry))
+			continue;
 
-			val = (void *)header +
-				le16_to_cpu(entry->xe_name_offset);
-			vb->vb_xv = (struct ocfs2_xattr_value_root *)
-				(val + OCFS2_XATTR_SIZE(entry->xe_name_len));
-			ret = ocfs2_xattr_value_truncate(inode, vb, 0, &ctxt);
-			if (ret < 0) {
-				mlog_errno(ret);
-				break;
-			}
+		val = (void *)header +
+			le16_to_cpu(entry->xe_name_offset);
+		vb->vb_xv = (struct ocfs2_xattr_value_root *)
+			(val + OCFS2_XATTR_SIZE(entry->xe_name_len));
+
+		ret = ocfs2_lock_xattr_remove_allocators(inode, vb->vb_xv,
+							 ref_ci, ref_root_bh,
+							 &ctxt.meta_ac,
+							 &ref_credits);
+
+		ctxt.handle = ocfs2_start_trans(osb, ref_credits +
+					ocfs2_remove_extent_credits(osb->sb));
+		if (IS_ERR(ctxt.handle)) {
+			ret = PTR_ERR(ctxt.handle);
+			mlog_errno(ret);
+			break;
+		}
+
+		ret = ocfs2_xattr_value_truncate(inode, vb, 0, &ctxt);
+		/* Commit even if the truncate failed; the handle is open. */
+		ocfs2_commit_trans(osb, ctxt.handle);
+		if (ctxt.meta_ac) {
+			ocfs2_free_alloc_context(ctxt.meta_ac);
+			ctxt.meta_ac = NULL;
+		}
+		if (ret < 0) {
+			mlog_errno(ret);
+			break;
 		}
 	}
 
-	ocfs2_commit_trans(osb, ctxt.handle);
+	if (ctxt.meta_ac)
+		ocfs2_free_alloc_context(ctxt.meta_ac);
 	ocfs2_schedule_truncate_log_flush(osb, 1);
 	ocfs2_run_deallocs(osb, &ctxt.dealloc);
-out:
 	return ret;
 }
 
 static int ocfs2_xattr_ibody_remove(struct inode *inode,
-				    struct buffer_head *di_bh)
+				    struct buffer_head *di_bh,
+				    struct ocfs2_caching_info *ref_ci,
+				    struct buffer_head *ref_root_bh)
 {
 
 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
@@ -1811,13 +1877,21 @@ static int ocfs2_xattr_ibody_remove(struct inode *inode,
 		 ((void *)di + inode->i_sb->s_blocksize -
 		 le16_to_cpu(di->i_xattr_inline_size));
 
-	ret = ocfs2_remove_value_outside(inode, &vb, header);
+	ret = ocfs2_remove_value_outside(inode, &vb, header,
+					 ref_ci, ref_root_bh);
 
 	return ret;
 }
 
+struct ocfs2_rm_xattr_bucket_para {
+	struct ocfs2_caching_info *ref_ci;
+	struct buffer_head *ref_root_bh;
+};
+
 static int ocfs2_xattr_block_remove(struct inode *inode,
-				    struct buffer_head *blk_bh)
+				    struct buffer_head *blk_bh,
+				    struct ocfs2_caching_info *ref_ci,
+				    struct buffer_head *ref_root_bh)
 {
 	struct ocfs2_xattr_block *xb;
 	int ret = 0;
@@ -1825,22 +1899,29 @@ static int ocfs2_xattr_block_remove(struct inode *inode,
 		.vb_bh = blk_bh,
 		.vb_access = ocfs2_journal_access_xb,
 	};
+	struct ocfs2_rm_xattr_bucket_para args = {
+		.ref_ci = ref_ci,
+		.ref_root_bh = ref_root_bh,
+	};
 
 	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
 	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
 		struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header);
-		ret = ocfs2_remove_value_outside(inode, &vb, header);
+		ret = ocfs2_remove_value_outside(inode, &vb, header,
+						 ref_ci, ref_root_bh);
 	} else
 		ret = ocfs2_iterate_xattr_index_block(inode,
 						blk_bh,
 						ocfs2_rm_xattr_cluster,
-						NULL);
+						&args);
 
 	return ret;
 }
 
 static int ocfs2_xattr_free_block(struct inode *inode,
-				  u64 block)
+				  u64 block,
+				  struct ocfs2_caching_info *ref_ci,
+				  struct buffer_head *ref_root_bh)
 {
 	struct inode *xb_alloc_inode;
 	struct buffer_head *xb_alloc_bh = NULL;
@@ -1858,7 +1939,7 @@ static int ocfs2_xattr_free_block(struct inode *inode,
 		goto out;
 	}
 
-	ret = ocfs2_xattr_block_remove(inode, blk_bh);
+	ret = ocfs2_xattr_block_remove(inode, blk_bh, ref_ci, ref_root_bh);
 	if (ret < 0) {
 		mlog_errno(ret);
 		goto out;
@@ -1918,6 +1999,9 @@ int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
 {
 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
+	struct ocfs2_refcount_tree *ref_tree = NULL;
+	struct buffer_head *ref_root_bh = NULL;
+	struct ocfs2_caching_info *ref_ci = NULL;
 	handle_t *handle;
 	int ret;
 
@@ -1927,8 +2011,21 @@ int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
 	if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
 		return 0;
 
+	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL) {
+		ret = ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb),
+					       le64_to_cpu(di->i_refcount_loc),
+					       1, &ref_tree, &ref_root_bh);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+		ref_ci = &ref_tree->rf_ci;
+
+	}
+
 	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
-		ret = ocfs2_xattr_ibody_remove(inode, di_bh);
+		ret = ocfs2_xattr_ibody_remove(inode, di_bh,
+					       ref_ci, ref_root_bh);
 		if (ret < 0) {
 			mlog_errno(ret);
 			goto out;
@@ -1937,7 +2034,8 @@ int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
 
 	if (di->i_xattr_loc) {
 		ret = ocfs2_xattr_free_block(inode,
-					     le64_to_cpu(di->i_xattr_loc));
+					     le64_to_cpu(di->i_xattr_loc),
+					     ref_ci, ref_root_bh);
 		if (ret < 0) {
 			mlog_errno(ret);
 			goto out;
@@ -1971,6 +2069,9 @@ int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
 out_commit:
 	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
 out:
+	if (ref_tree)
+		ocfs2_unlock_refcount_tree(OCFS2_SB(inode->i_sb), ref_tree, 1);
+	brelse(ref_root_bh);
 	return ret;
 }
 
@@ -4989,7 +5090,7 @@ static int ocfs2_rm_xattr_cluster(struct inode *inode,
 	struct ocfs2_extent_tree et;
 
 	ret = ocfs2_iterate_xattr_buckets(inode, blkno, len,
-					  ocfs2_delete_xattr_in_bucket, NULL);
+					  ocfs2_delete_xattr_in_bucket, para);
 	if (ret) {
 		mlog_errno(ret);
 		return ret;
@@ -5378,7 +5479,7 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
 					struct ocfs2_xattr_bucket *bucket,
 					void *para)
 {
-	int ret = 0;
+	int ret = 0, ref_credits;
 	struct ocfs2_xattr_header *xh = bucket_xh(bucket);
 	u16 i;
 	struct ocfs2_xattr_entry *xe;
@@ -5386,7 +5487,9 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
 	struct ocfs2_xattr_set_ctxt ctxt = {NULL, NULL,};
 	int credits = ocfs2_remove_extent_credits(osb->sb) +
 		ocfs2_blocks_per_xattr_bucket(inode->i_sb);
-
+	struct ocfs2_xattr_value_root *xv;
+	struct ocfs2_rm_xattr_bucket_para *args =
+			(struct ocfs2_rm_xattr_bucket_para *)para;
 
 	ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
 
@@ -5395,7 +5498,16 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
 		if (ocfs2_xattr_is_local(xe))
 			continue;
 
-		ctxt.handle = ocfs2_start_trans(osb, credits);
+		ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket,
+						      i, &xv, NULL);
+
+		ret = ocfs2_lock_xattr_remove_allocators(inode, xv,
+							 args->ref_ci,
+							 args->ref_root_bh,
+							 &ctxt.meta_ac,
+							 &ref_credits);
+
+		ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits);
 		if (IS_ERR(ctxt.handle)) {
 			ret = PTR_ERR(ctxt.handle);
 			mlog_errno(ret);
@@ -5406,12 +5518,18 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
 							i, 0, &ctxt);
 
 		ocfs2_commit_trans(osb, ctxt.handle);
+		if (ctxt.meta_ac) {
+			ocfs2_free_alloc_context(ctxt.meta_ac);
+			ctxt.meta_ac = NULL;
+		}
 		if (ret) {
 			mlog_errno(ret);
 			break;
 		}
 	}
 
+	if (ctxt.meta_ac)
+		ocfs2_free_alloc_context(ctxt.meta_ac);
 	ocfs2_schedule_truncate_log_flush(osb, 1);
 	ocfs2_run_deallocs(osb, &ctxt.dealloc);
 	return ret;
-- 
cgit v1.2.3


From 7540c1a77b26bc2f9d86a0bfbe6597b05ec5f93d Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:44:03 +0800
Subject: ocfs2: Don't merge in 1st refcount ops of reflink.

The whole reflink operation touches the refcount tree 2 times:
1. It adds the clusters in the extent record to the tree if they
   weren't refcounted before.
2. It adds 1 refcount to these clusters when it adds these
   extent records to the tree.

So we shouldn't merge in the 1st operation, since the 2nd one will
soon be called and we may have to split the record again. Merging
first and splitting soon after is a waste of time, so we only merge in
the 2nd round. This is done by adding a new internal
__ocfs2_increase_refcount and calling it with "not-merge" for the 1st
refcount operation in reflink.

This also has a side-effect that we don't need to worry too much about
the metadata allocation in the 2nd round since it will only merge and
no split will happen for those records.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/ocfs2/refcounttree.c | 56 +++++++++++++++++++++++++++++++------------------
 1 file changed, 36 insertions(+), 20 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index dc57d066f794..47df8c5cd3c5 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -1147,7 +1147,7 @@ static void ocfs2_refcount_rec_merge(struct ocfs2_refcount_block *rb,
 static int ocfs2_change_refcount_rec(handle_t *handle,
 				     struct ocfs2_caching_info *ci,
 				     struct buffer_head *ref_leaf_bh,
-				     int index, int change)
+				     int index, int merge, int change)
 {
 	int ret;
 	struct ocfs2_refcount_block *rb =
@@ -1176,7 +1176,7 @@ static int ocfs2_change_refcount_rec(handle_t *handle,
 		}
 
 		le16_add_cpu(&rl->rl_used, -1);
-	} else
+	} else if (merge)
 		ocfs2_refcount_rec_merge(rb, index);
 
 	ret = ocfs2_journal_dirty(handle, ref_leaf_bh);
@@ -1652,7 +1652,7 @@ static int ocfs2_insert_refcount_rec(handle_t *handle,
 				     struct buffer_head *ref_root_bh,
 				     struct buffer_head *ref_leaf_bh,
 				     struct ocfs2_refcount_rec *rec,
-				     int index,
+				     int index, int merge,
 				     struct ocfs2_alloc_context *meta_ac)
 {
 	int ret;
@@ -1710,7 +1710,8 @@ static int ocfs2_insert_refcount_rec(handle_t *handle,
 
 	le16_add_cpu(&rf_list->rl_used, 1);
 
-	ocfs2_refcount_rec_merge(rb, index);
+	if (merge)
+		ocfs2_refcount_rec_merge(rb, index);
 
 	ret = ocfs2_journal_dirty(handle, ref_leaf_bh);
 	if (ret) {
@@ -1744,7 +1745,7 @@ static int ocfs2_split_refcount_rec(handle_t *handle,
 				    struct buffer_head *ref_root_bh,
 				    struct buffer_head *ref_leaf_bh,
 				    struct ocfs2_refcount_rec *split_rec,
-				    int index,
+				    int index, int merge,
 				    struct ocfs2_alloc_context *meta_ac,
 				    struct ocfs2_cached_dealloc_ctxt *dealloc)
 {
@@ -1882,7 +1883,8 @@ static int ocfs2_split_refcount_rec(handle_t *handle,
 		     le32_to_cpu(split_rec->r_refcount),
 		     (unsigned long long)ref_leaf_bh->b_blocknr, index);
 
-		ocfs2_refcount_rec_merge(rb, index);
+		if (merge)
+			ocfs2_refcount_rec_merge(rb, index);
 	}
 
 	ret = ocfs2_journal_dirty(handle, ref_leaf_bh);
@@ -1894,12 +1896,12 @@ out:
 	return ret;
 }
 
-int ocfs2_increase_refcount(handle_t *handle,
-			    struct ocfs2_caching_info *ci,
-			    struct buffer_head *ref_root_bh,
-			    u64 cpos, u32 len,
-			    struct ocfs2_alloc_context *meta_ac,
-			    struct ocfs2_cached_dealloc_ctxt *dealloc)
+static int __ocfs2_increase_refcount(handle_t *handle,
+				     struct ocfs2_caching_info *ci,
+				     struct buffer_head *ref_root_bh,
+				     u64 cpos, u32 len, int merge,
+				     struct ocfs2_alloc_context *meta_ac,
+				     struct ocfs2_cached_dealloc_ctxt *dealloc)
 {
 	int ret = 0, index;
 	struct buffer_head *ref_leaf_bh = NULL;
@@ -1937,7 +1939,8 @@ int ocfs2_increase_refcount(handle_t *handle,
 			     "count %u\n", (unsigned long long)cpos, set_len,
 			     le32_to_cpu(rec.r_refcount));
 			ret = ocfs2_change_refcount_rec(handle, ci,
-							ref_leaf_bh, index, 1);
+							ref_leaf_bh, index,
+							merge, 1);
 			if (ret) {
 				mlog_errno(ret);
 				goto out;
@@ -1950,7 +1953,8 @@ int ocfs2_increase_refcount(handle_t *handle,
 			     set_len);
 			ret = ocfs2_insert_refcount_rec(handle, ci, ref_root_bh,
 							ref_leaf_bh,
-							&rec, index, meta_ac);
+							&rec, index,
+							merge, meta_ac);
 			if (ret) {
 				mlog_errno(ret);
 				goto out;
@@ -1968,7 +1972,7 @@ int ocfs2_increase_refcount(handle_t *handle,
 			     set_len, le32_to_cpu(rec.r_refcount));
 			ret = ocfs2_split_refcount_rec(handle, ci,
 						       ref_root_bh, ref_leaf_bh,
-						       &rec, index,
+						       &rec, index, merge,
 						       meta_ac, dealloc);
 			if (ret) {
 				mlog_errno(ret);
@@ -2061,6 +2065,18 @@ out:
 	return ret;
 }
 
+int ocfs2_increase_refcount(handle_t *handle,
+			    struct ocfs2_caching_info *ci,
+			    struct buffer_head *ref_root_bh,
+			    u64 cpos, u32 len,
+			    struct ocfs2_alloc_context *meta_ac,
+			    struct ocfs2_cached_dealloc_ctxt *dealloc)
+{
+	return __ocfs2_increase_refcount(handle, ci, ref_root_bh,
+					 cpos, len, 1,
+					 meta_ac, dealloc);
+}
+
 static int ocfs2_decrease_refcount_rec(handle_t *handle,
 				struct ocfs2_caching_info *ci,
 				struct buffer_head *ref_root_bh,
@@ -2081,7 +2097,7 @@ static int ocfs2_decrease_refcount_rec(handle_t *handle,
 	if (cpos == le64_to_cpu(rec->r_cpos) &&
 	    len == le32_to_cpu(rec->r_clusters))
 		ret = ocfs2_change_refcount_rec(handle, ci,
-						ref_leaf_bh, index, -1);
+						ref_leaf_bh, index, 1, -1);
 	else {
 		struct ocfs2_refcount_rec split = *rec;
 		split.r_cpos = cpu_to_le64(cpos);
@@ -2097,7 +2113,7 @@ static int ocfs2_decrease_refcount_rec(handle_t *handle,
 		     le32_to_cpu(rec->r_clusters));
 		ret = ocfs2_split_refcount_rec(handle, ci,
 					       ref_root_bh, ref_leaf_bh,
-					       &split, index,
+					       &split, index, 1,
 					       meta_ac, dealloc);
 	}
 
@@ -3631,9 +3647,9 @@ int ocfs2_add_refcount_flag(struct inode *inode,
 		goto out_commit;
 	}
 
-	ret = ocfs2_increase_refcount(handle, ref_ci, ref_root_bh,
-				      p_cluster, num_clusters,
-				      meta_ac, dealloc);
+	ret = __ocfs2_increase_refcount(handle, ref_ci, ref_root_bh,
+					p_cluster, num_clusters, 0,
+					meta_ac, dealloc);
 	if (ret) {
 		mlog_errno(ret);
 		goto out_commit;
-- 
cgit v1.2.3


From c18b812d127a971901180188b918a7cd98ccd4d6 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:44:07 +0800
Subject: ocfs2: Make transaction extend more efficient.

In ocfs2_extend_rotate_transaction, op_credits is the original
credits in the handle and we only want to extend the credits
for the rotation, but the old solution always doubles them. That
is harmless for some minor operations, but for actions like
reflink we may rotate the tree many times and cause the credits
to increase dramatically. So this patch tries to increase the
credits only by the amount that is actually needed.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/ocfs2/alloc.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 7c879fc7834f..38a42f5d59ff 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -2326,10 +2326,18 @@ static int ocfs2_extend_rotate_transaction(handle_t *handle, int subtree_depth,
 					   int op_credits,
 					   struct ocfs2_path *path)
 {
+	int ret;
 	int credits = (path->p_tree_depth - subtree_depth) * 2 + 1 + op_credits;
 
-	if (handle->h_buffer_credits < credits)
-		return ocfs2_extend_trans(handle, credits);
+	if (handle->h_buffer_credits < credits) {
+		ret = ocfs2_extend_trans(handle,
+					 credits - handle->h_buffer_credits);
+		if (ret)
+			return ret;
+
+		if (unlikely(handle->h_buffer_credits < credits))
+			return ocfs2_extend_trans(handle, credits);
+	}
 
 	return 0;
 }
-- 
cgit v1.2.3


From 19bd341f6a6c6b314bcac55bbd906bfd3603fe9e Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:44:10 +0800
Subject: ocfs2: Use proper parameter for some inode operation.

In order to make the original function more suitable for reflink,
we modify the following inode operations. Both are tiny.

1. ocfs2_mknod_locked only uses dentry for mlog, so move the mlog to
   the caller so that reflink can use it without a dentry.
2. ocfs2_prepare_orphan_dir only wants the inode to get its ip_blkno,
   so pass ip_blkno instead.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/ocfs2/namei.c | 32 +++++++++++++++++---------------
 1 file changed, 17 insertions(+), 15 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index c07217ad8796..818df582ba06 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -69,7 +69,6 @@
 static int ocfs2_mknod_locked(struct ocfs2_super *osb,
 			      struct inode *dir,
 			      struct inode *inode,
-			      struct dentry *dentry,
 			      dev_t dev,
 			      struct buffer_head **new_fe_bh,
 			      struct buffer_head *parent_fe_bh,
@@ -78,7 +77,7 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
 
 static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
 				    struct inode **ret_orphan_dir,
-				    struct inode *inode,
+				    u64 blkno,
 				    char *name,
 				    struct ocfs2_dir_lookup_result *lookup);
 
@@ -358,8 +357,12 @@ static int ocfs2_mknod(struct inode *dir,
 	}
 	did_quota_inode = 1;
 
+	mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry,
+		   inode->i_mode, (unsigned long)dev, dentry->d_name.len,
+		   dentry->d_name.name);
+
 	/* do the real work now. */
-	status = ocfs2_mknod_locked(osb, dir, inode, dentry, dev,
+	status = ocfs2_mknod_locked(osb, dir, inode, dev,
 				    &new_fe_bh, parent_fe_bh, handle,
 				    inode_ac);
 	if (status < 0) {
@@ -466,7 +469,6 @@ leave:
 static int ocfs2_mknod_locked(struct ocfs2_super *osb,
 			      struct inode *dir,
 			      struct inode *inode,
-			      struct dentry *dentry,
 			      dev_t dev,
 			      struct buffer_head **new_fe_bh,
 			      struct buffer_head *parent_fe_bh,
@@ -480,10 +482,6 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
 	u16 suballoc_bit;
 	u16 feat;
 
-	mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry,
-		   inode->i_mode, (unsigned long)dev, dentry->d_name.len,
-		   dentry->d_name.name);
-
 	*new_fe_bh = NULL;
 
 	status = ocfs2_claim_new_inode(osb, handle, dir, parent_fe_bh,
@@ -852,7 +850,8 @@ static int ocfs2_unlink(struct inode *dir,
 	}
 
 	if (inode_is_unlinkable(inode)) {
-		status = ocfs2_prepare_orphan_dir(osb, &orphan_dir, inode,
+		status = ocfs2_prepare_orphan_dir(osb, &orphan_dir,
+						  OCFS2_I(inode)->ip_blkno,
 						  orphan_name, &orphan_insert);
 		if (status < 0) {
 			mlog_errno(status);
@@ -1243,9 +1242,8 @@ static int ocfs2_rename(struct inode *old_dir,
 
 		if (S_ISDIR(new_inode->i_mode) || (new_inode->i_nlink == 1)) {
 			status = ocfs2_prepare_orphan_dir(osb, &orphan_dir,
-							  new_inode,
-							  orphan_name,
-							  &orphan_insert);
+						OCFS2_I(new_inode)->ip_blkno,
+						orphan_name, &orphan_insert);
 			if (status < 0) {
 				mlog_errno(status);
 				goto bail;
@@ -1699,7 +1697,11 @@ static int ocfs2_symlink(struct inode *dir,
 	}
 	did_quota_inode = 1;
 
-	status = ocfs2_mknod_locked(osb, dir, inode, dentry,
+	mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", dir, dentry,
+		   inode->i_mode, dentry->d_name.len,
+		   dentry->d_name.name);
+
+	status = ocfs2_mknod_locked(osb, dir, inode,
 				    0, &new_fe_bh, parent_fe_bh, handle,
 				    inode_ac);
 	if (status < 0) {
@@ -1849,7 +1851,7 @@ bail:
 
 static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
 				    struct inode **ret_orphan_dir,
-				    struct inode *inode,
+				    u64 blkno,
 				    char *name,
 				    struct ocfs2_dir_lookup_result *lookup)
 {
@@ -1857,7 +1859,7 @@ static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
 	struct buffer_head *orphan_dir_bh = NULL;
 	int status = 0;
 
-	status = ocfs2_blkno_stringify(OCFS2_I(inode)->ip_blkno, name);
+	status = ocfs2_blkno_stringify(blkno, name);
 	if (status < 0) {
 		mlog_errno(status);
 		return status;
-- 
cgit v1.2.3


From bc13d347574fc0a8a666bc0f4cc2b635d202e372 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:44:14 +0800
Subject: ocfs2: Create reflinked file in orphan dir.

reflink is a very complicated process, so it can't be integrated
into one transaction. So if the system panics during the operation, we
may leave an unfinished inode in the destination directory.

So we will try to create an inode in orphan_dir first, reflink it
to the src file and then move it to the destination file in the end.
In that way we won't be afraid of any corruption during the reflink.

This patch adds 2 functions for orphan_dir operation:
1. Create a new inode in orphan dir.
2. Move an inode to a target dir.

Note:
fsck.ocfs2 should work for us to remove the unfinished file in the
orphan_dir.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/ocfs2/namei.c | 268 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/ocfs2/namei.h |   6 ++
 2 files changed, 274 insertions(+)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 818df582ba06..f010b22b1c44 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -2041,6 +2041,274 @@ leave:
 	return status;
 }
 
+int ocfs2_create_inode_in_orphan(struct inode *dir,
+				 int mode,
+				 struct inode **new_inode)
+{
+	int status, did_quota_inode = 0;
+	struct inode *inode = NULL;
+	struct inode *orphan_dir = NULL;
+	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
+	struct ocfs2_dinode *di = NULL;
+	handle_t *handle = NULL;
+	char orphan_name[OCFS2_ORPHAN_NAMELEN + 1];
+	struct buffer_head *parent_di_bh = NULL;
+	struct buffer_head *new_di_bh = NULL;
+	struct ocfs2_alloc_context *inode_ac = NULL;
+	struct ocfs2_dir_lookup_result orphan_insert = { NULL, };
+
+	status = ocfs2_inode_lock(dir, &parent_di_bh, 1);
+	if (status < 0) {
+		if (status != -ENOENT)
+			mlog_errno(status);
+		return status;
+	}
+
+	/*
+	 * We give the orphan dir the root blkno to fake an orphan name,
+	 * and allocate enough space for our insertion.
+	 */
+	status = ocfs2_prepare_orphan_dir(osb, &orphan_dir,
+					  osb->root_blkno,
+					  orphan_name, &orphan_insert);
+	if (status < 0) {
+		mlog_errno(status);
+		goto leave;
+	}
+
+	/* reserve an inode spot */
+	status = ocfs2_reserve_new_inode(osb, &inode_ac);
+	if (status < 0) {
+		if (status != -ENOSPC)
+			mlog_errno(status);
+		goto leave;
+	}
+
+	inode = ocfs2_get_init_inode(dir, mode);
+	if (!inode) {
+		status = -ENOMEM;
+		mlog_errno(status);
+		goto leave;
+	}
+
+	handle = ocfs2_start_trans(osb, ocfs2_mknod_credits(osb->sb, 0, 0));
+	if (IS_ERR(handle)) {
+		status = PTR_ERR(handle);
+		handle = NULL;
+		mlog_errno(status);
+		goto leave;
+	}
+
+	/* We don't use standard VFS wrapper because we don't want vfs_dq_init
+	 * to be called. */
+	if (sb_any_quota_active(osb->sb) &&
+	    osb->sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA) {
+		status = -EDQUOT;
+		goto leave;
+	}
+	did_quota_inode = 1;
+
+	/* do the real work now. */
+	status = ocfs2_mknod_locked(osb, dir, inode,
+				    0, &new_di_bh, parent_di_bh, handle,
+				    inode_ac);
+	if (status < 0) {
+		mlog_errno(status);
+		goto leave;
+	}
+
+	status = ocfs2_blkno_stringify(OCFS2_I(inode)->ip_blkno, orphan_name);
+	if (status < 0) {
+		mlog_errno(status);
+		goto leave;
+	}
+
+	di = (struct ocfs2_dinode *)new_di_bh->b_data;
+	status = ocfs2_orphan_add(osb, handle, inode, di, orphan_name,
+				  &orphan_insert, orphan_dir);
+	if (status < 0) {
+		mlog_errno(status);
+		goto leave;
+	}
+
+	/* get open lock so that other nodes can't remove it from orphan dir. */
+	status = ocfs2_open_lock(inode);
+	if (status < 0)
+		mlog_errno(status);
+
+leave:
+	if (status < 0 && did_quota_inode)
+		vfs_dq_free_inode(inode);
+	if (handle)
+		ocfs2_commit_trans(osb, handle);
+
+	if (orphan_dir) {
+		/* This was locked for us in ocfs2_prepare_orphan_dir() */
+		ocfs2_inode_unlock(orphan_dir, 1);
+		mutex_unlock(&orphan_dir->i_mutex);
+		iput(orphan_dir);
+	}
+
+	if (status == -ENOSPC)
+		mlog(0, "Disk is full\n");
+
+	if ((status < 0) && inode) {
+		clear_nlink(inode);
+		iput(inode);
+	}
+
+	if (inode_ac)
+		ocfs2_free_alloc_context(inode_ac);
+
+	brelse(new_di_bh);
+
+	if (!status)
+		*new_inode = inode;
+
+	ocfs2_free_dir_lookup_result(&orphan_insert);
+
+	ocfs2_inode_unlock(dir, 1);
+	brelse(parent_di_bh);
+	return status;
+}
+
+int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
+				   struct inode *inode,
+				   struct dentry *dentry)
+{
+	int status = 0;
+	struct buffer_head *parent_di_bh = NULL;
+	handle_t *handle = NULL;
+	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
+	struct ocfs2_dinode *dir_di, *di;
+	struct inode *orphan_dir_inode = NULL;
+	struct buffer_head *orphan_dir_bh = NULL;
+	struct buffer_head *di_bh = NULL;
+	struct ocfs2_dir_lookup_result lookup = { NULL, };
+
+	mlog_entry("(0x%p, 0x%p, %.*s')\n", dir, dentry,
+		   dentry->d_name.len, dentry->d_name.name);
+
+	status = ocfs2_inode_lock(dir, &parent_di_bh, 1);
+	if (status < 0) {
+		if (status != -ENOENT)
+			mlog_errno(status);
+		return status;
+	}
+
+	dir_di = (struct ocfs2_dinode *) parent_di_bh->b_data;
+	if (!dir_di->i_links_count) {
+		/* can't make a file in a deleted directory. */
+		status = -ENOENT;
+		goto leave;
+	}
+
+	status = ocfs2_check_dir_for_entry(dir, dentry->d_name.name,
+					   dentry->d_name.len);
+	if (status)
+		goto leave;
+
+	/* get a spot inside the dir. */
+	status = ocfs2_prepare_dir_for_insert(osb, dir, parent_di_bh,
+					      dentry->d_name.name,
+					      dentry->d_name.len, &lookup);
+	if (status < 0) {
+		mlog_errno(status);
+		goto leave;
+	}
+
+	orphan_dir_inode = ocfs2_get_system_file_inode(osb,
+						       ORPHAN_DIR_SYSTEM_INODE,
+						       osb->slot_num);
+	if (!orphan_dir_inode) {
+		status = -EEXIST;
+		mlog_errno(status);
+		goto leave;
+	}
+
+	mutex_lock(&orphan_dir_inode->i_mutex);
+
+	status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
+	if (status < 0) {
+		mlog_errno(status);
+		mutex_unlock(&orphan_dir_inode->i_mutex);
+		iput(orphan_dir_inode);
+		goto leave;
+	}
+
+	status = ocfs2_read_inode_block(inode, &di_bh);
+	if (status < 0) {
+		mlog_errno(status);
+		goto orphan_unlock;
+	}
+
+	handle = ocfs2_start_trans(osb, ocfs2_rename_credits(osb->sb));
+	if (IS_ERR(handle)) {
+		status = PTR_ERR(handle);
+		handle = NULL;
+		mlog_errno(status);
+		goto orphan_unlock;
+	}
+
+	status = ocfs2_journal_access_di(handle, INODE_CACHE(inode),
+					 di_bh, OCFS2_JOURNAL_ACCESS_WRITE);
+	if (status < 0) {
+		mlog_errno(status);
+		goto out_commit;
+	}
+
+	status = ocfs2_orphan_del(osb, handle, orphan_dir_inode, inode,
+				  orphan_dir_bh);
+	if (status < 0) {
+		mlog_errno(status);
+		goto out_commit;
+	}
+
+	di = (struct ocfs2_dinode *)di_bh->b_data;
+	le32_add_cpu(&di->i_flags, -OCFS2_ORPHANED_FL);
+	di->i_orphaned_slot = 0;
+	ocfs2_journal_dirty(handle, di_bh);
+
+	status = ocfs2_add_entry(handle, dentry, inode,
+				 OCFS2_I(inode)->ip_blkno, parent_di_bh,
+				 &lookup);
+	if (status < 0) {
+		mlog_errno(status);
+		goto out_commit;
+	}
+
+	status = ocfs2_dentry_attach_lock(dentry, inode,
+					  OCFS2_I(dir)->ip_blkno);
+	if (status) {
+		mlog_errno(status);
+		goto out_commit;
+	}
+
+	insert_inode_hash(inode);
+	dentry->d_op = &ocfs2_dentry_ops;
+	d_instantiate(dentry, inode);
+	status = 0;
+out_commit:
+	ocfs2_commit_trans(osb, handle);
+orphan_unlock:
+	ocfs2_inode_unlock(orphan_dir_inode, 1);
+	mutex_unlock(&orphan_dir_inode->i_mutex);
+	iput(orphan_dir_inode);
+leave:
+
+	ocfs2_inode_unlock(dir, 1);
+
+	brelse(di_bh);
+	brelse(parent_di_bh);
+	brelse(orphan_dir_bh);
+
+	ocfs2_free_dir_lookup_result(&lookup);
+
+	mlog_exit(status);
+
+	return status;
+}
+
 const struct inode_operations ocfs2_dir_iops = {
 	.create		= ocfs2_create,
 	.lookup		= ocfs2_lookup,
diff --git a/fs/ocfs2/namei.h b/fs/ocfs2/namei.h
index 688aef64c879..e5d059d4f115 100644
--- a/fs/ocfs2/namei.h
+++ b/fs/ocfs2/namei.h
@@ -35,5 +35,11 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
 		     struct inode *orphan_dir_inode,
 		     struct inode *inode,
 		     struct buffer_head *orphan_dir_bh);
+int ocfs2_create_inode_in_orphan(struct inode *dir,
+				 int mode,
+				 struct inode **new_inode);
+int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
+				   struct inode *new_inode,
+				   struct dentry *new_dentry);
 
 #endif /* OCFS2_NAMEI_H */
-- 
cgit v1.2.3


From 0fe9b66c65f3ff227da45381afe7612f91e32740 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:47:56 +0800
Subject: ocfs2: Add preserve to reflink.

reflink has 2 options for the destination file:
1. snapshot: reflink will attempt to preserve ownership, permissions,
   and all other security state in order to create a full snapshot.
2. new file: it will acquire the data extent sharing but will see the
   file's security state and attributes initialized as a new file.

So add the option to ocfs2.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/ocfs2/refcounttree.c | 38 +++++++++++--------
 fs/ocfs2/xattr.c        | 98 ++++++++++++++++++++++++++++++++++++++++++++++---
 fs/ocfs2/xattr.h        |  5 ++-
 3 files changed, 119 insertions(+), 22 deletions(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 47df8c5cd3c5..5d88e76f223a 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -3904,7 +3904,8 @@ out:
 static int ocfs2_complete_reflink(struct inode *s_inode,
 				  struct buffer_head *s_bh,
 				  struct inode *t_inode,
-				  struct buffer_head *t_bh)
+				  struct buffer_head *t_bh,
+				  bool preserve)
 {
 	int ret;
 	handle_t *handle;
@@ -3939,22 +3940,26 @@ static int ocfs2_complete_reflink(struct inode *s_inode,
 	di->i_size = s_di->i_size;
 	di->i_dyn_features = s_di->i_dyn_features;
 	di->i_attr = s_di->i_attr;
-	di->i_uid = s_di->i_uid;
-	di->i_gid = s_di->i_gid;
-	di->i_mode = s_di->i_mode;
 
-	/*
-	 * update time.
-	 * we want mtime to appear identical to the source and update ctime.
-	 */
-	t_inode->i_ctime = CURRENT_TIME;
+	if (preserve) {
+		di->i_uid = s_di->i_uid;
+		di->i_gid = s_di->i_gid;
+		di->i_mode = s_di->i_mode;
+
+		/*
+		 * update time.
+		 * we want mtime to appear identical to the source and
+		 * update ctime.
+		 */
+		t_inode->i_ctime = CURRENT_TIME;
 
-	di->i_ctime = cpu_to_le64(t_inode->i_ctime.tv_sec);
-	di->i_ctime_nsec = cpu_to_le32(t_inode->i_ctime.tv_nsec);
+		di->i_ctime = cpu_to_le64(t_inode->i_ctime.tv_sec);
+		di->i_ctime_nsec = cpu_to_le32(t_inode->i_ctime.tv_nsec);
 
-	t_inode->i_mtime = s_inode->i_mtime;
-	di->i_mtime = s_di->i_mtime;
-	di->i_mtime_nsec = s_di->i_mtime_nsec;
+		t_inode->i_mtime = s_inode->i_mtime;
+		di->i_mtime = s_di->i_mtime;
+		di->i_mtime_nsec = s_di->i_mtime_nsec;
+	}
 
 	ocfs2_journal_dirty(handle, t_bh);
 
@@ -3966,7 +3971,8 @@ out_commit:
 static int ocfs2_create_reflink_node(struct inode *s_inode,
 				     struct buffer_head *s_bh,
 				     struct inode *t_inode,
-				     struct buffer_head *t_bh)
+				     struct buffer_head *t_bh,
+				     bool preserve)
 {
 	int ret;
 	struct buffer_head *ref_root_bh = NULL;
@@ -4001,7 +4007,7 @@ static int ocfs2_create_reflink_node(struct inode *s_inode,
 		goto out_unlock_refcount;
 	}
 
-	ret = ocfs2_complete_reflink(s_inode, s_bh, t_inode, t_bh);
+	ret = ocfs2_complete_reflink(s_inode, s_bh, t_inode, t_bh, preserve);
 	if (ret)
 		mlog_errno(ret);
 
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 8d1a0abc105c..fe3419068df2 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -56,6 +56,7 @@
 #include "super.h"
 #include "xattr.h"
 #include "refcounttree.h"
+#include "acl.h"
 
 struct ocfs2_xattr_def_value_root {
 	struct ocfs2_xattr_value_root	xv;
@@ -204,6 +205,8 @@ static int ocfs2_get_xattr_tree_value_root(struct super_block *sb,
 					   int offset,
 					   struct ocfs2_xattr_value_root **xv,
 					   struct buffer_head **bh);
+static int ocfs2_xattr_security_set(struct inode *inode, const char *name,
+				    const void *value, size_t size, int flags);
 
 static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb)
 {
@@ -5994,6 +5997,7 @@ out:
 	return ret;
 }
 
+typedef int (should_xattr_reflinked)(struct ocfs2_xattr_entry *xe);
 /*
  * Store the information we need in xattr reflink.
  * old_bh and new_bh are inode bh for the old and new inode.
@@ -6006,6 +6010,7 @@ struct ocfs2_xattr_reflink {
 	struct ocfs2_caching_info *ref_ci;
 	struct buffer_head *ref_root_bh;
 	struct ocfs2_cached_dealloc_ctxt *dealloc;
+	should_xattr_reflinked *xattr_reflinked;
 };
 
 /*
@@ -6147,6 +6152,9 @@ out:
  * NOTE:
  * Before we call this function, the caller has memcpy the xattr in
  * old_xh to the new_xh.
+ *
+ * If args.xattr_reflinked is set, call it to decide whether the xe should
+ * be reflinked or not. If not, remove it from the new xattr header.
  */
 static int ocfs2_reflink_xattr_header(handle_t *handle,
 				      struct ocfs2_xattr_reflink *args,
@@ -6159,10 +6167,10 @@ static int ocfs2_reflink_xattr_header(handle_t *handle,
 				      get_xattr_value_root *func,
 				      void *para)
 {
-	int ret = 0, i;
+	int ret = 0, i, j;
 	struct super_block *sb = args->old_inode->i_sb;
 	struct buffer_head *value_bh;
-	struct ocfs2_xattr_entry *xe;
+	struct ocfs2_xattr_entry *xe, *last;
 	struct ocfs2_xattr_value_root *xv, *new_xv;
 	struct ocfs2_extent_tree data_et;
 	u32 clusters, cpos, p_cluster, num_clusters;
@@ -6170,9 +6178,30 @@ static int ocfs2_reflink_xattr_header(handle_t *handle,
 
 	mlog(0, "reflink xattr in container %llu, count = %u\n",
 	     (unsigned long long)old_bh->b_blocknr, le16_to_cpu(xh->xh_count));
-	for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
+
+	last = &new_xh->xh_entries[le16_to_cpu(new_xh->xh_count)];
+	for (i = 0, j = 0; i < le16_to_cpu(xh->xh_count); i++, j++) {
 		xe = &xh->xh_entries[i];
 
+		if (args->xattr_reflinked && !args->xattr_reflinked(xe)) {
+			xe = &new_xh->xh_entries[j];
+
+			le16_add_cpu(&new_xh->xh_count, -1);
+			if (new_xh->xh_count) {
+				memmove(xe, xe + 1,
+					(void *)last - (void *)xe);
+				memset(last, 0,
+				       sizeof(struct ocfs2_xattr_entry));
+			}
+
+			/*
+			 * We don't want j to increase in the next round since
+			 * it is already moved ahead.
+			 */
+			j--;
+			continue;
+		}
+
 		if (ocfs2_xattr_is_local(xe))
 			continue;
 
@@ -6182,7 +6211,7 @@ static int ocfs2_reflink_xattr_header(handle_t *handle,
 			break;
 		}
 
-		ret = func(sb, new_bh, new_xh, i, &new_xv, &value_bh, para);
+		ret = func(sb, new_bh, new_xh, j, &new_xv, &value_bh, para);
 		if (ret) {
 			mlog_errno(ret);
 			break;
@@ -6847,10 +6876,20 @@ out:
 	return ret;
 }
 
+static int ocfs2_reflink_xattr_no_security(struct ocfs2_xattr_entry *xe)
+{
+	int type = ocfs2_xattr_get_type(xe);
+
+	return type != OCFS2_XATTR_INDEX_SECURITY &&
+	       type != OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS &&
+	       type != OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT;
+}
+
 int ocfs2_reflink_xattrs(struct inode *old_inode,
 			 struct buffer_head *old_bh,
 			 struct inode *new_inode,
-			 struct buffer_head *new_bh)
+			 struct buffer_head *new_bh,
+			 bool preserve_security)
 {
 	int ret;
 	struct ocfs2_xattr_reflink args;
@@ -6878,6 +6917,10 @@ int ocfs2_reflink_xattrs(struct inode *old_inode,
 	args.ref_ci = &ref_tree->rf_ci;
 	args.ref_root_bh = ref_root_bh;
 	args.dealloc = &dealloc;
+	if (preserve_security)
+		args.xattr_reflinked = NULL;
+	else
+		args.xattr_reflinked = ocfs2_reflink_xattr_no_security;
 
 	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
 		ret = ocfs2_reflink_xattr_inline(&args);
@@ -6917,6 +6960,51 @@ out:
 	return ret;
 }
 
+/*
+ * Initialize security and acl for an already created inode.
+ * Used when reflinking a file without preserving its security attributes.
+ *
+ * It uses common APIs like ocfs2_xattr_set, so the caller
+ * must not hold any lock except i_mutex.
+ */
+int ocfs2_init_security_and_acl(struct inode *dir,
+				struct inode *inode)
+{
+	int ret = 0;
+	struct buffer_head *dir_bh = NULL;
+	struct ocfs2_security_xattr_info si = {
+		.enable = 1,
+	};
+
+	ret = ocfs2_init_security_get(inode, dir, &si);
+	if (!ret) {
+		ret = ocfs2_xattr_security_set(inode, si.name,
+					       si.value, si.value_len,
+					       XATTR_CREATE);
+		if (ret) {
+			mlog_errno(ret);
+			goto leave;
+		}
+	} else if (ret != -EOPNOTSUPP) {
+		mlog_errno(ret);
+		goto leave;
+	}
+
+	ret = ocfs2_inode_lock(dir, &dir_bh, 0);
+	if (ret) {
+		mlog_errno(ret);
+		goto leave;
+	}
+
+	ret = ocfs2_init_acl(NULL, inode, dir, NULL, dir_bh, NULL, NULL);
+	if (ret)
+		mlog_errno(ret);
+
+	ocfs2_inode_unlock(dir, 0);
+	brelse(dir_bh);
+leave:
+	return ret;
+}
 /*
  * 'security' attributes support
  */
diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h
index 4f913053d5ee..08e36389f56d 100644
--- a/fs/ocfs2/xattr.h
+++ b/fs/ocfs2/xattr.h
@@ -93,5 +93,8 @@ int ocfs2_xattr_attach_refcount_tree(struct inode *inode,
 int ocfs2_reflink_xattrs(struct inode *old_inode,
 			 struct buffer_head *old_bh,
 			 struct inode *new_inode,
-			 struct buffer_head *new_bh);
+			 struct buffer_head *new_bh,
+			 bool preserve_security);
+int ocfs2_init_security_and_acl(struct inode *dir,
+				struct inode *inode);
 #endif /* OCFS2_XATTR_H */
-- 
cgit v1.2.3


From 09bf27a000209e9e8c9c048b4c50f6bb0dd857bb Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Mon, 21 Sep 2009 10:38:17 +0800
Subject: ocfs2: Implement ocfs2_reflink.

Implement ocfs2_reflink.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/ocfs2/refcounttree.c | 123 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 123 insertions(+)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 5d88e76f223a..7a8a384d8ad1 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -33,6 +33,7 @@
 #include "extent_map.h"
 #include "aops.h"
 #include "xattr.h"
+#include "namei.h"
 
 #include <linux/bio.h>
 #include <linux/blkdev.h>
@@ -4022,3 +4023,125 @@ out:
 
 	return ret;
 }
+
+static int __ocfs2_reflink(struct dentry *old_dentry,
+			   struct buffer_head *old_bh,
+			   struct inode *new_inode,
+			   bool preserve)
+{
+	int ret;
+	struct inode *inode = old_dentry->d_inode;
+	struct buffer_head *new_bh = NULL;
+
+	ret = filemap_fdatawrite(inode->i_mapping);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_attach_refcount_tree(inode, old_bh);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	mutex_lock(&new_inode->i_mutex);
+	ret = ocfs2_inode_lock(new_inode, &new_bh, 1);
+	if (ret) {
+		mlog_errno(ret);
+		goto out_unlock;
+	}
+
+	ret = ocfs2_create_reflink_node(inode, old_bh,
+					new_inode, new_bh, preserve);
+	if (ret) {
+		mlog_errno(ret);
+		goto inode_unlock;
+	}
+
+	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_XATTR_FL) {
+		ret = ocfs2_reflink_xattrs(inode, old_bh,
+					   new_inode, new_bh,
+					   preserve);
+		if (ret)
+			mlog_errno(ret);
+	}
+inode_unlock:
+	ocfs2_inode_unlock(new_inode, 1);
+	brelse(new_bh);
+out_unlock:
+	mutex_unlock(&new_inode->i_mutex);
+out:
+	if (!ret) {
+		ret = filemap_fdatawait(inode->i_mapping);
+		if (ret)
+			mlog_errno(ret);
+	}
+	return ret;
+}
+
+static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
+			 struct dentry *new_dentry, bool preserve)
+{
+	int error;
+	struct inode *inode = old_dentry->d_inode;
+	struct buffer_head *old_bh = NULL;
+	struct inode *new_orphan_inode = NULL;
+
+	if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb)))
+		return -EOPNOTSUPP;
+
+	error = ocfs2_create_inode_in_orphan(dir, inode->i_mode,
+					     &new_orphan_inode);
+	if (error) {
+		mlog_errno(error);
+		goto out;
+	}
+
+	error = ocfs2_inode_lock(inode, &old_bh, 1);
+	if (error) {
+		mlog_errno(error);
+		goto out;
+	}
+
+	down_write(&OCFS2_I(inode)->ip_xattr_sem);
+	down_write(&OCFS2_I(inode)->ip_alloc_sem);
+	error = __ocfs2_reflink(old_dentry, old_bh,
+				new_orphan_inode, preserve);
+	up_write(&OCFS2_I(inode)->ip_alloc_sem);
+	up_write(&OCFS2_I(inode)->ip_xattr_sem);
+
+	ocfs2_inode_unlock(inode, 1);
+	brelse(old_bh);
+
+	if (error) {
+		mlog_errno(error);
+		goto out;
+	}
+
+	/* If security attributes aren't preserved, re-initialize them. */
+	if (!preserve) {
+		error = ocfs2_init_security_and_acl(dir, new_orphan_inode);
+		if (error)
+			mlog_errno(error);
+	}
+out:
+	if (!error) {
+		error = ocfs2_mv_orphaned_inode_to_new(dir, new_orphan_inode,
+						       new_dentry);
+		if (error)
+			mlog_errno(error);
+	}
+
+	if (new_orphan_inode) {
+		/*
+		 * We need to open_unlock the inode no matter whether we
+		 * succeed or not, so that other nodes can delete it later.
+		 */
+		ocfs2_open_unlock(new_orphan_inode);
+		if (error)
+			iput(new_orphan_inode);
+	}
+
+	return error;
+}
-- 
cgit v1.2.3


From 64871b8d62570fabec3b0959d494f8e0b87f5c4b Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 18 Aug 2009 11:48:02 +0800
Subject: ocfs2: Enable refcount tree support.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/ocfs2/ocfs2_fs.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 40072cdef7b6..4a4565b7bc14 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -99,7 +99,8 @@
 					 | OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK \
 					 | OCFS2_FEATURE_INCOMPAT_XATTR \
 					 | OCFS2_FEATURE_INCOMPAT_META_ECC \
-					 | OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS)
+					 | OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS \
+					 | OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE)
 #define OCFS2_FEATURE_RO_COMPAT_SUPP	(OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \
 					 | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \
 					 | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)
-- 
cgit v1.2.3


From bd50873dc725a9fa72592ecc986c58805e823051 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Mon, 21 Sep 2009 11:25:14 +0800
Subject: ocfs2: Add ioctl for reflink.

The ioctl takes 3 parameters: old_path, new_path and
preserve, and calls ocfs2_vfs_reflink. It is useful when we backport
the reflink feature to old kernels.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/ocfs2/ioctl.c        |  14 ++++
 fs/ocfs2/ocfs2_fs.h     |   9 +++
 fs/ocfs2/refcounttree.c | 166 ++++++++++++++++++++++++++++++++++++++++++++++++
 fs/ocfs2/refcounttree.h |   4 ++
 4 files changed, 193 insertions(+)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index 9fcd36dcc9a0..a68d0e4ca6dc 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -22,6 +22,7 @@
 #include "ocfs2_fs.h"
 #include "ioctl.h"
 #include "resize.h"
+#include "refcounttree.h"
 
 #include <linux/ext2_fs.h>
 
@@ -116,6 +117,9 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 	int status;
 	struct ocfs2_space_resv sr;
 	struct ocfs2_new_group_input input;
+	struct reflink_arguments args;
+	const char *old_path, *new_path;
+	bool preserve;
 
 	switch (cmd) {
 	case OCFS2_IOC_GETFLAGS:
@@ -161,6 +165,15 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 			return -EFAULT;
 
 		return ocfs2_group_add(inode, &input);
+	case OCFS2_IOC_REFLINK:
+		if (copy_from_user(&args, (struct reflink_arguments *)arg,
+				   sizeof(args)))
+			return -EFAULT;
+		old_path = (const char *)(unsigned long)args.old_path;
+		new_path = (const char *)(unsigned long)args.new_path;
+		preserve = (args.preserve != 0);
+
+		return ocfs2_reflink_ioctl(inode, old_path, new_path, preserve);
 	default:
 		return -ENOTTY;
 	}
@@ -183,6 +196,7 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 	case OCFS2_IOC_GROUP_EXTEND:
 	case OCFS2_IOC_GROUP_ADD:
 	case OCFS2_IOC_GROUP_ADD64:
+	case OCFS2_IOC_REFLINK:
 		break;
 	default:
 		return -ENOIOCTLCMD;
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 4a4565b7bc14..e9431e4a5e7c 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -301,6 +301,15 @@ struct ocfs2_new_group_input {
 #define OCFS2_IOC_GROUP_ADD	_IOW('o', 2,struct ocfs2_new_group_input)
 #define OCFS2_IOC_GROUP_ADD64	_IOW('o', 3,struct ocfs2_new_group_input)
 
+/* Used to pass 2 file names to reflink. */
+struct reflink_arguments {
+	__u64 old_path;
+	__u64 new_path;
+	__u64 preserve;
+};
+#define OCFS2_IOC_REFLINK	_IOW('o', 4, struct reflink_arguments)
+
+
 /*
  * Journal Flags (ocfs2_dinode.id1.journal1.i_flags)
  */
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 7a8a384d8ad1..60287fc56bcb 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -42,6 +42,11 @@
 #include <linux/writeback.h>
 #include <linux/pagevec.h>
 #include <linux/swap.h>
+#include <linux/security.h>
+#include <linux/fsnotify.h>
+#include <linux/quotaops.h>
+#include <linux/namei.h>
+#include <linux/mount.h>
 
 struct ocfs2_cow_context {
 	struct inode *inode;
@@ -4145,3 +4150,164 @@ out:
 
 	return error;
 }
+
+/*
+ * Below here are the bits used by OCFS2_IOC_REFLINK() to fake
+ * sys_reflink().  This will go away when vfs_reflink() exists in
+ * fs/namei.c.
+ */
+
+/* copied from may_create in VFS. */
+static inline int ocfs2_may_create(struct inode *dir, struct dentry *child)
+{
+	if (child->d_inode)
+		return -EEXIST;
+	if (IS_DEADDIR(dir))
+		return -ENOENT;
+	return inode_permission(dir, MAY_WRITE | MAY_EXEC);
+}
+
+/* copied from user_path_parent. */
+static int ocfs2_user_path_parent(const char __user *path,
+				  struct nameidata *nd, char **name)
+{
+	char *s = getname(path);
+	int error;
+
+	if (IS_ERR(s))
+		return PTR_ERR(s);
+
+	error = path_lookup(s, LOOKUP_PARENT, nd);
+	if (error)
+		putname(s);
+	else
+		*name = s;
+
+	return error;
+}
+
+/**
+ * ocfs2_vfs_reflink - Create a reference-counted link
+ *
+ * @old_dentry:        source dentry + inode
+ * @dir:       directory to create the target
+ * @new_dentry:        target dentry
+ * @preserve:  if true, preserve all file attributes
+ */
+int ocfs2_vfs_reflink(struct dentry *old_dentry, struct inode *dir,
+		      struct dentry *new_dentry, bool preserve)
+{
+	struct inode *inode = old_dentry->d_inode;
+	int error;
+
+	if (!inode)
+		return -ENOENT;
+
+	error = ocfs2_may_create(dir, new_dentry);
+	if (error)
+		return error;
+
+	if (dir->i_sb != inode->i_sb)
+		return -EXDEV;
+
+	/*
+	 * A reflink to an append-only or immutable file cannot be created.
+	 */
+	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
+		return -EPERM;
+
+	/* Only regular files can be reflinked. */
+	if (!S_ISREG(inode->i_mode))
+		return -EPERM;
+
+	/*
+	 * If the caller wants to preserve ownership, they require the
+	 * rights to do so.
+	 */
+	if (preserve) {
+		if ((current_fsuid() != inode->i_uid) && !capable(CAP_CHOWN))
+			return -EPERM;
+		if (!in_group_p(inode->i_gid) && !capable(CAP_CHOWN))
+			return -EPERM;
+	}
+
+	/*
+	 * If the caller is modifying any aspect of the attributes, they
+	 * are not creating a snapshot.  They need read permission on the
+	 * file.
+	 */
+	if (!preserve) {
+		error = inode_permission(inode, MAY_READ);
+		if (error)
+			return error;
+	}
+
+	mutex_lock(&inode->i_mutex);
+	vfs_dq_init(dir);
+	error = ocfs2_reflink(old_dentry, dir, new_dentry, preserve);
+	mutex_unlock(&inode->i_mutex);
+	if (!error)
+		fsnotify_create(dir, new_dentry);
+	return error;
+}
+/*
+ * Most of the code is copied from sys_linkat.
+ */
+int ocfs2_reflink_ioctl(struct inode *inode,
+			const char __user *oldname,
+			const char __user *newname,
+			bool preserve)
+{
+	struct dentry *new_dentry;
+	struct nameidata nd;
+	struct path old_path;
+	int error;
+	char *to = NULL;
+
+	if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb)))
+		return -EOPNOTSUPP;
+
+	error = user_path_at(AT_FDCWD, oldname, 0, &old_path);
+	if (error) {
+		mlog_errno(error);
+		return error;
+	}
+
+	error = ocfs2_user_path_parent(newname, &nd, &to);
+	if (error) {
+		mlog_errno(error);
+		goto out;
+	}
+
+	error = -EXDEV;
+	if (old_path.mnt != nd.path.mnt)
+		goto out_release;
+	new_dentry = lookup_create(&nd, 0);
+	error = PTR_ERR(new_dentry);
+	if (IS_ERR(new_dentry)) {
+		mlog_errno(error);
+		goto out_unlock;
+	}
+
+	error = mnt_want_write(nd.path.mnt);
+	if (error) {
+		mlog_errno(error);
+		goto out_dput;
+	}
+
+	error = ocfs2_vfs_reflink(old_path.dentry,
+				  nd.path.dentry->d_inode,
+				  new_dentry, preserve);
+	mnt_drop_write(nd.path.mnt);
+out_dput:
+	dput(new_dentry);
+out_unlock:
+	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
+out_release:
+	path_put(&nd.path);
+	putname(to);
+out:
+	path_put(&old_path);
+
+	return error;
+}
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h
index 2c238e682570..c1d19b1d3ecc 100644
--- a/fs/ocfs2/refcounttree.h
+++ b/fs/ocfs2/refcounttree.h
@@ -99,4 +99,8 @@ int ocfs2_increase_refcount(handle_t *handle,
 			    u64 cpos, u32 len,
 			    struct ocfs2_alloc_context *meta_ac,
 			    struct ocfs2_cached_dealloc_ctxt *dealloc);
+int ocfs2_reflink_ioctl(struct inode *inode,
+			const char __user *oldname,
+			const char __user *newname,
+			bool preserve);
 #endif /* OCFS2_REFCOUNTTREE_H */
-- 
cgit v1.2.3


From a2f2ddbf2bafdbc7e4f3bbf09439b42c8fee2747 Mon Sep 17 00:00:00 2001
From: Sunil Mushran <sunil.mushran@oracle.com>
Date: Wed, 19 Aug 2009 15:16:01 -0700
Subject: ocfs2: __ocfs2_abort() should not enable panic for local mounts

In a clustered setup, we have to panic the box on journal abort. This is
because we don't have the facility to go hard readonly. With hard ro, another
node would detect node failure and initiate recovery.

Having said that, we shouldn't force panic if the volume is mounted locally.
This patch defers the handling to the mount option, errors.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/super.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 8b6062176970..154e62522b05 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -2494,7 +2494,8 @@ void __ocfs2_abort(struct super_block* sb,
 	/* Force a panic(). This stinks, but it's better than letting
 	 * things continue without having a proper hard readonly
 	 * here. */
-	OCFS2_SB(sb)->s_mount_opt |= OCFS2_MOUNT_ERRORS_PANIC;
+	if (!ocfs2_mount_local(OCFS2_SB(sb)))
+		OCFS2_SB(sb)->s_mount_opt |= OCFS2_MOUNT_ERRORS_PANIC;
 	ocfs2_handle_error(sb);
 }
 
-- 
cgit v1.2.3


From d92bc5127b27f315ef0ef2c1e1829fd6a5cba54a Mon Sep 17 00:00:00 2001
From: Coly Li <coly.li@suse.de>
Date: Fri, 28 Aug 2009 19:03:18 +0800
Subject: dlmglue.c: add missed mlog lines

This patch adds the missing mlog_exit() and mlog_exit_void() calls on
early-return paths.

Signed-off-by: Coly Li <coly.li@suse.de>
Acked-by: Mark Fasheh <mfasheh@suse.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/dlmglue.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index bb2fc6993e2a..0d38d67194cb 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -1577,8 +1577,10 @@ int ocfs2_rw_lock(struct inode *inode, int write)
 	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
 	     write ? "EXMODE" : "PRMODE");
 
-	if (ocfs2_mount_local(osb))
+	if (ocfs2_mount_local(osb)) {
+		mlog_exit(0);
 		return 0;
+	}
 
 	lockres = &OCFS2_I(inode)->ip_rw_lockres;
 
@@ -3038,6 +3040,7 @@ static void ocfs2_unlock_ast(void *opaque, int error)
 		     "unlock_action %d\n", error, lockres->l_name,
 		     lockres->l_unlock_action);
 		spin_unlock_irqrestore(&lockres->l_lock, flags);
+		mlog_exit_void();
 		return;
 	}
 
-- 
cgit v1.2.3


From 83e32d9044a4510fffdf65c2691a25c0ba84e259 Mon Sep 17 00:00:00 2001
From: Wengang Wang <wen.gang.wang@oracle.com>
Date: Thu, 3 Sep 2009 15:56:33 +0800
Subject: ocfs2: add spinlock protection when dealing with lockres->purge.

When we check or modify lockres->purge, we should hold lockres->spinlock.
In dlm_purge_lockres(), the check and modification are done without that protection.
This patch fixes it.

Signed-off-by: Wengang Wang <wen.gang.wang@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/dlm/dlmthread.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index d490b66ad9d7..98569e86c613 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -212,14 +212,18 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm,
 		spin_lock(&dlm->spinlock);
 	}
 
+	spin_lock(&res->spinlock);
 	if (!list_empty(&res->purge)) {
 		mlog(0, "removing lockres %.*s:%p from purgelist, "
 		     "master = %d\n", res->lockname.len, res->lockname.name,
 		     res, master);
 		list_del_init(&res->purge);
+		spin_unlock(&res->spinlock);
 		dlm_lockres_put(res);
 		dlm->purge_count--;
-	}
+	} else
+		spin_unlock(&res->spinlock);
+
 	__dlm_unhash_lockres(res);
 
 	/* lockres is not in the hash now.  drop the flag and wake up
-- 
cgit v1.2.3


From b80474b432913f73cce8db001e9fa3104f9b79ee Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Thu, 10 Sep 2009 15:28:47 +0800
Subject: ocfs2: Use buffer IO if we are appending a file.

In ocfs2_file_aio_write, we try to prevent direct I/O if
we find that we are appending (changing i_size), and call
generic_file_aio_write_nolock. But the O_DIRECT flag is
still set, so this function eventually calls
generic_file_direct_write, which updates i_size and leaves
di->i_size alone. The bug is
http://oss.oracle.com/bugzilla/show_bug.cgi?id=1173.

So this patch lets ocfs2_direct_IO return 0 directly if we
are appending, so that the buffered write path is used and
di->i_size gets updated correctly. This is also
what we want in ocfs2_file_aio_write.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/aops.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'fs/ocfs2')

diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 33e03c551127..72e76062a900 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -690,6 +690,10 @@ static ssize_t ocfs2_direct_IO(int rw,
 	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
 		return 0;
 
+	/* Fall back to buffered I/O if we are appending. */
+	if (i_size_read(inode) <= offset)
+		return 0;
+
 	ret = blockdev_direct_IO_no_locking(rw, iocb, inode,
 					    inode->i_sb->s_bdev, iov, offset,
 					    nr_segs, 
-- 
cgit v1.2.3