Merge branch 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2

* 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2: [PATCH] ocfs2: fix oops in mmap_truncate testing configfs: call drop_link() to cleanup after create_link() failure configfs: Allow ->make_item() and ->make_group() to return detailed errors. configfs: Fix failing mkdir() making racing rmdir() fail configfs: Fix deadlock with racing rmdir() and rename() configfs: Make configfs_new_dirent() return error code instead of NULL configfs: Protect configfs_dirent s_links list mutations configfs: Introduce configfs_dirent_lock ocfs2: Don't snprintf() without a format. ocfs2: Fix CONFIG_OCFS2_DEBUG_FS #ifdefs ocfs2/net: Silence build warnings on sparc64 ocfs2: Handle error during journal load ocfs2: Silence an error message in ocfs2_file_aio_read() ocfs2: use simple_read_from_buffer() ocfs2: fix printk format warnings with OCFS2_FS_STATS=n [PATCH 2/2] ocfs2: Instrument fs cluster locks [PATCH 1/2] ocfs2: Add CONFIG_OCFS2_FS_STATS config option
author: Linus Torvalds <torvalds@linux-foundation.org> 2008-07-17 10:55:51 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2008-07-17 10:55:51 -0700
commit: 5b664cb235e97afbf34db9c4d77f08ebd725335e (patch)
tree: 518540649c38342209790de8e0b575ac1a6fa722 /fs
parent: f39548a6ad1dbdfaab552419386ec5bb1d76fa0d (diff)
parent: c0420ad2ca514551ca086510b0e7d17a05c70492 (diff)
18 files changed, 361 insertions, 111 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index 17216ba99c8..37db79a2ff9 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -470,6 +470,14 @@ config OCFS2_FS_USERSPACE_CLUSTER
 	  It is safe to say Y, as the clustering method is run-time
 	  selectable.
 
+config OCFS2_FS_STATS
+	bool "OCFS2 statistics"
+	depends on OCFS2_FS
+	default y
+	help
+	  This option allows some fs statistics to be captured. Enabling
+	  this option may increase the memory consumption.
+
 config OCFS2_DEBUG_MASKLOG
 	bool "OCFS2 logging support"
 	depends on OCFS2_FS
diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h
index cca98609aa7..da015c12e3e 100644
--- a/fs/configfs/configfs_internal.h
+++ b/fs/configfs/configfs_internal.h
@@ -26,6 +26,7 @@
 
 #include <linux/slab.h>
 #include <linux/list.h>
+#include <linux/spinlock.h>
 
 struct configfs_dirent {
 	atomic_t		s_count;
@@ -47,8 +48,11 @@ struct configfs_dirent {
 #define CONFIGFS_USET_DIR	0x0040
 #define CONFIGFS_USET_DEFAULT	0x0080
 #define CONFIGFS_USET_DROPPING	0x0100
+#define CONFIGFS_USET_IN_MKDIR	0x0200
 #define CONFIGFS_NOT_PINNED	(CONFIGFS_ITEM_ATTR)
 
+extern spinlock_t configfs_dirent_lock;
+
 extern struct vfsmount * configfs_mount;
 extern struct kmem_cache *configfs_dir_cachep;
 
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index a48dc7dd876..0e64312a084 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -30,11 +30,25 @@
 #include <linux/mount.h>
 #include <linux/module.h>
 #include <linux/slab.h>
+#include <linux/err.h>
 
 #include <linux/configfs.h>
 #include "configfs_internal.h"
 
 DECLARE_RWSEM(configfs_rename_sem);
+/*
+ * Protects mutations of configfs_dirent linkage together with proper i_mutex
+ * Also protects mutations of symlinks linkage to target configfs_dirent
+ * Mutators of configfs_dirent linkage must *both* have the proper inode locked
+ * and configfs_dirent_lock locked, in that order.
+ * This allows one to safely traverse configfs_dirent trees and symlinks without
+ * having to lock inodes.
+ *
+ * Protects setting of CONFIGFS_USET_DROPPING: checking the flag
+ * unlocked is not reliable unless in detach_groups() called from
+ * rmdir()/unregister() and from configfs_attach_group()
+ */
+DEFINE_SPINLOCK(configfs_dirent_lock);
 
 static void configfs_d_iput(struct dentry * dentry,
 			    struct inode * inode)
@@ -74,13 +88,20 @@ static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent * pare
 
 	sd = kmem_cache_zalloc(configfs_dir_cachep, GFP_KERNEL);
 	if (!sd)
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 
 	atomic_set(&sd->s_count, 1);
 	INIT_LIST_HEAD(&sd->s_links);
 	INIT_LIST_HEAD(&sd->s_children);
-	list_add(&sd->s_sibling, &parent_sd->s_children);
 	sd->s_element = element;
+	spin_lock(&configfs_dirent_lock);
+	if (parent_sd->s_type & CONFIGFS_USET_DROPPING) {
+		spin_unlock(&configfs_dirent_lock);
+		kmem_cache_free(configfs_dir_cachep, sd);
+		return ERR_PTR(-ENOENT);
+	}
+	list_add(&sd->s_sibling, &parent_sd->s_children);
+	spin_unlock(&configfs_dirent_lock);
 
 	return sd;
 }
@@ -118,8 +139,8 @@ int configfs_make_dirent(struct configfs_dirent * parent_sd,
 	struct configfs_dirent * sd;
 
 	sd = configfs_new_dirent(parent_sd, element);
-	if (!sd)
-		return -ENOMEM;
+	if (IS_ERR(sd))
+		return PTR_ERR(sd);
 
 	sd->s_mode = mode;
 	sd->s_type = type;
@@ -173,7 +194,9 @@ static int create_dir(struct config_item * k, struct dentry * p,
 		} else {
 			struct configfs_dirent *sd = d->d_fsdata;
 			if (sd) {
+				spin_lock(&configfs_dirent_lock);
 				list_del_init(&sd->s_sibling);
+				spin_unlock(&configfs_dirent_lock);
 				configfs_put(sd);
 			}
 		}
@@ -224,7 +247,9 @@ int configfs_create_link(struct configfs_symlink *sl,
 		else {
 			struct configfs_dirent *sd = dentry->d_fsdata;
 			if (sd) {
+				spin_lock(&configfs_dirent_lock);
 				list_del_init(&sd->s_sibling);
+				spin_unlock(&configfs_dirent_lock);
 				configfs_put(sd);
 			}
 		}
@@ -238,7 +263,9 @@ static void remove_dir(struct dentry * d)
 	struct configfs_dirent * sd;
 
 	sd = d->d_fsdata;
+	spin_lock(&configfs_dirent_lock);
 	list_del_init(&sd->s_sibling);
+	spin_unlock(&configfs_dirent_lock);
 	configfs_put(sd);
 	if (d->d_inode)
 		simple_rmdir(parent->d_inode,d);
@@ -331,13 +358,13 @@ static struct dentry * configfs_lookup(struct inode *dir,
 
 /*
  * Only subdirectories count here.  Files (CONFIGFS_NOT_PINNED) are
- * attributes and are removed by rmdir().  We recurse, taking i_mutex
- * on all children that are candidates for default detach.  If the
- * result is clean, then configfs_detach_group() will handle dropping
- * i_mutex.  If there is an error, the caller will clean up the i_mutex
- * holders via configfs_detach_rollback().
+ * attributes and are removed by rmdir().  We recurse, setting
+ * CONFIGFS_USET_DROPPING on all children that are candidates for
+ * default detach.
+ * If there is an error, the caller will reset the flags via
+ * configfs_detach_rollback().
  */
-static int configfs_detach_prep(struct dentry *dentry)
+static int configfs_detach_prep(struct dentry *dentry, struct mutex **wait_mutex)
 {
 	struct configfs_dirent *parent_sd = dentry->d_fsdata;
 	struct configfs_dirent *sd;
@@ -352,15 +379,20 @@ static int configfs_detach_prep(struct dentry *dentry)
 		if (sd->s_type & CONFIGFS_NOT_PINNED)
 			continue;
 		if (sd->s_type & CONFIGFS_USET_DEFAULT) {
-			mutex_lock(&sd->s_dentry->d_inode->i_mutex);
-			/* Mark that we've taken i_mutex */
+			/* Abort if racing with mkdir() */
+			if (sd->s_type & CONFIGFS_USET_IN_MKDIR) {
+				if (wait_mutex)
+					*wait_mutex = &sd->s_dentry->d_inode->i_mutex;
+				return -EAGAIN;
+			}
+			/* Mark that we're trying to drop the group */
 			sd->s_type |= CONFIGFS_USET_DROPPING;
 
 			/*
 			 * Yup, recursive.  If there's a problem, blame
 			 * deep nesting of default_groups
 			 */
-			ret = configfs_detach_prep(sd->s_dentry);
+			ret = configfs_detach_prep(sd->s_dentry, wait_mutex);
 			if (!ret)
 				continue;
 		} else
@@ -374,7 +406,7 @@ out:
 }
 
 /*
- * Walk the tree, dropping i_mutex wherever CONFIGFS_USET_DROPPING is
+ * Walk the tree, resetting CONFIGFS_USET_DROPPING wherever it was
  * set.
  */
 static void configfs_detach_rollback(struct dentry *dentry)
@@ -385,11 +417,7 @@ static void configfs_detach_rollback(struct dentry *dentry)
 	list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
 		if (sd->s_type & CONFIGFS_USET_DEFAULT) {
 			configfs_detach_rollback(sd->s_dentry);
-
-			if (sd->s_type & CONFIGFS_USET_DROPPING) {
-				sd->s_type &= ~CONFIGFS_USET_DROPPING;
-				mutex_unlock(&sd->s_dentry->d_inode->i_mutex);
-			}
+			sd->s_type &= ~CONFIGFS_USET_DROPPING;
 		}
 	}
 }
@@ -410,7 +438,9 @@ static void detach_attrs(struct config_item * item)
 	list_for_each_entry_safe(sd, tmp, &parent_sd->s_children, s_sibling) {
 		if (!sd->s_element || !(sd->s_type & CONFIGFS_NOT_PINNED))
 			continue;
+		spin_lock(&configfs_dirent_lock);
 		list_del_init(&sd->s_sibling);
+		spin_unlock(&configfs_dirent_lock);
 		configfs_drop_dentry(sd, dentry);
 		configfs_put(sd);
 	}
@@ -466,16 +496,12 @@ static void detach_groups(struct config_group *group)
 
 		child = sd->s_dentry;
 
+		mutex_lock(&child->d_inode->i_mutex);
+
 		configfs_detach_group(sd->s_element);
 		child->d_inode->i_flags |= S_DEAD;
 
-		/*
-		 * From rmdir/unregister, a configfs_detach_prep() pass
-		 * has taken our i_mutex for us.  Drop it.
-		 * From mkdir/register cleanup, there is no sem held.
-		 */
-		if (sd->s_type & CONFIGFS_USET_DROPPING)
-			mutex_unlock(&child->d_inode->i_mutex);
+		mutex_unlock(&child->d_inode->i_mutex);
 
 		d_delete(child);
 		dput(child);
@@ -1047,25 +1073,24 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	group = NULL;
 	item = NULL;
 	if (type->ct_group_ops->make_group) {
-		group = type->ct_group_ops->make_group(to_config_group(parent_item), name);
-		if (group) {
+		ret = type->ct_group_ops->make_group(to_config_group(parent_item), name, &group);
+		if (!ret) {
 			link_group(to_config_group(parent_item), group);
 			item = &group->cg_item;
 		}
 	} else {
-		item = type->ct_group_ops->make_item(to_config_group(parent_item), name);
-		if (item)
+		ret = type->ct_group_ops->make_item(to_config_group(parent_item), name, &item);
+		if (!ret)
 			link_obj(parent_item, item);
 	}
 	mutex_unlock(&subsys->su_mutex);
 
 	kfree(name);
-	if (!item) {
+	if (ret) {
 		/*
-		 * If item == NULL, then link_obj() was never called.
+		 * If ret != 0, then link_obj() was never called.
 		 * There are no extra references to clean up.
 		 */
-		ret = -ENOMEM;
 		goto out_put;
 	}
 
@@ -1093,11 +1118,26 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	 */
 	module_got = 1;
 
+	/*
+	 * Make racing rmdir() fail if it did not tag parent with
+	 * CONFIGFS_USET_DROPPING
+	 * Note: if CONFIGFS_USET_DROPPING is already set, attach_group() will
+	 * fail and let rmdir() terminate correctly
+	 */
+	spin_lock(&configfs_dirent_lock);
+	/* This will make configfs_detach_prep() fail */
+	sd->s_type |= CONFIGFS_USET_IN_MKDIR;
+	spin_unlock(&configfs_dirent_lock);
+
 	if (group)
 		ret = configfs_attach_group(parent_item, item, dentry);
 	else
 		ret = configfs_attach_item(parent_item, item, dentry);
 
+	spin_lock(&configfs_dirent_lock);
+	sd->s_type &= ~CONFIGFS_USET_IN_MKDIR;
+	spin_unlock(&configfs_dirent_lock);
+
 out_unlink:
 	if (ret) {
 		/* Tear down everything we built up */
@@ -1161,12 +1201,27 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
 		return -EINVAL;
 	}
 
-	ret = configfs_detach_prep(dentry);
-	if (ret) {
-		configfs_detach_rollback(dentry);
-		config_item_put(parent_item);
-		return ret;
-	}
+	spin_lock(&configfs_dirent_lock);
+	do {
+		struct mutex *wait_mutex;
+
+		ret = configfs_detach_prep(dentry, &wait_mutex);
+		if (ret) {
+			configfs_detach_rollback(dentry);
+			spin_unlock(&configfs_dirent_lock);
+			if (ret != -EAGAIN) {
+				config_item_put(parent_item);
+				return ret;
+			}
+
+			/* Wait until the racing operation terminates */
+			mutex_lock(wait_mutex);
+			mutex_unlock(wait_mutex);
+
+			spin_lock(&configfs_dirent_lock);
+		}
+	} while (ret == -EAGAIN);
+	spin_unlock(&configfs_dirent_lock);
 
 	/* Get a working ref for the duration of this function */
 	item = configfs_get_config_item(dentry);
@@ -1258,7 +1313,7 @@ static int configfs_dir_open(struct inode *inode, struct file *file)
 	file->private_data = configfs_new_dirent(parent_sd, NULL);
 	mutex_unlock(&dentry->d_inode->i_mutex);
 
-	return file->private_data ? 0 : -ENOMEM;
+	return IS_ERR(file->private_data) ? PTR_ERR(file->private_data) : 0;
 
 }
 
@@ -1268,7 +1323,9 @@ static int configfs_dir_close(struct inode *inode, struct file *file)
 	struct configfs_dirent * cursor = file->private_data;
 
 	mutex_lock(&dentry->d_inode->i_mutex);
+	spin_lock(&configfs_dirent_lock);
 	list_del_init(&cursor->s_sibling);
+	spin_unlock(&configfs_dirent_lock);
 	mutex_unlock(&dentry->d_inode->i_mutex);
 
 	release_configfs_dirent(cursor);
@@ -1308,7 +1365,9 @@ static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir
 			/* fallthrough */
 		default:
 			if (filp->f_pos == 2) {
+				spin_lock(&configfs_dirent_lock);
 				list_move(q, &parent_sd->s_children);
+				spin_unlock(&configfs_dirent_lock);
 			}
 			for (p=q->next; p!= &parent_sd->s_children; p=p->next) {
 				struct configfs_dirent *next;
@@ -1331,7 +1390,9 @@ static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir
 						 dt_type(next)) < 0)
 					return 0;
 
+				spin_lock(&configfs_dirent_lock);
 				list_move(q, p);
+				spin_unlock(&configfs_dirent_lock);
 				p = q;
 				filp->f_pos++;
 			}
@@ -1362,6 +1423,7 @@ static loff_t configfs_dir_lseek(struct file * file, loff_t offset, int origin)
 			struct list_head *p;
 			loff_t n = file->f_pos - 2;
 
+			spin_lock(&configfs_dirent_lock);
 			list_del(&cursor->s_sibling);
 			p = sd->s_children.next;
 			while (n && p != &sd->s_children) {
@@ -1373,6 +1435,7 @@ static loff_t configfs_dir_lseek(struct file * file, loff_t offset, int origin)
 				p = p->next;
 			}
 			list_add_tail(&cursor->s_sibling, p);
+			spin_unlock(&configfs_dirent_lock);
 		}
 	}
 	mutex_unlock(&dentry->d_inode->i_mutex);
@@ -1448,9 +1511,11 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys)
 	mutex_lock_nested(&configfs_sb->s_root->d_inode->i_mutex,
 			  I_MUTEX_PARENT);
 	mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
-	if (configfs_detach_prep(dentry)) {
+	spin_lock(&configfs_dirent_lock);
+	if (configfs_detach_prep(dentry, NULL)) {
 		printk(KERN_ERR "configfs: Tried to unregister non-empty subsystem!\n");
 	}
+	spin_unlock(&configfs_dirent_lock);
 	configfs_detach_group(&group->cg_item);
 	dentry->d_inode->i_flags |= S_DEAD;
 	mutex_unlock(&dentry->d_inode->i_mutex);
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index b9a1d810346..4803ccc9448 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -247,7 +247,9 @@ void configfs_hash_and_remove(struct dentry * dir, const char * name)
 		if (!sd->s_element)
 			continue;
 		if (!strcmp(configfs_get_name(sd), name)) {
+			spin_lock(&configfs_dirent_lock);
 			list_del_init(&sd->s_sibling);
+			spin_unlock(&configfs_dirent_lock);
 			configfs_drop_dentry(sd, dir);
 			configfs_put(sd);
 			break;
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index 2a731ef5f30..0004d18c40a 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -77,12 +77,15 @@ static int create_link(struct config_item *parent_item,
 	sl = kmalloc(sizeof(struct configfs_symlink), GFP_KERNEL);
 	if (sl) {
 		sl->sl_target = config_item_get(item);
-		/* FIXME: needs a lock, I'd bet */
+		spin_lock(&configfs_dirent_lock);
 		list_add(&sl->sl_list, &target_sd->s_links);
+		spin_unlock(&configfs_dirent_lock);
 		ret = configfs_create_link(sl, parent_item->ci_dentry,
 					   dentry);
 		if (ret) {
+			spin_lock(&configfs_dirent_lock);
 			list_del_init(&sl->sl_list);
+			spin_unlock(&configfs_dirent_lock);
 			config_item_put(item);
 			kfree(sl);
 		}
@@ -137,8 +140,12 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna
 		goto out_put;
 
 	ret = type->ct_item_ops->allow_link(parent_item, target_item);
-	if (!ret)
+	if (!ret) {
 		ret = create_link(parent_item, target_item, dentry);
+		if (ret && type->ct_item_ops->drop_link)
+			type->ct_item_ops->drop_link(parent_item,
+						     target_item);
+	}
 
 	config_item_put(target_item);
 	path_put(&nd.path);
@@ -169,7 +176,9 @@ int configfs_unlink(struct inode *dir, struct dentry *dentry)
 	parent_item = configfs_get_config_item(dentry->d_parent);
 	type = parent_item->ci_type;
 
+	spin_lock(&configfs_dirent_lock);
 	list_del_init(&sd->s_sibling);
+	spin_unlock(&configfs_dirent_lock);
 	configfs_drop_dentry(sd, dentry->d_parent);
 	dput(dentry);
 	configfs_put(sd);
@@ -184,8 +193,9 @@ int configfs_unlink(struct inode *dir, struct dentry *dentry)
 		type->ct_item_ops->drop_link(parent_item,
 					       sl->sl_target);
 
-	/* FIXME: Needs lock */
+	spin_lock(&configfs_dirent_lock);
 	list_del_init(&sl->sl_list);
+	spin_unlock(&configfs_dirent_lock);
 
 	/* Put reference from create_link() */
 	config_item_put(sl->sl_target);
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index eac23bd288b..492d8caaaf2 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -41,16 +41,20 @@ struct comm;
 struct nodes;
 struct node;
 
-static struct config_group *make_cluster(struct config_group *, const char *);
+static int make_cluster(struct config_group *, const char *,
+			struct config_group **);
 static void drop_cluster(struct config_group *, struct config_item *);
 static void release_cluster(struct config_item *);
-static struct config_group *make_space(struct config_group *, const char *);
+static int make_space(struct config_group *, const char *,
+		      struct config_group **);
 static void drop_space(struct config_group *, struct config_item *);
 static void release_space(struct config_item *);
-static struct config_item *make_comm(struct config_group *, const char *);
+static int make_comm(struct config_group *, const char *,
+		     struct config_item **);
 static void drop_comm(struct config_group *, struct config_item *);
 static void release_comm(struct config_item *);
-static struct config_item *make_node(struct config_group *, const char *);
+static int make_node(struct config_group *, const char *,
+		     struct config_item **);
 static void drop_node(struct config_group *, struct config_item *);
 static void release_node(struct config_item *);
 
@@ -392,8 +396,8 @@ static struct node *to_node(struct config_item *i)
 	return i ? container_of(i, struct node, item) : NULL;
 }
 
-static struct config_group *make_cluster(struct config_group *g,
-					 const char *name)
+static int make_cluster(struct config_group *g, const char *name,
+			struct config_group **new_g)
 {
 	struct cluster *cl = NULL;
 	struct spaces *sps = NULL;
@@ -431,14 +435,15 @@ static struct config_group *make_cluster(struct config_group *g,
 
 	space_list = &sps->ss_group;
 	comm_list = &cms->cs_group;
-	return &cl->group;
+	*new_g = &cl->group;
+	return 0;
 
  fail:
 	kfree(cl);
 	kfree(gps);
 	kfree(sps);
 	kfree(cms);
-	return NULL;
+	return -ENOMEM;
 }
 
 static void drop_cluster(struct config_group *g, struct config_item *i)
@@ -466,7 +471,8 @@ static void release_cluster(struct config_item *i)
 	kfree(cl);
 }
 
-static struct config_group *make_space(struct config_group *g, const char *name)
+static int make_space(struct config_group *g, const char *name,
+		      struct config_group **new_g)
 {
 	struct space *sp = NULL;
 	struct nodes *nds = NULL;
@@ -489,13 +495,14 @@ static struct config_group *make_space(struct config_group *g, const char *name)
 	INIT_LIST_HEAD(&sp->members);
 	mutex_init(&sp->members_lock);
 	sp->members_count = 0;
-	return &sp->group;
+	*new_g = &sp->group;
+	return 0;
 
  fail:
 	kfree(sp);
 	kfree(gps);
 	kfree(nds);
-	return NULL;
+	return -ENOMEM;
 }
 
 static void drop_space(struct config_group *g, struct config_item *i)
@@ -522,19 +529,21 @@ static void release_space(struct config_item *i)
 	kfree(sp);
 }
 
-static struct config_item *make_comm(struct config_group *g, const char *name)
+static int make_comm(struct config_group *g, const char *name,
+		     struct config_item **new_i)
 {
 	struct comm *cm;
 
 	cm = kzalloc(sizeof(struct comm), GFP_KERNEL);
 	if (!cm)
-		return NULL;
+		return -ENOMEM;
 
 	config_item_init_type_name(&cm->item, name, &comm_type);
 	cm->nodeid = -1;
 	cm->local = 0;
 	cm->addr_count = 0;
-	return &cm->item;
+	*new_i = &cm->item;
+	return 0;
 }
 
 static void drop_comm(struct config_group *g, struct config_item *i)
@@ -554,14 +563,15 @@ static void release_comm(struct config_item *i)
 	kfree(cm);
 }
 
-static struct config_item *make_node(struct config_group *g, const char *name)
+static int make_node(struct config_group *g, const char *name,
+		     struct config_item **new_i)
 {
 	struct space *sp = to_space(g->cg_item.ci_parent);
 	struct node *nd;
 
 	nd = kzalloc(sizeof(struct node), GFP_KERNEL);
 	if (!nd)
-		return NULL;
+		return -ENOMEM;
 
 	config_item_init_type_name(&nd->item, name, &node_type);
 	nd->nodeid = -1;
@@ -573,7 +583,8 @@ static struct config_item *make_node(struct config_group *g, const char *name)
 	sp->members_count++;
 	mutex_unlock(&sp->members_lock);
 
-	return &nd->item;
+	*new_i = &nd->item;
+	return 0;
 }
 
 static void drop_node(struct config_group *g, struct config_item *i)
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 17964c0505a..1db080135c6 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -174,10 +174,17 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock,
 	 * need to use BH_New is when we're extending i_size on a file
 	 * system which doesn't support holes, in which case BH_New
 	 * allows block_prepare_write() to zero.
+	 *
+	 * If we see this on a sparse file system, then a truncate has
+	 * raced us and removed the cluster. In this case, we clear
+	 * the buffers dirty and uptodate bits and let the buffer code
+	 * ignore it as a hole.
 	 */
-	mlog_bug_on_msg(create && p_blkno == 0 && ocfs2_sparse_alloc(osb),
-			"ino %lu, iblock %llu\n", inode->i_ino,
-			(unsigned long long)iblock);
+	if (create && p_blkno == 0 && ocfs2_sparse_alloc(osb)) {
+		clear_buffer_dirty(bh_result);
+		clear_buffer_uptodate(bh_result);
+		goto bail;
+	}
 
 	/* Treat the unwritten extent as a hole for zeroing purposes. */
 	if (p_blkno && !(ext_flags & OCFS2_EXT_UNWRITTEN))
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index f02ccb34604..443d108211a 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -1489,25 +1489,28 @@ static struct o2hb_heartbeat_group *to_o2hb_heartbeat_group(struct config_group
 		: NULL;
 }
 
-static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *group,
-							  const char *name)
+static int o2hb_heartbeat_group_make_item(struct config_group *group,
+					  const char *name,
+					  struct config_item **new_item)
 {
 	struct o2hb_region *reg = NULL;
-	struct config_item *ret = NULL;
+	int ret = 0;
 
 	reg = kzalloc(sizeof(struct o2hb_region), GFP_KERNEL);
-	if (reg == NULL)
-		goto out; /* ENOMEM */
+	if (reg == NULL) {
+		ret = -ENOMEM;
+		goto out;
+	}
 
 	config_item_init_type_name(&reg->hr_item, name, &o2hb_region_type);
 
-	ret = &reg->hr_item;
+	*new_item = &reg->hr_item;
 
 	spin_lock(&o2hb_live_lock);
 	list_add_tail(&reg->hr_all_item, &o2hb_all_regions);
 	spin_unlock(&o2hb_live_lock);
 out:
-	if (ret == NULL)
+	if (ret)
 		kfree(reg);
 
 	return ret;
diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c
index 7bf3c0ea7bd..d8bfa0eb41b 100644
--- a/fs/ocfs2/cluster/netdebug.c
+++ b/fs/ocfs2/cluster/netdebug.c
@@ -146,8 +146,10 @@ static int nst_seq_show(struct seq_file *seq, void *v)
 			   nst->st_task->comm, nst->st_node,
 			   nst->st_sc, nst->st_id, nst->st_msg_type,
 			   nst->st_msg_key,
-			   nst->st_sock_time.tv_sec, nst->st_sock_time.tv_usec,
-			   nst->st_send_time.tv_sec, nst->st_send_time.tv_usec,
+			   nst->st_sock_time.tv_sec,
+			   (unsigned long)nst->st_sock_time.tv_usec,
+			   nst->st_send_time.tv_sec,
+			   (unsigned long)nst->st_send_time.tv_usec,
 			   nst->st_status_time.tv_sec,
 			   nst->st_status_time.tv_usec);
 	}
@@ -274,7 +276,7 @@ static void *sc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	return sc; /* unused, just needs to be null when done */
 }
 
-#define TV_SEC_USEC(TV) TV.tv_sec, TV.tv_usec
+#define TV_SEC_USEC(TV) TV.tv_sec, (unsigned long)TV.tv_usec
 
 static int sc_seq_show(struct seq_file *seq, void *v)
 {
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c
index cfdb08b484e..b364b7052e4 100644
--- a/fs/ocfs2/cluster/nodemanager.c
+++ b/fs/ocfs2/cluster/nodemanager.c
@@ -644,27 +644,32 @@ out:
 	return ret;
 }
 
-static struct config_item *o2nm_node_group_make_item(struct config_group *group,
-						     const char *name)
+static int o2nm_node_group_make_item(struct config_group *group,
+				     const char *name,
+				     struct config_item **new_item)
 {
 	struct o2nm_node *node = NULL;
-	struct config_item *ret = NULL;
+	int ret = 0;
 
-	if (strlen(name) > O2NM_MAX_NAME_LEN)
-		goto out; /* ENAMETOOLONG */
+	if (strlen(name) > O2NM_MAX_NAME_LEN) {
+		ret = -ENAMETOOLONG;
+		goto out;
+	}
 
 	node = kzalloc(sizeof(struct o2nm_node), GFP_KERNEL);
-	if (node == NULL)
-		goto out; /* ENOMEM */
+	if (node == NULL) {
+		ret = -ENOMEM;
+		goto out;
+	}
 
 	strcpy(node->nd_name, name); /* use item.ci_namebuf instead? */
 	config_item_init_type_name(&node->nd_item, name, &o2nm_node_type);
 	spin_lock_init(&node->nd_lock);
 
-	ret = &node->nd_item;
+	*new_item = &node->nd_item;
 
 out:
-	if (ret == NULL)
+	if (ret)
 		kfree(node);
 
 	return ret;
@@ -751,25 +756,31 @@ static struct o2nm_cluster_group *to_o2nm_cluster_group(struct config_group *gro
 }
 #endif
 
-static struct config_group *o2nm_cluster_group_make_group(struct config_group *group,
-							  const char *name)
+static int o2nm_cluster_group_make_group(struct config_group *group,
+					 const char *name,
+					 struct config_group **new_group)
 {
 	struct o2nm_cluster *cluster = NULL;
 	struct o2nm_node_group *ns = NULL;
-	struct config_group *o2hb_group = NULL, *ret = NULL;
+	struct config_group *o2hb_group = NULL;
 	void *defs = NULL;
+	int ret = 0;
 
 	/* this runs under the parent dir's i_mutex; there can be only
 	 * one caller in here at a time */
-	if (o2nm_single_cluster)
-		goto out; /* ENOSPC */
+	if (o2nm_single_cluster) {
+		ret = -ENOSPC;
+		goto out;
+	}
 
 	cluster = kzalloc(sizeof(struct o2nm_cluster), GFP_KERNEL);
 	ns = kzalloc(sizeof(struct o2nm_node_group), GFP_KERNEL);
 	defs = kcalloc(3, sizeof(struct config_group *), GFP_KERNEL);
 	o2hb_group = o2hb_alloc_hb_set();
-	if (cluster == NULL || ns == NULL || o2hb_group == NULL || defs == NULL)
+	if (cluster == NULL || ns == NULL || o2hb_group == NULL || defs == NULL) {
+		ret = -ENOMEM;
 		goto out;
+	}
 
 	config_group_init_type_name(&cluster->cl_group, name,
 				    &o2nm_cluster_type);
@@ -786,11 +797,11 @@ static struct config_group *o2nm_cluster_group_make_group(struct config_group *g
 	cluster->cl_idle_timeout_ms    = O2NET_IDLE_TIMEOUT_MS_DEFAULT;
 	cluster->cl_keepalive_delay_ms = O2NET_KEEPALIVE_DELAY_MS_DEFAULT;
 
-	ret = &cluster->cl_group;
+	*new_group = &cluster->cl_group;
 	o2nm_single_cluster = cluster;
 
 out:
-	if (ret == NULL) {
+	if (ret) {
 		kfree(cluster);
 		kfree(ns);
 		o2hb_free_hb_set(o2hb_group);
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 80e20d9f278..eae3d643a5e 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -31,6 +31,7 @@
 #include <linux/pagemap.h>
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
+#include <linux/time.h>
 
 #define MLOG_MASK_PREFIX ML_DLM_GLUE
 #include <cluster/masklog.h>
@@ -59,6 +60,9 @@ struct ocfs2_mask_waiter {
 	struct completion	mw_complete;
 	unsigned long		mw_mask;
 	unsigned long		mw_goal;
+#ifdef CONFIG_OCFS2_FS_STATS
+	unsigned long long 	mw_lock_start;
+#endif
 };
 
 static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres);
@@ -366,6 +370,75 @@ static void ocfs2_remove_lockres_tracking(struct ocfs2_lock_res *res)
 	spin_unlock(&ocfs2_dlm_tracking_lock);
 }
 
+#ifdef CONFIG_OCFS2_FS_STATS
+static void ocfs2_init_lock_stats(struct ocfs2_lock_res *res)
+{
+	res->l_lock_num_prmode = 0;
+	res->l_lock_num_prmode_failed = 0;
+	res->l_lock_total_prmode = 0;
+	res->l_lock_max_prmode = 0;
+	res->l_lock_num_exmode = 0;
+	res->l_lock_num_exmode_failed = 0;
+	res->l_lock_total_exmode = 0;
+	res->l_lock_max_exmode = 0;
+	res->l_lock_refresh = 0;
+}
+
+static void ocfs2_update_lock_stats(struct ocfs2_lock_res *res, int level,
+				    struct ocfs2_mask_waiter *mw, int ret)
+{
+	unsigned long long *num, *sum;
+	unsigned int *max, *failed;
+	struct timespec ts = current_kernel_time();
+	unsigned long long time = timespec_to_ns(&ts) - mw->mw_lock_start;
+
+	if (level == LKM_PRMODE) {
+		num = &res->l_lock_num_prmode;
+		sum = &res->l_lock_total_prmode;
+		max = &res->l_lock_max_prmode;
+		failed = &res->l_lock_num_prmode_failed;
+	} else if (level == LKM_EXMODE) {
+		num = &res->l_lock_num_exmode;
+		sum = &res->l_lock_total_exmode;
+		max = &res->l_lock_max_exmode;
+		failed = &res->l_lock_num_exmode_failed;
+	} else
+		return;
+
+	(*num)++;
+	(*sum) += time;
+	if (time > *max)
+		*max = time;
+	if (ret)
+		(*failed)++;
+}
+
+static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres)
+{
+	lockres->l_lock_refresh++;
+}
+
+static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw)
+{
+	struct timespec ts = current_kernel_time();
+	mw->mw_lock_start = timespec_to_ns(&ts);
+}
+#else
+static inline void ocfs2_init_lock_stats(struct ocfs2_lock_res *res)
+{
+}
+static inline void ocfs2_update_lock_stats(struct ocfs2_lock_res *res,
+			   int level, struct ocfs2_mask_waiter *mw, int ret)
+{
+}
+static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres)
+{
+}
+static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw)
+{
+}
+#endif
+
 static void ocfs2_lock_res_init_common(struct ocfs2_super *osb,
 				       struct ocfs2_lock_res *res,
 				       enum ocfs2_lock_type type,
@@ -385,6 +458,8 @@ static void ocfs2_lock_res_init_common(struct ocfs2_super *osb,
 	res->l_flags         = OCFS2_LOCK_INITIALIZED;
 
 	ocfs2_add_lockres_tracking(res, osb->osb_dlm_debug);
+
+	ocfs2_init_lock_stats(res);
 }
 
 void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res)
@@ -1048,6 +1123,7 @@ static void ocfs2_init_mask_waiter(struct ocfs2_mask_waiter *mw)
 {
 	INIT_LIST_HEAD(&mw->mw_item);
 	init_completion(&mw->mw_complete);
+	ocfs2_init_start_time(mw);
 }
 
 static int ocfs2_wait_for_mask(struct ocfs2_mask_waiter *mw)
@@ -1254,6 +1330,7 @@ out:
 			goto again;
 		mlog_errno(ret);
 	}
+	ocfs2_update_lock_stats(lockres, level, &mw, ret);
 
 	mlog_exit(ret);
 	return ret;
@@ -1983,6 +2060,7 @@ static int ocfs2_inode_lock_update(struct inode *inode,
 				le32_to_cpu(fe->i_flags));
 
 		ocfs2_refresh_inode(inode, fe);
+		ocfs2_track_lock_refresh(lockres);
 	}
 
 	status = 0;
@@ -2267,6 +2345,7 @@ int ocfs2_super_lock(struct ocfs2_super *osb,
 
 		if (status < 0)
 			mlog_errno(status);
+		ocfs2_track_lock_refresh(lockres);
 	}
 bail:
 	mlog_exit(status);
@@ -2461,7 +2540,7 @@ static void *ocfs2_dlm_seq_next(struct seq_file *m, void *v, loff_t *pos)
 }
 
 /* So that debugfs.ocfs2 can determine which format is being used */
-#define OCFS2_DLM_DEBUG_STR_VERSION 1
+#define OCFS2_DLM_DEBUG_STR_VERSION 2
 static int ocfs2_dlm_seq_show(struct seq_file *m, void *v)
 {
 	int i;
@@ -2502,6 +2581,47 @@ static int ocfs2_dlm_seq_show(struct seq_file *m, void *v)
 	for(i = 0; i < DLM_LVB_LEN; i++)
 		seq_printf(m, "0x%x\t", lvb[i]);
 
+#ifdef CONFIG_OCFS2_FS_STATS
+# define lock_num_prmode(_l)		(_l)->l_lock_num_prmode
+# define lock_num_exmode(_l)		(_l)->l_lock_num_exmode
+# define lock_num_prmode_failed(_l)	(_l)->l_lock_num_prmode_failed
+# define lock_num_exmode_failed(_l)	(_l)->l_lock_num_exmode_failed
+# define lock_total_prmode(_l)		(_l)->l_lock_total_prmode
+# define lock_total_exmode(_l)		(_l)->l_lock_total_exmode
+# define lock_max_prmode(_l)		(_l)->l_lock_max_prmode
+# define lock_max_exmode(_l)		(_l)->l_lock_max_exmode
+# define lock_refresh(_l)		(_l)->l_lock_refresh
+#else
+# define lock_num_prmode(_l)		(0ULL)
+# define lock_num_exmode(_l)		(0ULL)
+# define lock_num_prmode_failed(_l)	(0)
+# define lock_num_exmode_failed(_l)	(0)
+# define lock_total_prmode(_l)		(0ULL)
+# define lock_total_exmode(_l)		(0ULL)
+# define lock_max_prmode(_l)		(0)
+# define lock_max_exmode(_l)		(0)
+# define lock_refresh(_l)		(0)
+#endif
+	/* The following seq_print was added in version 2 of this output */
+	seq_printf(m, "%llu\t"
+		   "%llu\t"
+		   "%u\t"
+		   "%u\t"
+		   "%llu\t"
+		   "%llu\t"
+		   "%u\t"
+		   "%u\t"
+		   "%u\t",
+		   lock_num_prmode(lockres),
+		   lock_num_exmode(lockres),
+		   lock_num_prmode_failed(lockres),
+		   lock_num_exmode_failed(lockres),
+		   lock_total_prmode(lockres),
+		   lock_total_exmode(lockres),
+		   lock_max_prmode(lockres),
+		   lock_max_exmode(lockres),
+		   lock_refresh(lockres));
+
 	/* End the line */
 	seq_printf(m, "\n");
 	return 0;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 57e0d30cde9..e8514e8b6ce 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2202,7 +2202,7 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
 
 	ret = generic_file_aio_read(iocb, iov, nr_segs, iocb->ki_pos);
 	if (ret == -EINVAL)
-		mlog(ML_ERROR, "generic_file_aio_read returned -EINVAL\n");
+		mlog(0, "generic_file_aio_read returned -EINVAL\n");
 
 	/* buffered aio wouldn't have proper lock coverage today */
 	BUG_ON(ret == -EIOCBQUEUED && !(filp->f_flags & O_DIRECT));
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 9698338adc3..a8c19cb3cfd 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -329,7 +329,7 @@ int ocfs2_extend_trans(handle_t *handle, int nblocks)
 
 	mlog(0, "Trying to extend transaction by %d blocks\n", nblocks);
 
-#ifdef OCFS2_DEBUG_FS
+#ifdef CONFIG_OCFS2_DEBUG_FS
 	status = 1;
 #else
 	status = journal_extend(handle, nblocks);
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index be774bdc8b3..28e492e4ec8 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -498,7 +498,7 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
 
 	alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
 
-#ifdef OCFS2_DEBUG_FS
+#ifdef CONFIG_OCFS2_DEBUG_FS
 	if (le32_to_cpu(alloc->id1.bitmap1.i_used) !=
 	    ocfs2_local_alloc_count_bits(alloc)) {
 		ocfs2_error(osb->sb, "local alloc inode %llu says it has "
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 31692379c17..1cb814be8ef 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -132,6 +132,18 @@ struct ocfs2_lock_res {
 	wait_queue_head_t        l_event;
 
 	struct list_head         l_debug_list;
+
+#ifdef CONFIG_OCFS2_FS_STATS
+	unsigned long long	 l_lock_num_prmode; 	   /* PR acquires */
+	unsigned long long 	 l_lock_num_exmode; 	   /* EX acquires */
+	unsigned int		 l_lock_num_prmode_failed; /* Failed PR gets */
+	unsigned int		 l_lock_num_exmode_failed; /* Failed EX gets */
+	unsigned long long	 l_lock_total_prmode; 	   /* Tot wait for PR */
+	unsigned long long	 l_lock_total_exmode; 	   /* Tot wait for EX */
+	unsigned int		 l_lock_max_prmode; 	   /* Max wait for PR */
+	unsigned int		 l_lock_max_exmode; 	   /* Max wait for EX */
+	unsigned int		 l_lock_refresh;	   /* Disk refreshes */
+#endif
 };
 
 struct ocfs2_dlm_debug {
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 52c42666515..3f194517762 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -901,7 +901,7 @@ static inline int ocfs2_sprintf_system_inode_name(char *buf, int len,
          * list has a copy per slot.
          */
 	if (type <= OCFS2_LAST_GLOBAL_SYSTEM_INODE)
-		chars = snprintf(buf, len,
+		chars = snprintf(buf, len, "%s",
 				 ocfs2_system_inodes[type].si_name);
 	else
 		chars = snprintf(buf, len,
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index bd7e0f3acfc..353fc35c674 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -550,26 +550,17 @@ static ssize_t ocfs2_control_read(struct file *file,
 				  size_t count,
 				  loff_t *ppos)
 {
-	char *proto_string = OCFS2_CONTROL_PROTO;
-	size_t to_write = 0;
-
-	if (*ppos >= OCFS2_CONTROL_PROTO_LEN)
-		return 0;
-
-	to_write = OCFS2_CONTROL_PROTO_LEN - *ppos;
-	if (to_write > count)
-		to_write = count;
-	if (copy_to_user(buf, proto_string + *ppos, to_write))
-		return -EFAULT;
+	ssize_t ret;
 
-	*ppos += to_write;
+	ret = simple_read_from_buffer(buf, count, ppos,
+			OCFS2_CONTROL_PROTO, OCFS2_CONTROL_PROTO_LEN);
 
 	/* Have we read the whole protocol list? */
-	if (*ppos >= OCFS2_CONTROL_PROTO_LEN)
+	if (ret > 0 && *ppos >= OCFS2_CONTROL_PROTO_LEN)
 		ocfs2_control_set_handshake_state(file,
 						  OCFS2_CONTROL_HANDSHAKE_READ);
 
-	return to_write;
+	return ret;
 }
 
 static int ocfs2_control_release(struct inode *inode, struct file *file)
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index df63ba20ae9..ccecfe5094f 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1703,7 +1703,11 @@ static int ocfs2_check_volume(struct ocfs2_super *osb)
 	local = ocfs2_mount_local(osb);
 
 	/* will play back anything left in the journal. */
-	ocfs2_journal_load(osb->journal, local);
+	status = ocfs2_journal_load(osb->journal, local);
+	if (status < 0) {
+		mlog(ML_ERROR, "ocfs2 journal load failed! %d\n", status);
+		goto finally;
+	}
 
 	if (dirty) {
 		/* recover my local alloc if we didn't unmount cleanly. */
author	Linus Torvalds <torvalds@linux-foundation.org>	2008-07-17 10:55:51 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2008-07-17 10:55:51 -0700
commit	5b664cb235e97afbf34db9c4d77f08ebd725335e (patch)
tree	518540649c38342209790de8e0b575ac1a6fa722 /fs
parent	f39548a6ad1dbdfaab552419386ec5bb1d76fa0d (diff)
parent	c0420ad2ca514551ca086510b0e7d17a05c70492 (diff)