diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-06-16 12:11:57 -0700 | 
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-06-16 12:11:57 -0700 | 
| commit | 300df7dc89cc276377fc020704e34875d5c473b6 (patch) | |
| tree | 6b280000815b0562255cecf3da1a8c9597bce702 | |
| parent | 661adc423d70203a56723701aaf255e16950dfdb (diff) | |
| parent | 9af0b38ff3f4f79c62dd909405b113bf7c1a23aa (diff) | |
Merge branch 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jlbec/ocfs2
* 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jlbec/ocfs2:
  ocfs2/net: Use wait_event() in o2net_send_message_vec()
  ocfs2: Adjust rightmost path in ocfs2_add_branch.
  ocfs2: fdatasync should skip unimportant metadata writeout
  ocfs2: Remove redundant gotos in ocfs2_mount_volume()
  ocfs2: Add statistics for the checksum and ecc operations.
  ocfs2 patch to track delayed orphan scan timer statistics
  ocfs2: timer to queue scan of all orphan slots
  ocfs2: Correct ordering of ip_alloc_sem and localloc locks for directories
  ocfs2: Fix possible deadlock in quota recovery
  ocfs2: Fix possible deadlock with quotas in ocfs2_setattr()
  ocfs2: Fix lock inversion in ocfs2_local_read_info()
  ocfs2: Fix possible deadlock in ocfs2_global_read_dquot()
  ocfs2: update comments in masklog.h
  ocfs2: Don't printk the error when listing too many xattrs.
| -rw-r--r-- | fs/ocfs2/alloc.c | 80 | ||||
| -rw-r--r-- | fs/ocfs2/blockcheck.c | 184 | ||||
| -rw-r--r-- | fs/ocfs2/blockcheck.h | 29 | ||||
| -rw-r--r-- | fs/ocfs2/cluster/masklog.h | 35 | ||||
| -rw-r--r-- | fs/ocfs2/cluster/tcp.c | 7 | ||||
| -rw-r--r-- | fs/ocfs2/dir.c | 21 | ||||
| -rw-r--r-- | fs/ocfs2/dlmglue.c | 51 | ||||
| -rw-r--r-- | fs/ocfs2/dlmglue.h | 11 | ||||
| -rw-r--r-- | fs/ocfs2/file.c | 56 | ||||
| -rw-r--r-- | fs/ocfs2/journal.c | 111 | ||||
| -rw-r--r-- | fs/ocfs2/journal.h | 4 | ||||
| -rw-r--r-- | fs/ocfs2/ocfs2.h | 16 | ||||
| -rw-r--r-- | fs/ocfs2/ocfs2_lockid.h | 5 | ||||
| -rw-r--r-- | fs/ocfs2/quota_global.c | 4 | ||||
| -rw-r--r-- | fs/ocfs2/quota_local.c | 21 | ||||
| -rw-r--r-- | fs/ocfs2/super.c | 66 | ||||
| -rw-r--r-- | fs/ocfs2/xattr.c | 5 | 
17 files changed, 610 insertions, 96 deletions
| diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 678a067d925..9edcde4974a 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -475,6 +475,12 @@ struct ocfs2_path {  #define path_leaf_el(_path) ((_path)->p_node[(_path)->p_tree_depth].el)  #define path_num_items(_path) ((_path)->p_tree_depth + 1) +static int ocfs2_find_path(struct inode *inode, struct ocfs2_path *path, +			   u32 cpos); +static void ocfs2_adjust_rightmost_records(struct inode *inode, +					   handle_t *handle, +					   struct ocfs2_path *path, +					   struct ocfs2_extent_rec *insert_rec);  /*   * Reset the actual path elements so that we can re-use the structure   * to build another path. Generally, this involves freeing the buffer @@ -1013,6 +1019,54 @@ static inline u32 ocfs2_sum_rightmost_rec(struct ocfs2_extent_list  *el)  }  /* + * Change range of the branches in the right most path according to the leaf + * extent block's rightmost record. + */ +static int ocfs2_adjust_rightmost_branch(handle_t *handle, +					 struct inode *inode, +					 struct ocfs2_extent_tree *et) +{ +	int status; +	struct ocfs2_path *path = NULL; +	struct ocfs2_extent_list *el; +	struct ocfs2_extent_rec *rec; + +	path = ocfs2_new_path_from_et(et); +	if (!path) { +		status = -ENOMEM; +		return status; +	} + +	status = ocfs2_find_path(inode, path, UINT_MAX); +	if (status < 0) { +		mlog_errno(status); +		goto out; +	} + +	status = ocfs2_extend_trans(handle, path_num_items(path) + +				    handle->h_buffer_credits); +	if (status < 0) { +		mlog_errno(status); +		goto out; +	} + +	status = ocfs2_journal_access_path(inode, handle, path); +	if (status < 0) { +		mlog_errno(status); +		goto out; +	} + +	el = path_leaf_el(path); +	rec = &el->l_recs[le32_to_cpu(el->l_next_free_rec) - 1]; + +	ocfs2_adjust_rightmost_records(inode, handle, path, rec); + +out: +	ocfs2_free_path(path); +	return status; +} + +/*   * Add an entire tree branch to our inode. eb_bh is the extent block   * to start at, if we don't want to start the branch at the dinode   * structure. @@ -1038,7 +1092,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,  	struct ocfs2_extent_block *eb;  	struct ocfs2_extent_list  *eb_el;  	struct ocfs2_extent_list  *el; -	u32 new_cpos; +	u32 new_cpos, root_end;  	mlog_entry_void(); @@ -1055,6 +1109,27 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,  	new_blocks = le16_to_cpu(el->l_tree_depth); +	eb = (struct ocfs2_extent_block *)(*last_eb_bh)->b_data; +	new_cpos = ocfs2_sum_rightmost_rec(&eb->h_list); +	root_end = ocfs2_sum_rightmost_rec(et->et_root_el); + +	/* +	 * If there is a gap before the root end and the real end +	 * of the righmost leaf block, we need to remove the gap +	 * between new_cpos and root_end first so that the tree +	 * is consistent after we add a new branch(it will start +	 * from new_cpos). +	 */ +	if (root_end > new_cpos) { +		mlog(0, "adjust the cluster end from %u to %u\n", +		     root_end, new_cpos); +		status = ocfs2_adjust_rightmost_branch(handle, inode, et); +		if (status) { +			mlog_errno(status); +			goto bail; +		} +	} +  	/* allocate the number of new eb blocks we need */  	new_eb_bhs = kcalloc(new_blocks, sizeof(struct buffer_head *),  			     GFP_KERNEL); @@ -1071,9 +1146,6 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,  		goto bail;  	} -	eb = (struct ocfs2_extent_block *)(*last_eb_bh)->b_data; -	new_cpos = ocfs2_sum_rightmost_rec(&eb->h_list); -  	/* Note: new_eb_bhs[new_blocks - 1] is the guy which will be  	 * linked with the rest of the tree.  	 * conversly, new_eb_bhs[0] is the new bottommost leaf. diff --git a/fs/ocfs2/blockcheck.c b/fs/ocfs2/blockcheck.c index 2a947c44e59..a1163b8b417 100644 --- a/fs/ocfs2/blockcheck.c +++ b/fs/ocfs2/blockcheck.c @@ -22,6 +22,9 @@  #include <linux/crc32.h>  #include <linux/buffer_head.h>  #include <linux/bitops.h> +#include <linux/debugfs.h> +#include <linux/module.h> +#include <linux/fs.h>  #include <asm/byteorder.h>  #include <cluster/masklog.h> @@ -222,6 +225,155 @@ void ocfs2_hamming_fix_block(void *data, unsigned int blocksize,  	ocfs2_hamming_fix(data, blocksize * 8, 0, fix);  } + +/* + * Debugfs handling. + */ + +#ifdef CONFIG_DEBUG_FS + +static int blockcheck_u64_get(void *data, u64 *val) +{ +	*val = *(u64 *)data; +	return 0; +} +DEFINE_SIMPLE_ATTRIBUTE(blockcheck_fops, blockcheck_u64_get, NULL, "%llu\n"); + +static struct dentry *blockcheck_debugfs_create(const char *name, +						struct dentry *parent, +						u64 *value) +{ +	return debugfs_create_file(name, S_IFREG | S_IRUSR, parent, value, +				   &blockcheck_fops); +} + +static void ocfs2_blockcheck_debug_remove(struct ocfs2_blockcheck_stats *stats) +{ +	if (stats) { +		debugfs_remove(stats->b_debug_check); +		stats->b_debug_check = NULL; +		debugfs_remove(stats->b_debug_failure); +		stats->b_debug_failure = NULL; +		debugfs_remove(stats->b_debug_recover); +		stats->b_debug_recover = NULL; +		debugfs_remove(stats->b_debug_dir); +		stats->b_debug_dir = NULL; +	} +} + +static int ocfs2_blockcheck_debug_install(struct ocfs2_blockcheck_stats *stats, +					  struct dentry *parent) +{ +	int rc = -EINVAL; + +	if (!stats) +		goto out; + +	stats->b_debug_dir = debugfs_create_dir("blockcheck", parent); +	if (!stats->b_debug_dir) +		goto out; + +	stats->b_debug_check = +		blockcheck_debugfs_create("blocks_checked", +					  stats->b_debug_dir, +					  &stats->b_check_count); + +	stats->b_debug_failure = +		blockcheck_debugfs_create("checksums_failed", +					  stats->b_debug_dir, +					  &stats->b_failure_count); + +	stats->b_debug_recover = +		blockcheck_debugfs_create("ecc_recoveries", +					  stats->b_debug_dir, +					  &stats->b_recover_count); +	if (stats->b_debug_check && stats->b_debug_failure && +	    stats->b_debug_recover) +		rc = 0; + +out: +	if (rc) +		ocfs2_blockcheck_debug_remove(stats); +	return rc; +} +#else +static inline int ocfs2_blockcheck_debug_install(struct ocfs2_blockcheck_stats *stats, +						 struct dentry *parent) +{ +	return 0; +} + +static inline void ocfs2_blockcheck_debug_remove(struct ocfs2_blockcheck_stats *stats) +{ +} +#endif  /* CONFIG_DEBUG_FS */ + +/* Always-called wrappers for starting and stopping the debugfs files */ +int ocfs2_blockcheck_stats_debugfs_install(struct ocfs2_blockcheck_stats *stats, +					   struct dentry *parent) +{ +	return ocfs2_blockcheck_debug_install(stats, parent); +} + +void ocfs2_blockcheck_stats_debugfs_remove(struct ocfs2_blockcheck_stats *stats) +{ +	ocfs2_blockcheck_debug_remove(stats); +} + +static void ocfs2_blockcheck_inc_check(struct ocfs2_blockcheck_stats *stats) +{ +	u64 new_count; + +	if (!stats) +		return; + +	spin_lock(&stats->b_lock); +	stats->b_check_count++; +	new_count = stats->b_check_count; +	spin_unlock(&stats->b_lock); + +	if (!new_count) +		mlog(ML_NOTICE, "Block check count has wrapped\n"); +} + +static void ocfs2_blockcheck_inc_failure(struct ocfs2_blockcheck_stats *stats) +{ +	u64 new_count; + +	if (!stats) +		return; + +	spin_lock(&stats->b_lock); +	stats->b_failure_count++; +	new_count = stats->b_failure_count; +	spin_unlock(&stats->b_lock); + +	if (!new_count) +		mlog(ML_NOTICE, "Checksum failure count has wrapped\n"); +} + +static void ocfs2_blockcheck_inc_recover(struct ocfs2_blockcheck_stats *stats) +{ +	u64 new_count; + +	if (!stats) +		return; + +	spin_lock(&stats->b_lock); +	stats->b_recover_count++; +	new_count = stats->b_recover_count; +	spin_unlock(&stats->b_lock); + +	if (!new_count) +		mlog(ML_NOTICE, "ECC recovery count has wrapped\n"); +} + + + +/* + * These are the low-level APIs for using the ocfs2_block_check structure. + */ +  /*   * This function generates check information for a block.   * data is the block to be checked.  bc is a pointer to the @@ -266,12 +418,15 @@ void ocfs2_block_check_compute(void *data, size_t blocksize,   * Again, the data passed in should be the on-disk endian.   */  int ocfs2_block_check_validate(void *data, size_t blocksize, -			       struct ocfs2_block_check *bc) +			       struct ocfs2_block_check *bc, +			       struct ocfs2_blockcheck_stats *stats)  {  	int rc = 0;  	struct ocfs2_block_check check;  	u32 crc, ecc; +	ocfs2_blockcheck_inc_check(stats); +  	check.bc_crc32e = le32_to_cpu(bc->bc_crc32e);  	check.bc_ecc = le16_to_cpu(bc->bc_ecc); @@ -282,6 +437,7 @@ int ocfs2_block_check_validate(void *data, size_t blocksize,  	if (crc == check.bc_crc32e)  		goto out; +	ocfs2_blockcheck_inc_failure(stats);  	mlog(ML_ERROR,  	     "CRC32 failed: stored: %u, computed %u.  Applying ECC.\n",  	     (unsigned int)check.bc_crc32e, (unsigned int)crc); @@ -292,8 +448,10 @@ int ocfs2_block_check_validate(void *data, size_t blocksize,  	/* And check the crc32 again */  	crc = crc32_le(~0, data, blocksize); -	if (crc == check.bc_crc32e) +	if (crc == check.bc_crc32e) { +		ocfs2_blockcheck_inc_recover(stats);  		goto out; +	}  	mlog(ML_ERROR, "Fixed CRC32 failed: stored: %u, computed %u\n",  	     (unsigned int)check.bc_crc32e, (unsigned int)crc); @@ -366,7 +524,8 @@ void ocfs2_block_check_compute_bhs(struct buffer_head **bhs, int nr,   * Again, the data passed in should be the on-disk endian.   */  int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr, -				   struct ocfs2_block_check *bc) +				   struct ocfs2_block_check *bc, +				   struct ocfs2_blockcheck_stats *stats)  {  	int i, rc = 0;  	struct ocfs2_block_check check; @@ -377,6 +536,8 @@ int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr,  	if (!nr)  		return 0; +	ocfs2_blockcheck_inc_check(stats); +  	check.bc_crc32e = le32_to_cpu(bc->bc_crc32e);  	check.bc_ecc = le16_to_cpu(bc->bc_ecc); @@ -388,6 +549,7 @@ int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr,  	if (crc == check.bc_crc32e)  		goto out; +	ocfs2_blockcheck_inc_failure(stats);  	mlog(ML_ERROR,  	     "CRC32 failed: stored: %u, computed %u.  Applying ECC.\n",  	     (unsigned int)check.bc_crc32e, (unsigned int)crc); @@ -416,8 +578,10 @@ int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr,  	/* And check the crc32 again */  	for (i = 0, crc = ~0; i < nr; i++)  		crc = crc32_le(crc, bhs[i]->b_data, bhs[i]->b_size); -	if (crc == check.bc_crc32e) +	if (crc == check.bc_crc32e) { +		ocfs2_blockcheck_inc_recover(stats);  		goto out; +	}  	mlog(ML_ERROR, "Fixed CRC32 failed: stored: %u, computed %u\n",  	     (unsigned int)check.bc_crc32e, (unsigned int)crc); @@ -448,9 +612,11 @@ int ocfs2_validate_meta_ecc(struct super_block *sb, void *data,  			    struct ocfs2_block_check *bc)  {  	int rc = 0; +	struct ocfs2_super *osb = OCFS2_SB(sb); -	if (ocfs2_meta_ecc(OCFS2_SB(sb))) -		rc = ocfs2_block_check_validate(data, sb->s_blocksize, bc); +	if (ocfs2_meta_ecc(osb)) +		rc = ocfs2_block_check_validate(data, sb->s_blocksize, bc, +						&osb->osb_ecc_stats);  	return rc;  } @@ -468,9 +634,11 @@ int ocfs2_validate_meta_ecc_bhs(struct super_block *sb,  				struct ocfs2_block_check *bc)  {  	int rc = 0; +	struct ocfs2_super *osb = OCFS2_SB(sb); -	if (ocfs2_meta_ecc(OCFS2_SB(sb))) -		rc = ocfs2_block_check_validate_bhs(bhs, nr, bc); +	if (ocfs2_meta_ecc(osb)) +		rc = ocfs2_block_check_validate_bhs(bhs, nr, bc, +						    &osb->osb_ecc_stats);  	return rc;  } diff --git a/fs/ocfs2/blockcheck.h b/fs/ocfs2/blockcheck.h index 70ec3feda32..d4b69febf70 100644 --- a/fs/ocfs2/blockcheck.h +++ b/fs/ocfs2/blockcheck.h @@ -21,6 +21,24 @@  #define OCFS2_BLOCKCHECK_H +/* Count errors and error correction from blockcheck.c */ +struct ocfs2_blockcheck_stats { +	spinlock_t b_lock; +	u64 b_check_count;	/* Number of blocks we've checked */ +	u64 b_failure_count;	/* Number of failed checksums */ +	u64 b_recover_count;	/* Number of blocks fixed by ecc */ + +	/* +	 * debugfs entries, used if this is passed to +	 * ocfs2_blockcheck_stats_debugfs_install() +	 */ +	struct dentry *b_debug_dir;	/* Parent of the debugfs  files */ +	struct dentry *b_debug_check;	/* Exposes b_check_count */ +	struct dentry *b_debug_failure;	/* Exposes b_failure_count */ +	struct dentry *b_debug_recover;	/* Exposes b_recover_count */ +}; + +  /* High level block API */  void ocfs2_compute_meta_ecc(struct super_block *sb, void *data,  			    struct ocfs2_block_check *bc); @@ -37,11 +55,18 @@ int ocfs2_validate_meta_ecc_bhs(struct super_block *sb,  void ocfs2_block_check_compute(void *data, size_t blocksize,  			       struct ocfs2_block_check *bc);  int ocfs2_block_check_validate(void *data, size_t blocksize, -			       struct ocfs2_block_check *bc); +			       struct ocfs2_block_check *bc, +			       struct ocfs2_blockcheck_stats *stats);  void ocfs2_block_check_compute_bhs(struct buffer_head **bhs, int nr,  				   struct ocfs2_block_check *bc);  int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr, -				   struct ocfs2_block_check *bc); +				   struct ocfs2_block_check *bc, +				   struct ocfs2_blockcheck_stats *stats); + +/* Debug Initialization */ +int ocfs2_blockcheck_stats_debugfs_install(struct ocfs2_blockcheck_stats *stats, +					   struct dentry *parent); +void ocfs2_blockcheck_stats_debugfs_remove(struct ocfs2_blockcheck_stats *stats);  /*   * Hamming code functions diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h index 7e72a81bc2d..696c32e5071 100644 --- a/fs/ocfs2/cluster/masklog.h +++ b/fs/ocfs2/cluster/masklog.h @@ -48,34 +48,33 @@   * only emit the appropriage printk() when the caller passes in a constant   * mask, as is almost always the case.   * - * All this bitmask nonsense is hidden from the /proc interface so that Joel - * doesn't have an aneurism.  Reading the file gives a straight forward - * indication of which bits are on or off: - * 	ENTRY off - * 	EXIT off + * All this bitmask nonsense is managed from the files under + * /sys/fs/o2cb/logmask/.  Reading the files gives a straightforward + * indication of which bits are allowed (allow) or denied (off/deny). + * 	ENTRY deny + * 	EXIT deny   * 	TCP off   * 	MSG off   * 	SOCKET off - * 	ERROR off - * 	NOTICE on + * 	ERROR allow + * 	NOTICE allow   *   * Writing changes the state of a given bit and requires a strictly formatted   * single write() call:   * - * 	write(fd, "ENTRY on", 8); + * 	write(fd, "allow", 5);   * - * would turn the entry bit on.  "1" is also accepted in the place of "on", and - * "off" and "0" behave as expected. + * Echoing allow/deny/off string into the logmask files can flip the bits + * on or off as expected; here is the bash script for example:   * - * Some trivial shell can flip all the bits on or off: + * log_mask="/sys/fs/o2cb/log_mask" + * for node in ENTRY EXIT TCP MSG SOCKET ERROR NOTICE; do + *	echo allow >"$log_mask"/"$node" + * done   * - * log_mask="/proc/fs/ocfs2_nodemanager/log_mask" - * cat $log_mask | ( - * 	while read bit status; do - * 		# $1 is "on" or "off", say - * 		echo "$bit $1" > $log_mask - * 	done - * ) + * The debugfs.ocfs2 tool can also flip the bits with the -l option: + * + * debugfs.ocfs2 -l TCP allow   */  /* for task_struct */ diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 9fbe849f634..334f231a422 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -974,7 +974,7 @@ static int o2net_tx_can_proceed(struct o2net_node *nn,  int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec,  			   size_t caller_veclen, u8 target_node, int *status)  { -	int ret, error = 0; +	int ret;  	struct o2net_msg *msg = NULL;  	size_t veclen, caller_bytes = 0;  	struct kvec *vec = NULL; @@ -1015,10 +1015,7 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec,  	o2net_set_nst_sock_time(&nst); -	ret = wait_event_interruptible(nn->nn_sc_wq, -				       o2net_tx_can_proceed(nn, &sc, &error)); -	if (!ret && error) -		ret = error; +	wait_event(nn->nn_sc_wq, o2net_tx_can_proceed(nn, &sc, &ret));  	if (ret)  		goto out; diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index c5752305627..b358f3bf896 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -2900,6 +2900,8 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,  	alloc = ocfs2_clusters_for_bytes(sb, bytes);  	dx_alloc = 0; +	down_write(&oi->ip_alloc_sem); +  	if (ocfs2_supports_indexed_dirs(osb)) {  		credits += ocfs2_add_dir_index_credits(sb); @@ -2940,8 +2942,6 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,  		goto out;  	} -	down_write(&oi->ip_alloc_sem); -  	/*  	 * Prepare for worst case allocation scenario of two separate  	 * extents in the unindexed tree. @@ -2953,7 +2953,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,  	if (IS_ERR(handle)) {  		ret = PTR_ERR(handle);  		mlog_errno(ret); -		goto out_sem; +		goto out;  	}  	if (vfs_dq_alloc_space_nodirty(dir, @@ -3172,10 +3172,8 @@ out_commit:  	ocfs2_commit_trans(osb, handle); -out_sem: -	up_write(&oi->ip_alloc_sem); -  out: +	up_write(&oi->ip_alloc_sem);  	if (data_ac)  		ocfs2_free_alloc_context(data_ac);  	if (meta_ac) @@ -3322,11 +3320,15 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,  		brelse(new_bh);  		new_bh = NULL; +		down_write(&OCFS2_I(dir)->ip_alloc_sem); +		drop_alloc_sem = 1;  		dir_i_size = i_size_read(dir);  		credits = OCFS2_SIMPLE_DIR_EXTEND_CREDITS;  		goto do_extend;  	} +	down_write(&OCFS2_I(dir)->ip_alloc_sem); +	drop_alloc_sem = 1;  	dir_i_size = i_size_read(dir);  	mlog(0, "extending dir %llu (i_size = %lld)\n",  	     (unsigned long long)OCFS2_I(dir)->ip_blkno, dir_i_size); @@ -3370,9 +3372,6 @@ do_extend:  		credits++; /* For attaching the new dirent block to the  			    * dx_root */ -	down_write(&OCFS2_I(dir)->ip_alloc_sem); -	drop_alloc_sem = 1; -  	handle = ocfs2_start_trans(osb, credits);  	if (IS_ERR(handle)) {  		status = PTR_ERR(handle); @@ -3435,10 +3434,10 @@ bail_bh:  	*new_de_bh = new_bh;  	get_bh(*new_de_bh);  bail: -	if (drop_alloc_sem) -		up_write(&OCFS2_I(dir)->ip_alloc_sem);  	if (handle)  		ocfs2_commit_trans(osb, handle); +	if (drop_alloc_sem) +		up_write(&OCFS2_I(dir)->ip_alloc_sem);  	if (data_ac)  		ocfs2_free_alloc_context(data_ac); diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index e15fc7d5082..6cdeaa76f27 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -248,6 +248,10 @@ static struct ocfs2_lock_res_ops ocfs2_nfs_sync_lops = {  	.flags		= 0,  }; +static struct ocfs2_lock_res_ops ocfs2_orphan_scan_lops = { +	.flags		= LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, +}; +  static struct ocfs2_lock_res_ops ocfs2_dentry_lops = {  	.get_osb	= ocfs2_get_dentry_osb,  	.post_unlock	= ocfs2_dentry_post_unlock, @@ -637,6 +641,19 @@ static void ocfs2_nfs_sync_lock_res_init(struct ocfs2_lock_res *res,  				   &ocfs2_nfs_sync_lops, osb);  } +static void ocfs2_orphan_scan_lock_res_init(struct ocfs2_lock_res *res, +					    struct ocfs2_super *osb) +{ +	struct ocfs2_orphan_scan_lvb *lvb; + +	ocfs2_lock_res_init_once(res); +	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_ORPHAN_SCAN, 0, 0, res->l_name); +	ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_ORPHAN_SCAN, +				   &ocfs2_orphan_scan_lops, osb); +	lvb = ocfs2_dlm_lvb(&res->l_lksb); +	lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION; +} +  void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres,  			      struct ocfs2_file_private *fp)  { @@ -2352,6 +2369,37 @@ void ocfs2_inode_unlock(struct inode *inode,  	mlog_exit_void();  } +int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno, int ex) +{ +	struct ocfs2_lock_res *lockres; +	struct ocfs2_orphan_scan_lvb *lvb; +	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; +	int status = 0; + +	lockres = &osb->osb_orphan_scan.os_lockres; +	status = ocfs2_cluster_lock(osb, lockres, level, 0, 0); +	if (status < 0) +		return status; + +	lvb = ocfs2_dlm_lvb(&lockres->l_lksb); +	if (lvb->lvb_version == OCFS2_ORPHAN_LVB_VERSION) +		*seqno = be32_to_cpu(lvb->lvb_os_seqno); +	return status; +} + +void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno, int ex) +{ +	struct ocfs2_lock_res *lockres; +	struct ocfs2_orphan_scan_lvb *lvb; +	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; + +	lockres = &osb->osb_orphan_scan.os_lockres; +	lvb = ocfs2_dlm_lvb(&lockres->l_lksb); +	lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION; +	lvb->lvb_os_seqno = cpu_to_be32(seqno); +	ocfs2_cluster_unlock(osb, lockres, level); +} +  int ocfs2_super_lock(struct ocfs2_super *osb,  		     int ex)  { @@ -2842,6 +2890,7 @@ local:  	ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb);  	ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb);  	ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb); +	ocfs2_orphan_scan_lock_res_init(&osb->osb_orphan_scan.os_lockres, osb);  	osb->cconn = conn; @@ -2878,6 +2927,7 @@ void ocfs2_dlm_shutdown(struct ocfs2_super *osb,  	ocfs2_lock_res_free(&osb->osb_super_lockres);  	ocfs2_lock_res_free(&osb->osb_rename_lockres);  	ocfs2_lock_res_free(&osb->osb_nfs_sync_lockres); +	ocfs2_lock_res_free(&osb->osb_orphan_scan.os_lockres);  	ocfs2_cluster_disconnect(osb->cconn, hangup_pending);  	osb->cconn = NULL; @@ -3061,6 +3111,7 @@ static void ocfs2_drop_osb_locks(struct ocfs2_super *osb)  	ocfs2_simple_drop_lockres(osb, &osb->osb_super_lockres);  	ocfs2_simple_drop_lockres(osb, &osb->osb_rename_lockres);  	ocfs2_simple_drop_lockres(osb, &osb->osb_nfs_sync_lockres); +	ocfs2_simple_drop_lockres(osb, &osb->osb_orphan_scan.os_lockres);  }  int ocfs2_drop_inode_locks(struct inode *inode) diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h index e1fd5721cd7..31b90d7b8f5 100644 --- a/fs/ocfs2/dlmglue.h +++ b/fs/ocfs2/dlmglue.h @@ -62,6 +62,14 @@ struct ocfs2_qinfo_lvb {  	__be32	lvb_free_entry;  }; +#define OCFS2_ORPHAN_LVB_VERSION 1 + +struct ocfs2_orphan_scan_lvb { +	__u8	lvb_version; +	__u8	lvb_reserved[3]; +	__be32	lvb_os_seqno; +}; +  /* ocfs2_inode_lock_full() 'arg_flags' flags */  /* don't wait on recovery. */  #define OCFS2_META_LOCK_RECOVERY	(0x01) @@ -113,6 +121,9 @@ int ocfs2_super_lock(struct ocfs2_super *osb,  		     int ex);  void ocfs2_super_unlock(struct ocfs2_super *osb,  			int ex); +int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno, int ex); +void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno, int ex); +  int ocfs2_rename_lock(struct ocfs2_super *osb);  void ocfs2_rename_unlock(struct ocfs2_super *osb);  int ocfs2_nfs_sync_lock(struct ocfs2_super *osb, int ex); diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index c2a87c885b7..07267e0da90 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -187,6 +187,9 @@ static int ocfs2_sync_file(struct file *file,  	if (err)  		goto bail; +	if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) +		goto bail; +  	journal = osb->journal->j_journal;  	err = jbd2_journal_force_commit(journal); @@ -894,9 +897,9 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)  	struct ocfs2_super *osb = OCFS2_SB(sb);  	struct buffer_head *bh = NULL;  	handle_t *handle = NULL; -	int locked[MAXQUOTAS] = {0, 0}; -	int credits, qtype; -	struct ocfs2_mem_dqinfo *oinfo; +	int qtype; +	struct dquot *transfer_from[MAXQUOTAS] = { }; +	struct dquot *transfer_to[MAXQUOTAS] = { };  	mlog_entry("(0x%p, '%.*s')\n", dentry,  	           dentry->d_name.len, dentry->d_name.name); @@ -969,30 +972,37 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)  	if ((attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||  	    (attr->ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { -		credits = OCFS2_INODE_UPDATE_CREDITS; +		/* +		 * Gather pointers to quota structures so that allocation / +		 * freeing of quota structures happens here and not inside +		 * vfs_dq_transfer() where we have problems with lock ordering +		 */  		if (attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid  		    && OCFS2_HAS_RO_COMPAT_FEATURE(sb,  		    OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) { -			oinfo = sb_dqinfo(sb, USRQUOTA)->dqi_priv; -			status = ocfs2_lock_global_qf(oinfo, 1); -			if (status < 0) +			transfer_to[USRQUOTA] = dqget(sb, attr->ia_uid, +						      USRQUOTA); +			transfer_from[USRQUOTA] = dqget(sb, inode->i_uid, +							USRQUOTA); +			if (!transfer_to[USRQUOTA] || !transfer_from[USRQUOTA]) { +				status = -ESRCH;  				goto bail_unlock; -			credits += ocfs2_calc_qinit_credits(sb, USRQUOTA) + -				ocfs2_calc_qdel_credits(sb, USRQUOTA); -			locked[USRQUOTA] = 1; +			}  		}  		if (attr->ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid  		    && OCFS2_HAS_RO_COMPAT_FEATURE(sb,  		    OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) { -			oinfo = sb_dqinfo(sb, GRPQUOTA)->dqi_priv; -			status = ocfs2_lock_global_qf(oinfo, 1); -			if (status < 0) +			transfer_to[GRPQUOTA] = dqget(sb, attr->ia_gid, +						      GRPQUOTA); +			transfer_from[GRPQUOTA] = dqget(sb, inode->i_gid, +							GRPQUOTA); +			if (!transfer_to[GRPQUOTA] || !transfer_from[GRPQUOTA]) { +				status = -ESRCH;  				goto bail_unlock; -			credits += ocfs2_calc_qinit_credits(sb, GRPQUOTA) + -				   ocfs2_calc_qdel_credits(sb, GRPQUOTA); -			locked[GRPQUOTA] = 1; +			}  		} -		handle = ocfs2_start_trans(osb, credits); +		handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS + +					   2 * ocfs2_quota_trans_credits(sb));  		if (IS_ERR(handle)) {  			status = PTR_ERR(handle);  			mlog_errno(status); @@ -1030,12 +1040,6 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)  bail_commit:  	ocfs2_commit_trans(osb, handle);  bail_unlock: -	for (qtype = 0; qtype < MAXQUOTAS; qtype++) { -		if (!locked[qtype]) -			continue; -		oinfo = sb_dqinfo(sb, qtype)->dqi_priv; -		ocfs2_unlock_global_qf(oinfo, 1); -	}  	ocfs2_inode_unlock(inode, 1);  bail_unlock_rw:  	if (size_change) @@ -1043,6 +1047,12 @@ bail_unlock_rw:  bail:  	brelse(bh); +	/* Release quota pointers in case we acquired them */ +	for (qtype = 0; qtype < MAXQUOTAS; qtype++) { +		dqput(transfer_to[qtype]); +		dqput(transfer_from[qtype]); +	} +  	if (!status && attr->ia_valid & ATTR_MODE) {  		status = ocfs2_acl_chmod(inode);  		if (status < 0) diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index a20a0f1e37f..4a3b9e6b31a 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -28,6 +28,8 @@  #include <linux/slab.h>  #include <linux/highmem.h>  #include <linux/kthread.h> +#include <linux/time.h> +#include <linux/random.h>  #define MLOG_MASK_PREFIX ML_JOURNAL  #include <cluster/masklog.h> @@ -52,6 +54,8 @@  DEFINE_SPINLOCK(trans_inc_lock); +#define ORPHAN_SCAN_SCHEDULE_TIMEOUT 300000 +  static int ocfs2_force_read_journal(struct inode *inode);  static int ocfs2_recover_node(struct ocfs2_super *osb,  			      int node_num, int slot_num); @@ -1841,6 +1845,113 @@ bail:  	return status;  } +/* + * Scan timer should get fired every ORPHAN_SCAN_SCHEDULE_TIMEOUT. Add some + * randomness to the timeout to minimize multple nodes firing the timer at the + * same time. + */ +static inline unsigned long ocfs2_orphan_scan_timeout(void) +{ +	unsigned long time; + +	get_random_bytes(&time, sizeof(time)); +	time = ORPHAN_SCAN_SCHEDULE_TIMEOUT + (time % 5000); +	return msecs_to_jiffies(time); +} + +/* + * ocfs2_queue_orphan_scan calls ocfs2_queue_recovery_completion for + * every slot, queuing a recovery of the slot on the ocfs2_wq thread. This + * is done to catch any orphans that are left over in orphan directories. + * + * ocfs2_queue_orphan_scan gets called every ORPHAN_SCAN_SCHEDULE_TIMEOUT + * seconds.  It gets an EX lock on os_lockres and checks sequence number + * stored in LVB. If the sequence number has changed, it means some other + * node has done the scan.  This node skips the scan and tracks the + * sequence number.  If the sequence number didn't change, it means a scan + * hasn't happened.  The node queues a scan and increments the + * sequence number in the LVB. + */ +void ocfs2_queue_orphan_scan(struct ocfs2_super *osb) +{ +	struct ocfs2_orphan_scan *os; +	int status, i; +	u32 seqno = 0; + +	os = &osb->osb_orphan_scan; + +	status = ocfs2_orphan_scan_lock(osb, &seqno, DLM_LOCK_EX); +	if (status < 0) { +		if (status != -EAGAIN) +			mlog_errno(status); +		goto out; +	} + +	if (os->os_seqno != seqno) { +		os->os_seqno = seqno; +		goto unlock; +	} + +	for (i = 0; i < osb->max_slots; i++) +		ocfs2_queue_recovery_completion(osb->journal, i, NULL, NULL, +						NULL); +	/* +	 * We queued a recovery on orphan slots, increment the sequence +	 * number and update LVB so other node will skip the scan for a while +	 */ +	seqno++; +	os->os_count++; +	os->os_scantime = CURRENT_TIME; +unlock: +	ocfs2_orphan_scan_unlock(osb, seqno, DLM_LOCK_EX); +out: +	return; +} + +/* Worker task that gets fired every ORPHAN_SCAN_SCHEDULE_TIMEOUT millsec */ +void ocfs2_orphan_scan_work(struct work_struct *work) +{ +	struct ocfs2_orphan_scan *os; +	struct ocfs2_super *osb; + +	os = container_of(work, struct ocfs2_orphan_scan, +			  os_orphan_scan_work.work); +	osb = os->os_osb; + +	mutex_lock(&os->os_lock); +	ocfs2_queue_orphan_scan(osb); +	schedule_delayed_work(&os->os_orphan_scan_work, +			      ocfs2_orphan_scan_timeout()); +	mutex_unlock(&os->os_lock); +} + +void ocfs2_orphan_scan_stop(struct ocfs2_super *osb) +{ +	struct ocfs2_orphan_scan *os; + +	os = &osb->osb_orphan_scan; +	mutex_lock(&os->os_lock); +	cancel_delayed_work(&os->os_orphan_scan_work); +	mutex_unlock(&os->os_lock); +} + +int ocfs2_orphan_scan_init(struct ocfs2_super *osb) +{ +	struct ocfs2_orphan_scan *os; + +	os = &osb->osb_orphan_scan; +	os->os_osb = osb; +	os->os_count = 0; +	os->os_scantime = CURRENT_TIME; +	mutex_init(&os->os_lock); + +	INIT_DELAYED_WORK(&os->os_orphan_scan_work, +			  ocfs2_orphan_scan_work); +	schedule_delayed_work(&os->os_orphan_scan_work, +			      ocfs2_orphan_scan_timeout()); +	return 0; +} +  struct ocfs2_orphan_filldir_priv {  	struct inode		*head;  	struct ocfs2_super	*osb; diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index eb7b76331eb..61045eeb3f6 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -144,6 +144,10 @@ static inline void ocfs2_inode_set_new(struct ocfs2_super *osb,  }  /* Exported only for the journal struct init code in super.c. Do not call. */ +int ocfs2_orphan_scan_init(struct ocfs2_super *osb); +void ocfs2_orphan_scan_stop(struct ocfs2_super *osb); +void ocfs2_orphan_scan_exit(struct ocfs2_super *osb); +  void ocfs2_complete_recovery(struct work_struct *work);  void ocfs2_wait_for_recovery(struct ocfs2_super *osb); diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 1386281950d..18c1d9ec1c9 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -47,6 +47,9 @@  #include "ocfs2_fs.h"  #include "ocfs2_lockid.h" +/* For struct ocfs2_blockcheck_stats */ +#include "blockcheck.h" +  /* Most user visible OCFS2 inodes will have very few pieces of   * metadata, but larger files (including bitmaps, etc) must be taken   * into account when designing an access scheme. We allow a small @@ -151,6 +154,16 @@ struct ocfs2_lock_res {  #endif  }; +struct ocfs2_orphan_scan { +	struct mutex 		os_lock; +	struct ocfs2_super 	*os_osb; +	struct ocfs2_lock_res 	os_lockres;     /* lock to synchronize scans */ +	struct delayed_work 	os_orphan_scan_work; +	struct timespec		os_scantime;  /* time this node ran the scan */ +	u32			os_count;      /* tracks node specific scans */ +	u32  			os_seqno;       /* tracks cluster wide scans */ +}; +  struct ocfs2_dlm_debug {  	struct kref d_refcnt;  	struct dentry *d_locking_state; @@ -295,6 +308,7 @@ struct ocfs2_super  	struct ocfs2_dinode *local_alloc_copy;  	struct ocfs2_quota_recovery *quota_rec; +	struct ocfs2_blockcheck_stats osb_ecc_stats;  	struct ocfs2_alloc_stats alloc_stats;  	char dev_str[20];		/* "major,minor" of the device */ @@ -341,6 +355,8 @@ struct ocfs2_super  	unsigned int			*osb_orphan_wipes;  	wait_queue_head_t		osb_wipe_event; +	struct ocfs2_orphan_scan	osb_orphan_scan; +  	/* used to protect metaecc calculation check of xattr. */  	spinlock_t osb_xattr_lock; diff --git a/fs/ocfs2/ocfs2_lockid.h b/fs/ocfs2/ocfs2_lockid.h index a53ce87481b..fcdba091af3 100644 --- a/fs/ocfs2/ocfs2_lockid.h +++ b/fs/ocfs2/ocfs2_lockid.h @@ -48,6 +48,7 @@ enum ocfs2_lock_type {  	OCFS2_LOCK_TYPE_FLOCK,  	OCFS2_LOCK_TYPE_QINFO,  	OCFS2_LOCK_TYPE_NFS_SYNC, +	OCFS2_LOCK_TYPE_ORPHAN_SCAN,  	OCFS2_NUM_LOCK_TYPES  }; @@ -85,6 +86,9 @@ static inline char ocfs2_lock_type_char(enum ocfs2_lock_type type)  		case OCFS2_LOCK_TYPE_NFS_SYNC:  			c = 'Y';  			break; +		case OCFS2_LOCK_TYPE_ORPHAN_SCAN: +			c = 'P'; +			break;  		default:  			c = '\0';  	} @@ -104,6 +108,7 @@ static char *ocfs2_lock_type_strings[] = {  	[OCFS2_LOCK_TYPE_OPEN] = "Open",  	[OCFS2_LOCK_TYPE_FLOCK] = "Flock",  	[OCFS2_LOCK_TYPE_QINFO] = "Quota", +	[OCFS2_LOCK_TYPE_ORPHAN_SCAN] = "OrphanScan",  };  static inline const char *ocfs2_lock_type_string(enum ocfs2_lock_type type) diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index 1ed0f7c8686..edfa60cd155 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -421,6 +421,7 @@ int ocfs2_global_read_dquot(struct dquot *dquot)  	OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes;  	if (!dquot->dq_off) {	/* No real quota entry? */  		/* Upgrade to exclusive lock for allocation */ +		ocfs2_qinfo_unlock(info, 0);  		err = ocfs2_qinfo_lock(info, 1);  		if (err < 0)  			goto out_qlock; @@ -435,7 +436,8 @@ int ocfs2_global_read_dquot(struct dquot *dquot)  out_qlock:  	if (ex)  		ocfs2_qinfo_unlock(info, 1); -	ocfs2_qinfo_unlock(info, 0); +	else +		ocfs2_qinfo_unlock(info, 0);  out:  	if (err < 0)  		mlog_errno(err); diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index 07deec5e972..5a460fa8255 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c @@ -444,10 +444,6 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode,  	mlog_entry("ino=%lu type=%u", (unsigned long)lqinode->i_ino, type); -	status = ocfs2_lock_global_qf(oinfo, 1); -	if (status < 0) -		goto out; -  	list_for_each_entry_safe(rchunk, next, &(rec->r_list[type]), rc_list) {  		chunk = rchunk->rc_chunk;  		hbh = NULL; @@ -480,12 +476,18 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode,  				     type);  				goto out_put_bh;  			} +			status = ocfs2_lock_global_qf(oinfo, 1); +			if (status < 0) { +				mlog_errno(status); +				goto out_put_dquot; +			} +  			handle = ocfs2_start_trans(OCFS2_SB(sb),  						   OCFS2_QSYNC_CREDITS);  			if (IS_ERR(handle)) {  				status = PTR_ERR(handle);  				mlog_errno(status); -				goto out_put_dquot; +				goto out_drop_lock;  			}  			mutex_lock(&sb_dqopt(sb)->dqio_mutex);  			spin_lock(&dq_data_lock); @@ -523,6 +525,8 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode,  out_commit:  			mutex_unlock(&sb_dqopt(sb)->dqio_mutex);  			ocfs2_commit_trans(OCFS2_SB(sb), handle); +out_drop_lock: +			ocfs2_unlock_global_qf(oinfo, 1);  out_put_dquot:  			dqput(dquot);  out_put_bh: @@ -537,8 +541,6 @@ out_put_bh:  		if (status < 0)  			break;  	} -	ocfs2_unlock_global_qf(oinfo, 1); -out:  	if (status < 0)  		free_recovery_list(&(rec->r_list[type]));  	mlog_exit(status); @@ -655,6 +657,9 @@ static int ocfs2_local_read_info(struct super_block *sb, int type)  	struct ocfs2_quota_recovery *rec;  	int locked = 0; +	/* We don't need the lock and we have to acquire quota file locks +	 * which will later depend on this lock */ +	mutex_unlock(&sb_dqopt(sb)->dqio_mutex);  	info->dqi_maxblimit = 0x7fffffffffffffffLL;  	info->dqi_maxilimit = 0x7fffffffffffffffLL;  	oinfo = kmalloc(sizeof(struct ocfs2_mem_dqinfo), GFP_NOFS); @@ -733,6 +738,7 @@ static int ocfs2_local_read_info(struct super_block *sb, int type)  		goto out_err;  	} +	mutex_lock(&sb_dqopt(sb)->dqio_mutex);  	return 0;  out_err:  	if (oinfo) { @@ -746,6 +752,7 @@ out_err:  		kfree(oinfo);  	}  	brelse(bh); +	mutex_lock(&sb_dqopt(sb)->dqio_mutex);  	return -1;  } diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 201b40a441f..d33767f17ba 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -119,10 +119,12 @@ static void ocfs2_release_system_inodes(struct ocfs2_super *osb);  static int ocfs2_check_volume(struct ocfs2_super *osb);  static int ocfs2_verify_volume(struct ocfs2_dinode *di,  			       struct buffer_head *bh, -			       u32 sectsize); +			       u32 sectsize, +			       struct ocfs2_blockcheck_stats *stats);  static int ocfs2_initialize_super(struct super_block *sb,  				  struct buffer_head *bh, -				  int sector_size); +				  int sector_size, +				  struct ocfs2_blockcheck_stats *stats);  static int ocfs2_get_sector(struct super_block *sb,  			    struct buffer_head **bh,  			    int block, @@ -207,6 +209,7 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len)  	int i;  	struct ocfs2_cluster_connection *cconn = osb->cconn;  	struct ocfs2_recovery_map *rm = osb->recovery_map; +	struct ocfs2_orphan_scan *os;  	out += snprintf(buf + out, len - out,  			"%10s => Id: %-s  Uuid: %-s  Gen: 0x%X  Label: %-s\n", @@ -308,6 +311,13 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len)  				i, osb->slot_recovery_generations[i]);  	} +	os = &osb->osb_orphan_scan; +	out += snprintf(buf + out, len - out, "Orphan Scan=> "); +	out += snprintf(buf + out, len - out, "Local: %u  Global: %u ", +			os->os_count, os->os_seqno); +	out += snprintf(buf + out, len - out, " Last Scan: %lu seconds ago\n", +			(get_seconds() - os->os_scantime.tv_sec)); +  	return out;  } @@ -693,7 +703,8 @@ out:  static int ocfs2_sb_probe(struct super_block *sb,  			  struct buffer_head **bh, -			  int *sector_size) +			  int *sector_size, +			  struct ocfs2_blockcheck_stats *stats)  {  	int status, tmpstat;  	struct ocfs1_vol_disk_hdr *hdr; @@ -759,7 +770,8 @@ static int ocfs2_sb_probe(struct super_block *sb,  			goto bail;  		}  		di = (struct ocfs2_dinode *) (*bh)->b_data; -		status = ocfs2_verify_volume(di, *bh, blksize); +		memset(stats, 0, sizeof(struct ocfs2_blockcheck_stats)); +		status = ocfs2_verify_volume(di, *bh, blksize, stats);  		if (status >= 0)  			goto bail;  		brelse(*bh); @@ -965,6 +977,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)  	struct ocfs2_super *osb = NULL;  	struct buffer_head *bh = NULL;  	char nodestr[8]; +	struct ocfs2_blockcheck_stats stats;  	mlog_entry("%p, %p, %i", sb, data, silent); @@ -974,13 +987,13 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)  	}  	/* probe for superblock */ -	status = ocfs2_sb_probe(sb, &bh, §or_size); +	status = ocfs2_sb_probe(sb, &bh, §or_size, &stats);  	if (status < 0) {  		mlog(ML_ERROR, "superblock probe failed!\n");  		goto read_super_error;  	} -	status = ocfs2_initialize_super(sb, bh, sector_size); +	status = ocfs2_initialize_super(sb, bh, sector_size, &stats);  	osb = OCFS2_SB(sb);  	if (status < 0) {  		mlog_errno(status); @@ -1090,6 +1103,18 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)  		goto read_super_error;  	} +	if (ocfs2_meta_ecc(osb)) { +		status = ocfs2_blockcheck_stats_debugfs_install( +						&osb->osb_ecc_stats, +						osb->osb_debug_root); +		if (status) { +			mlog(ML_ERROR, +			     "Unable to create blockcheck statistics " +			     "files\n"); +			goto read_super_error; +		} +	} +  	status = ocfs2_mount_volume(sb);  	if (osb->root_inode)  		inode = igrab(osb->root_inode); @@ -1760,13 +1785,8 @@ static int ocfs2_mount_volume(struct super_block *sb)  	}  	status = ocfs2_truncate_log_init(osb); -	if (status < 0) { +	if (status < 0)  		mlog_errno(status); -		goto leave; -	} - -	if (ocfs2_mount_local(osb)) -		goto leave;  leave:  	if (unlock_super) @@ -1796,6 +1816,8 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)  	ocfs2_truncate_log_shutdown(osb); +	ocfs2_orphan_scan_stop(osb); +  	/* This will disable recovery and flush any recovery work. */  	ocfs2_recovery_exit(osb); @@ -1833,6 +1855,7 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)  	if (osb->cconn)  		ocfs2_dlm_shutdown(osb, hangup_needed); +	ocfs2_blockcheck_stats_debugfs_remove(&osb->osb_ecc_stats);  	debugfs_remove(osb->osb_debug_root);  	if (hangup_needed) @@ -1880,7 +1903,8 @@ static int ocfs2_setup_osb_uuid(struct ocfs2_super *osb, const unsigned char *uu  static int ocfs2_initialize_super(struct super_block *sb,  				  struct buffer_head *bh, -				  int sector_size) +				  int sector_size, +				  struct ocfs2_blockcheck_stats *stats)  {  	int status;  	int i, cbits, bbits; @@ -1939,6 +1963,9 @@ static int ocfs2_initialize_super(struct super_block *sb,  	atomic_set(&osb->alloc_stats.bg_allocs, 0);  	atomic_set(&osb->alloc_stats.bg_extends, 0); +	/* Copy the blockcheck stats from the superblock probe */ +	osb->osb_ecc_stats = *stats; +  	ocfs2_init_node_maps(osb);  	snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u", @@ -1951,6 +1978,13 @@ static int ocfs2_initialize_super(struct super_block *sb,  		goto bail;  	} +	status = ocfs2_orphan_scan_init(osb); +	if (status) { +		mlog(ML_ERROR, "Unable to initialize delayed orphan scan\n"); +		mlog_errno(status); +		goto bail; +	} +  	init_waitqueue_head(&osb->checkpoint_event);  	atomic_set(&osb->needs_checkpoint, 0); @@ -2169,7 +2203,8 @@ bail:   */  static int ocfs2_verify_volume(struct ocfs2_dinode *di,  			       struct buffer_head *bh, -			       u32 blksz) +			       u32 blksz, +			       struct ocfs2_blockcheck_stats *stats)  {  	int status = -EAGAIN; @@ -2182,7 +2217,8 @@ static int ocfs2_verify_volume(struct ocfs2_dinode *di,  		    OCFS2_FEATURE_INCOMPAT_META_ECC) {  			status = ocfs2_block_check_validate(bh->b_data,  							    bh->b_size, -							    &di->i_check); +							    &di->i_check, +							    stats);  			if (status)  				goto out;  		} diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 15631019dc6..ba320e25074 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -3154,7 +3154,7 @@ static int ocfs2_iterate_xattr_buckets(struct inode *inode,  		     le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash));  		if (func) {  			ret = func(inode, bucket, para); -			if (ret) +			if (ret && ret != -ERANGE)  				mlog_errno(ret);  			/* Fall through to bucket_relse() */  		} @@ -3261,7 +3261,8 @@ static int ocfs2_xattr_tree_list_index_block(struct inode *inode,  						  ocfs2_list_xattr_bucket,  						  &xl);  		if (ret) { -			mlog_errno(ret); +			if (ret != -ERANGE) +				mlog_errno(ret);  			goto out;  		} | 
