diff options
Diffstat (limited to 'fs')
| -rw-r--r-- | fs/bio.c | 1 | ||||
| -rw-r--r-- | fs/btrfs/disk-io.c | 26 | ||||
| -rw-r--r-- | fs/btrfs/transaction.c | 12 | ||||
| -rw-r--r-- | fs/fat/cache.c | 6 | ||||
| -rw-r--r-- | fs/fat/dir.c | 2 | ||||
| -rw-r--r-- | fs/fat/fat.h | 7 | ||||
| -rw-r--r-- | fs/fat/fatent.c | 4 | ||||
| -rw-r--r-- | fs/fat/file.c | 184 | ||||
| -rw-r--r-- | fs/fat/inode.c | 28 | ||||
| -rw-r--r-- | fs/fat/misc.c | 22 | ||||
| -rw-r--r-- | fs/fat/namei_msdos.c | 2 | ||||
| -rw-r--r-- | fs/fat/namei_vfat.c | 2 | ||||
| -rw-r--r-- | fs/jfs/jfs_extent.c | 1 | ||||
| -rw-r--r-- | fs/ocfs2/alloc.c | 80 | ||||
| -rw-r--r-- | fs/ocfs2/blockcheck.c | 184 | ||||
| -rw-r--r-- | fs/ocfs2/blockcheck.h | 29 | ||||
| -rw-r--r-- | fs/ocfs2/cluster/masklog.h | 35 | ||||
| -rw-r--r-- | fs/ocfs2/cluster/tcp.c | 7 | ||||
| -rw-r--r-- | fs/ocfs2/dir.c | 21 | ||||
| -rw-r--r-- | fs/ocfs2/dlmglue.c | 51 | ||||
| -rw-r--r-- | fs/ocfs2/dlmglue.h | 11 | ||||
| -rw-r--r-- | fs/ocfs2/file.c | 56 | ||||
| -rw-r--r-- | fs/ocfs2/journal.c | 111 | ||||
| -rw-r--r-- | fs/ocfs2/journal.h | 4 | ||||
| -rw-r--r-- | fs/ocfs2/ocfs2.h | 16 | ||||
| -rw-r--r-- | fs/ocfs2/ocfs2_lockid.h | 5 | ||||
| -rw-r--r-- | fs/ocfs2/quota_global.c | 4 | ||||
| -rw-r--r-- | fs/ocfs2/quota_local.c | 21 | ||||
| -rw-r--r-- | fs/ocfs2/super.c | 66 | ||||
| -rw-r--r-- | fs/ocfs2/xattr.c | 5 | ||||
| -rw-r--r-- | fs/ubifs/super.c | 3 | 
31 files changed, 788 insertions, 218 deletions
| @@ -25,7 +25,6 @@  #include <linux/module.h>  #include <linux/mempool.h>  #include <linux/workqueue.h> -#include <linux/blktrace_api.h>  #include <scsi/sg.h>		/* for struct sg_iovec */  #include <trace/events/block.h> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0d50d49d990..d28d29c95f7 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -42,6 +42,8 @@  static struct extent_io_ops btree_extent_io_ops;  static void end_workqueue_fn(struct btrfs_work *work); +static atomic_t btrfs_bdi_num = ATOMIC_INIT(0); +  /*   * end_io_wq structs are used to do processing in task context when an IO is   * complete.  This is used during reads to verify checksums, and it is used @@ -1342,12 +1344,25 @@ static void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)  	free_extent_map(em);  } +/* + * If this fails, caller must call bdi_destroy() to get rid of the + * bdi again. + */  static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)  { -	bdi_init(bdi); +	int err; + +	bdi->capabilities = BDI_CAP_MAP_COPY; +	err = bdi_init(bdi); +	if (err) +		return err; + +	err = bdi_register(bdi, NULL, "btrfs-%d", +				atomic_inc_return(&btrfs_bdi_num)); +	if (err) +		return err; +  	bdi->ra_pages	= default_backing_dev_info.ra_pages; -	bdi->state		= 0; -	bdi->capabilities	= default_backing_dev_info.capabilities;  	bdi->unplug_io_fn	= btrfs_unplug_io_fn;  	bdi->unplug_io_data	= info;  	bdi->congested_fn	= btrfs_congested_fn; @@ -1569,7 +1584,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,  	fs_info->sb = sb;  	fs_info->max_extent = (u64)-1;  	fs_info->max_inline = 8192 * 1024; -	setup_bdi(fs_info, &fs_info->bdi); +	if (setup_bdi(fs_info, &fs_info->bdi)) +		goto fail_bdi;  	fs_info->btree_inode = new_inode(sb);  	fs_info->btree_inode->i_ino = 1;  	fs_info->btree_inode->i_nlink = 1; @@ -1946,8 +1962,8 @@ fail_iput:  	btrfs_close_devices(fs_info->fs_devices);  	btrfs_mapping_tree_free(&fs_info->mapping_tree); +fail_bdi:  	bdi_destroy(&fs_info->bdi); -  fail:  	kfree(extent_root);  	kfree(tree_root); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 2e177d7f4bb..4e83457ea25 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -543,13 +543,13 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,  			btrfs_free_log(trans, root);  			btrfs_update_reloc_root(trans, root); -			if (root->commit_root == root->node) -				continue; - -			free_extent_buffer(root->commit_root); -			root->commit_root = btrfs_root_node(root); +			if (root->commit_root != root->node) { +				free_extent_buffer(root->commit_root); +				root->commit_root = btrfs_root_node(root); +				btrfs_set_root_node(&root->root_item, +						    root->node); +			} -			btrfs_set_root_node(&root->root_item, root->node);  			err = btrfs_update_root(trans, fs_info->tree_root,  						&root->root_key,  						&root->root_item); diff --git a/fs/fat/cache.c b/fs/fat/cache.c index b4260229808..923990e4f16 100644 --- a/fs/fat/cache.c +++ b/fs/fat/cache.c @@ -241,7 +241,7 @@ int fat_get_cluster(struct inode *inode, int cluster, int *fclus, int *dclus)  	while (*fclus < cluster) {  		/* prevent the infinite loop of cluster chain */  		if (*fclus > limit) { -			fat_fs_panic(sb, "%s: detected the cluster chain loop" +			fat_fs_error(sb, "%s: detected the cluster chain loop"  				     " (i_pos %lld)", __func__,  				     MSDOS_I(inode)->i_pos);  			nr = -EIO; @@ -252,7 +252,7 @@ int fat_get_cluster(struct inode *inode, int cluster, int *fclus, int *dclus)  		if (nr < 0)  			goto out;  		else if (nr == FAT_ENT_FREE) { -			fat_fs_panic(sb, "%s: invalid cluster chain" +			fat_fs_error(sb, "%s: invalid cluster chain"  				     " (i_pos %lld)", __func__,  				     MSDOS_I(inode)->i_pos);  			nr = -EIO; @@ -285,7 +285,7 @@ static int fat_bmap_cluster(struct inode *inode, int cluster)  	if (ret < 0)  		return ret;  	else if (ret == FAT_ENT_EOF) { -		fat_fs_panic(sb, "%s: request beyond EOF (i_pos %lld)", +		fat_fs_error(sb, "%s: request beyond EOF (i_pos %lld)",  			     __func__, MSDOS_I(inode)->i_pos);  		return -EIO;  	} diff --git a/fs/fat/dir.c b/fs/fat/dir.c index f3500294eec..3b8e71b412f 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -1334,7 +1334,7 @@ found:  			goto error_remove;  		}  		if (dir->i_size & (sbi->cluster_size - 1)) { -			fat_fs_panic(sb, "Odd directory size"); +			fat_fs_error(sb, "Odd directory size");  			dir->i_size = (dir->i_size + sbi->cluster_size - 1)  				& ~((loff_t)sbi->cluster_size - 1);  		} diff --git a/fs/fat/fat.h b/fs/fat/fat.h index e4d88527b5d..adb0e72a176 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h @@ -17,6 +17,10 @@  #define VFAT_SFN_CREATE_WIN95	0x0100 /* emulate win95 rule for create */  #define VFAT_SFN_CREATE_WINNT	0x0200 /* emulate winnt rule for create */ +#define FAT_ERRORS_CONT		1      /* ignore error and continue */ +#define FAT_ERRORS_PANIC	2      /* panic on error */ +#define FAT_ERRORS_RO		3      /* remount r/o on error */ +  struct fat_mount_options {  	uid_t fs_uid;  	gid_t fs_gid; @@ -26,6 +30,7 @@ struct fat_mount_options {  	char *iocharset;          /* Charset used for filename input/display */  	unsigned short shortname; /* flags for shortname display/create rule */  	unsigned char name_check; /* r = relaxed, n = normal, s = strict */ +	unsigned char errors;	  /* On error: continue, panic, remount-ro */  	unsigned short allow_utime;/* permission for setting the [am]time */  	unsigned quiet:1,         /* set = fake successful chmods and chowns */  		 showexec:1,      /* set = only set x bit for com/exe/bat */ @@ -316,7 +321,7 @@ extern int fat_fill_super(struct super_block *sb, void *data, int silent,  extern int fat_flush_inodes(struct super_block *sb, struct inode *i1,  		            struct inode *i2);  /* fat/misc.c */ -extern void fat_fs_panic(struct super_block *s, const char *fmt, ...) +extern void fat_fs_error(struct super_block *s, const char *fmt, ...)  	__attribute__ ((format (printf, 2, 3))) __cold;  extern void fat_clusters_flush(struct super_block *sb);  extern int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster); diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index 618f5305c2e..a81037721a6 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c @@ -348,7 +348,7 @@ int fat_ent_read(struct inode *inode, struct fat_entry *fatent, int entry)  	if (entry < FAT_START_ENT || sbi->max_cluster <= entry) {  		fatent_brelse(fatent); -		fat_fs_panic(sb, "invalid access to FAT (entry 0x%08x)", entry); +		fat_fs_error(sb, "invalid access to FAT (entry 0x%08x)", entry);  		return -EIO;  	} @@ -560,7 +560,7 @@ int fat_free_clusters(struct inode *inode, int cluster)  			err = cluster;  			goto error;  		} else if (cluster == FAT_ENT_FREE) { -			fat_fs_panic(sb, "%s: deleting FAT entry beyond EOF", +			fat_fs_error(sb, "%s: deleting FAT entry beyond EOF",  				     __func__);  			err = -EIO;  			goto error; diff --git a/fs/fat/file.c b/fs/fat/file.c index e955a56b4e5..b28ea646ff6 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -18,106 +18,112 @@  #include <linux/security.h>  #include "fat.h" -int fat_generic_ioctl(struct inode *inode, struct file *filp, -		      unsigned int cmd, unsigned long arg) +static int fat_ioctl_get_attributes(struct inode *inode, u32 __user *user_attr)  { +	u32 attr; + +	mutex_lock(&inode->i_mutex); +	attr = fat_make_attrs(inode); +	mutex_unlock(&inode->i_mutex); + +	return put_user(attr, user_attr); +} + +static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr) +{ +	struct inode *inode = file->f_path.dentry->d_inode;  	struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); -	u32 __user *user_attr = (u32 __user *)arg; +	int is_dir = S_ISDIR(inode->i_mode); +	u32 attr, oldattr; +	struct iattr ia; +	int err; -	switch (cmd) { -	case FAT_IOCTL_GET_ATTRIBUTES: -	{ -		u32 attr; +	err = get_user(attr, user_attr); +	if (err) +		goto out; -		mutex_lock(&inode->i_mutex); -		attr = fat_make_attrs(inode); -		mutex_unlock(&inode->i_mutex); +	mutex_lock(&inode->i_mutex); +	err = mnt_want_write(file->f_path.mnt); +	if (err) +		goto out_unlock_inode; -		return put_user(attr, user_attr); +	/* +	 * ATTR_VOLUME and ATTR_DIR cannot be changed; this also +	 * prevents the user from turning us into a VFAT +	 * longname entry.  Also, we obviously can't set +	 * any of the NTFS attributes in the high 24 bits. +	 */ +	attr &= 0xff & ~(ATTR_VOLUME | ATTR_DIR); +	/* Merge in ATTR_VOLUME and ATTR_DIR */ +	attr |= (MSDOS_I(inode)->i_attrs & ATTR_VOLUME) | +		(is_dir ? ATTR_DIR : 0); +	oldattr = fat_make_attrs(inode); + +	/* Equivalent to a chmod() */ +	ia.ia_valid = ATTR_MODE | ATTR_CTIME; +	ia.ia_ctime = current_fs_time(inode->i_sb); +	if (is_dir) +		ia.ia_mode = fat_make_mode(sbi, attr, S_IRWXUGO); +	else { +		ia.ia_mode = fat_make_mode(sbi, attr, +			S_IRUGO | S_IWUGO | (inode->i_mode & S_IXUGO));  	} -	case FAT_IOCTL_SET_ATTRIBUTES: -	{ -		u32 attr, oldattr; -		int err, is_dir = S_ISDIR(inode->i_mode); -		struct iattr ia; -		err = get_user(attr, user_attr); -		if (err) -			return err; +	/* The root directory has no attributes */ +	if (inode->i_ino == MSDOS_ROOT_INO && attr != ATTR_DIR) { +		err = -EINVAL; +		goto out_drop_write; +	} -		mutex_lock(&inode->i_mutex); - -		err = mnt_want_write(filp->f_path.mnt); -		if (err) -			goto up_no_drop_write; - -		/* -		 * ATTR_VOLUME and ATTR_DIR cannot be changed; this also -		 * prevents the user from turning us into a VFAT -		 * longname entry.  Also, we obviously can't set -		 * any of the NTFS attributes in the high 24 bits. -		 */ -		attr &= 0xff & ~(ATTR_VOLUME | ATTR_DIR); -		/* Merge in ATTR_VOLUME and ATTR_DIR */ -		attr |= (MSDOS_I(inode)->i_attrs & ATTR_VOLUME) | -			(is_dir ? ATTR_DIR : 0); -		oldattr = fat_make_attrs(inode); - -		/* Equivalent to a chmod() */ -		ia.ia_valid = ATTR_MODE | ATTR_CTIME; -		ia.ia_ctime = current_fs_time(inode->i_sb); -		if (is_dir) -			ia.ia_mode = fat_make_mode(sbi, attr, S_IRWXUGO); -		else { -			ia.ia_mode = fat_make_mode(sbi, attr, -				S_IRUGO | S_IWUGO | (inode->i_mode & S_IXUGO)); -		} +	if (sbi->options.sys_immutable && +	    ((attr | oldattr) & ATTR_SYS) && +	    !capable(CAP_LINUX_IMMUTABLE)) { +		err = -EPERM; +		goto out_drop_write; +	} -		/* The root directory has no attributes */ -		if (inode->i_ino == MSDOS_ROOT_INO && attr != ATTR_DIR) { -			err = -EINVAL; -			goto up; -		} +	/* +	 * The security check is questionable...  We single +	 * out the RO attribute for checking by the security +	 * module, just because it maps to a file mode. +	 */ +	err = security_inode_setattr(file->f_path.dentry, &ia); +	if (err) +		goto out_drop_write; -		if (sbi->options.sys_immutable) { -			if ((attr | oldattr) & ATTR_SYS) { -				if (!capable(CAP_LINUX_IMMUTABLE)) { -					err = -EPERM; -					goto up; -				} -			} -		} +	/* This MUST be done before doing anything irreversible... */ +	err = fat_setattr(file->f_path.dentry, &ia); +	if (err) +		goto out_drop_write; + +	fsnotify_change(file->f_path.dentry, ia.ia_valid); +	if (sbi->options.sys_immutable) { +		if (attr & ATTR_SYS) +			inode->i_flags |= S_IMMUTABLE; +		else +			inode->i_flags &= S_IMMUTABLE; +	} -		/* -		 * The security check is questionable...  We single -		 * out the RO attribute for checking by the security -		 * module, just because it maps to a file mode. -		 */ -		err = security_inode_setattr(filp->f_path.dentry, &ia); -		if (err) -			goto up; - -		/* This MUST be done before doing anything irreversible... */ -		err = fat_setattr(filp->f_path.dentry, &ia); -		if (err) -			goto up; - -		fsnotify_change(filp->f_path.dentry, ia.ia_valid); -		if (sbi->options.sys_immutable) { -			if (attr & ATTR_SYS) -				inode->i_flags |= S_IMMUTABLE; -			else -				inode->i_flags &= S_IMMUTABLE; -		} +	fat_save_attrs(inode, attr); +	mark_inode_dirty(inode); +out_drop_write: +	mnt_drop_write(file->f_path.mnt); +out_unlock_inode: +	mutex_unlock(&inode->i_mutex); +out: +	return err; +} -		fat_save_attrs(inode, attr); -		mark_inode_dirty(inode); -up: -		mnt_drop_write(filp->f_path.mnt); -up_no_drop_write: -		mutex_unlock(&inode->i_mutex); -		return err; -	} +int fat_generic_ioctl(struct inode *inode, struct file *filp, +		      unsigned int cmd, unsigned long arg) +{ +	u32 __user *user_attr = (u32 __user *)arg; + +	switch (cmd) { +	case FAT_IOCTL_GET_ATTRIBUTES: +		return fat_ioctl_get_attributes(inode, user_attr); +	case FAT_IOCTL_SET_ATTRIBUTES: +		return fat_ioctl_set_attributes(filp, user_attr);  	default:  		return -ENOTTY;	/* Inappropriate ioctl for device */  	} @@ -225,7 +231,7 @@ static int fat_free(struct inode *inode, int skip)  			fatent_brelse(&fatent);  			return 0;  		} else if (ret == FAT_ENT_FREE) { -			fat_fs_panic(sb, +			fat_fs_error(sb,  				     "%s: invalid cluster chain (i_pos %lld)",  				     __func__, MSDOS_I(inode)->i_pos);  			ret = -EIO; diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 51a5ecf9000..304b411cb8b 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -76,7 +76,7 @@ static inline int __fat_get_block(struct inode *inode, sector_t iblock,  		return 0;  	if (iblock != MSDOS_I(inode)->mmu_private >> sb->s_blocksize_bits) { -		fat_fs_panic(sb, "corrupted file size (i_pos %lld, %lld)", +		fat_fs_error(sb, "corrupted file size (i_pos %lld, %lld)",  			MSDOS_I(inode)->i_pos, MSDOS_I(inode)->mmu_private);  		return -EIO;  	} @@ -856,6 +856,12 @@ static int fat_show_options(struct seq_file *m, struct vfsmount *mnt)  		seq_puts(m, ",flush");  	if (opts->tz_utc)  		seq_puts(m, ",tz=UTC"); +	if (opts->errors == FAT_ERRORS_CONT) +		seq_puts(m, ",errors=continue"); +	else if (opts->errors == FAT_ERRORS_PANIC) +		seq_puts(m, ",errors=panic"); +	else +		seq_puts(m, ",errors=remount-ro");  	return 0;  } @@ -868,7 +874,8 @@ enum {  	Opt_charset, Opt_shortname_lower, Opt_shortname_win95,  	Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes,  	Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes, -	Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_rodir, Opt_err, +	Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_rodir, Opt_err_cont, +	Opt_err_panic, Opt_err_ro, Opt_err,  };  static const match_table_t fat_tokens = { @@ -891,6 +898,11 @@ static const match_table_t fat_tokens = {  	{Opt_showexec, "showexec"},  	{Opt_debug, "debug"},  	{Opt_immutable, "sys_immutable"}, +	{Opt_flush, "flush"}, +	{Opt_tz_utc, "tz=UTC"}, +	{Opt_err_cont, "errors=continue"}, +	{Opt_err_panic, "errors=panic"}, +	{Opt_err_ro, "errors=remount-ro"},  	{Opt_obsolate, "conv=binary"},  	{Opt_obsolate, "conv=text"},  	{Opt_obsolate, "conv=auto"}, @@ -902,8 +914,6 @@ static const match_table_t fat_tokens = {  	{Opt_obsolate, "cvf_format=%20s"},  	{Opt_obsolate, "cvf_options=%100s"},  	{Opt_obsolate, "posix"}, -	{Opt_flush, "flush"}, -	{Opt_tz_utc, "tz=UTC"},  	{Opt_err, NULL},  };  static const match_table_t msdos_tokens = { @@ -973,6 +983,7 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,  	opts->numtail = 1;  	opts->usefree = opts->nocase = 0;  	opts->tz_utc = 0; +	opts->errors = FAT_ERRORS_RO;  	*debug = 0;  	if (!options) @@ -1065,6 +1076,15 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,  		case Opt_tz_utc:  			opts->tz_utc = 1;  			break; +		case Opt_err_cont: +			opts->errors = FAT_ERRORS_CONT; +			break; +		case Opt_err_panic: +			opts->errors = FAT_ERRORS_PANIC; +			break; +		case Opt_err_ro: +			opts->errors = FAT_ERRORS_RO; +			break;  		/* msdos specific */  		case Opt_dots: diff --git a/fs/fat/misc.c b/fs/fat/misc.c index ac39ebcc149..a6c20473dfd 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c @@ -12,14 +12,19 @@  #include "fat.h"  /* - * fat_fs_panic reports a severe file system problem and sets the file system - * read-only. The file system can be made writable again by remounting it. + * fat_fs_error reports a file system problem that might indicate fa data + * corruption/inconsistency. Depending on 'errors' mount option the + * panic() is called, or error message is printed FAT and nothing is done, + * or filesystem is remounted read-only (default behavior). + * In case the file system is remounted read-only, it can be made writable + * again by remounting it.   */ -void fat_fs_panic(struct super_block *s, const char *fmt, ...) +void fat_fs_error(struct super_block *s, const char *fmt, ...)  { +	struct fat_mount_options *opts = &MSDOS_SB(s)->options;  	va_list args; -	printk(KERN_ERR "FAT: Filesystem panic (dev %s)\n", s->s_id); +	printk(KERN_ERR "FAT: Filesystem error (dev %s)\n", s->s_id);  	printk(KERN_ERR "    ");  	va_start(args, fmt); @@ -27,13 +32,14 @@ void fat_fs_panic(struct super_block *s, const char *fmt, ...)  	va_end(args);  	printk("\n"); -	if (!(s->s_flags & MS_RDONLY)) { +	if (opts->errors == FAT_ERRORS_PANIC) +		panic("    FAT fs panic from previous error\n"); +	else if (opts->errors == FAT_ERRORS_RO && !(s->s_flags & MS_RDONLY)) {  		s->s_flags |= MS_RDONLY;  		printk(KERN_ERR "    File system has been set read-only\n");  	}  } - -EXPORT_SYMBOL_GPL(fat_fs_panic); +EXPORT_SYMBOL_GPL(fat_fs_error);  /* Flushes the number of free clusters on FAT32 */  /* XXX: Need to write one per FSINFO block.  Currently only writes 1 */ @@ -124,7 +130,7 @@ int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster)  			mark_inode_dirty(inode);  	}  	if (new_fclus != (inode->i_blocks >> (sbi->cluster_bits - 9))) { -		fat_fs_panic(sb, "clusters badly computed (%d != %llu)", +		fat_fs_error(sb, "clusters badly computed (%d != %llu)",  			     new_fclus,  			     (llu)(inode->i_blocks >> (sbi->cluster_bits - 9)));  		fat_cache_inval_inode(inode); diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index 20f52286135..82f88733b68 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -608,7 +608,7 @@ error_inode:  		sinfo.bh = NULL;  	}  	if (corrupt < 0) { -		fat_fs_panic(new_dir->i_sb, +		fat_fs_error(new_dir->i_sb,  			     "%s: Filesystem corrupted (i_pos %lld)",  			     __func__, sinfo.i_pos);  	} diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index b50ecbe97f8..8d6fdcfd41d 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -1030,7 +1030,7 @@ error_inode:  		sinfo.bh = NULL;  	}  	if (corrupt < 0) { -		fat_fs_panic(new_dir->i_sb, +		fat_fs_error(new_dir->i_sb,  			     "%s: Filesystem corrupted (i_pos %lld)",  			     __func__, sinfo.i_pos);  	} diff --git a/fs/jfs/jfs_extent.c b/fs/jfs/jfs_extent.c index bbbd5f202e3..41d6045dbeb 100644 --- a/fs/jfs/jfs_extent.c +++ b/fs/jfs/jfs_extent.c @@ -391,6 +391,7 @@ int extHint(struct inode *ip, s64 offset, xad_t * xp)  		}  		XADaddress(xp, xaddr);  		XADlength(xp, xlen); +		XADoffset(xp, prev);  		/*  		 * only preserve the abnr flag within the xad flags  		 * of the returned hint. diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 678a067d925..9edcde4974a 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -475,6 +475,12 @@ struct ocfs2_path {  #define path_leaf_el(_path) ((_path)->p_node[(_path)->p_tree_depth].el)  #define path_num_items(_path) ((_path)->p_tree_depth + 1) +static int ocfs2_find_path(struct inode *inode, struct ocfs2_path *path, +			   u32 cpos); +static void ocfs2_adjust_rightmost_records(struct inode *inode, +					   handle_t *handle, +					   struct ocfs2_path *path, +					   struct ocfs2_extent_rec *insert_rec);  /*   * Reset the actual path elements so that we can re-use the structure   * to build another path. Generally, this involves freeing the buffer @@ -1013,6 +1019,54 @@ static inline u32 ocfs2_sum_rightmost_rec(struct ocfs2_extent_list  *el)  }  /* + * Change range of the branches in the right most path according to the leaf + * extent block's rightmost record. + */ +static int ocfs2_adjust_rightmost_branch(handle_t *handle, +					 struct inode *inode, +					 struct ocfs2_extent_tree *et) +{ +	int status; +	struct ocfs2_path *path = NULL; +	struct ocfs2_extent_list *el; +	struct ocfs2_extent_rec *rec; + +	path = ocfs2_new_path_from_et(et); +	if (!path) { +		status = -ENOMEM; +		return status; +	} + +	status = ocfs2_find_path(inode, path, UINT_MAX); +	if (status < 0) { +		mlog_errno(status); +		goto out; +	} + +	status = ocfs2_extend_trans(handle, path_num_items(path) + +				    handle->h_buffer_credits); +	if (status < 0) { +		mlog_errno(status); +		goto out; +	} + +	status = ocfs2_journal_access_path(inode, handle, path); +	if (status < 0) { +		mlog_errno(status); +		goto out; +	} + +	el = path_leaf_el(path); +	rec = &el->l_recs[le32_to_cpu(el->l_next_free_rec) - 1]; + +	ocfs2_adjust_rightmost_records(inode, handle, path, rec); + +out: +	ocfs2_free_path(path); +	return status; +} + +/*   * Add an entire tree branch to our inode. eb_bh is the extent block   * to start at, if we don't want to start the branch at the dinode   * structure. @@ -1038,7 +1092,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,  	struct ocfs2_extent_block *eb;  	struct ocfs2_extent_list  *eb_el;  	struct ocfs2_extent_list  *el; -	u32 new_cpos; +	u32 new_cpos, root_end;  	mlog_entry_void(); @@ -1055,6 +1109,27 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,  	new_blocks = le16_to_cpu(el->l_tree_depth); +	eb = (struct ocfs2_extent_block *)(*last_eb_bh)->b_data; +	new_cpos = ocfs2_sum_rightmost_rec(&eb->h_list); +	root_end = ocfs2_sum_rightmost_rec(et->et_root_el); + +	/* +	 * If there is a gap before the root end and the real end +	 * of the righmost leaf block, we need to remove the gap +	 * between new_cpos and root_end first so that the tree +	 * is consistent after we add a new branch(it will start +	 * from new_cpos). +	 */ +	if (root_end > new_cpos) { +		mlog(0, "adjust the cluster end from %u to %u\n", +		     root_end, new_cpos); +		status = ocfs2_adjust_rightmost_branch(handle, inode, et); +		if (status) { +			mlog_errno(status); +			goto bail; +		} +	} +  	/* allocate the number of new eb blocks we need */  	new_eb_bhs = kcalloc(new_blocks, sizeof(struct buffer_head *),  			     GFP_KERNEL); @@ -1071,9 +1146,6 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,  		goto bail;  	} -	eb = (struct ocfs2_extent_block *)(*last_eb_bh)->b_data; -	new_cpos = ocfs2_sum_rightmost_rec(&eb->h_list); -  	/* Note: new_eb_bhs[new_blocks - 1] is the guy which will be  	 * linked with the rest of the tree.  	 * conversly, new_eb_bhs[0] is the new bottommost leaf. diff --git a/fs/ocfs2/blockcheck.c b/fs/ocfs2/blockcheck.c index 2a947c44e59..a1163b8b417 100644 --- a/fs/ocfs2/blockcheck.c +++ b/fs/ocfs2/blockcheck.c @@ -22,6 +22,9 @@  #include <linux/crc32.h>  #include <linux/buffer_head.h>  #include <linux/bitops.h> +#include <linux/debugfs.h> +#include <linux/module.h> +#include <linux/fs.h>  #include <asm/byteorder.h>  #include <cluster/masklog.h> @@ -222,6 +225,155 @@ void ocfs2_hamming_fix_block(void *data, unsigned int blocksize,  	ocfs2_hamming_fix(data, blocksize * 8, 0, fix);  } + +/* + * Debugfs handling. + */ + +#ifdef CONFIG_DEBUG_FS + +static int blockcheck_u64_get(void *data, u64 *val) +{ +	*val = *(u64 *)data; +	return 0; +} +DEFINE_SIMPLE_ATTRIBUTE(blockcheck_fops, blockcheck_u64_get, NULL, "%llu\n"); + +static struct dentry *blockcheck_debugfs_create(const char *name, +						struct dentry *parent, +						u64 *value) +{ +	return debugfs_create_file(name, S_IFREG | S_IRUSR, parent, value, +				   &blockcheck_fops); +} + +static void ocfs2_blockcheck_debug_remove(struct ocfs2_blockcheck_stats *stats) +{ +	if (stats) { +		debugfs_remove(stats->b_debug_check); +		stats->b_debug_check = NULL; +		debugfs_remove(stats->b_debug_failure); +		stats->b_debug_failure = NULL; +		debugfs_remove(stats->b_debug_recover); +		stats->b_debug_recover = NULL; +		debugfs_remove(stats->b_debug_dir); +		stats->b_debug_dir = NULL; +	} +} + +static int ocfs2_blockcheck_debug_install(struct ocfs2_blockcheck_stats *stats, +					  struct dentry *parent) +{ +	int rc = -EINVAL; + +	if (!stats) +		goto out; + +	stats->b_debug_dir = debugfs_create_dir("blockcheck", parent); +	if (!stats->b_debug_dir) +		goto out; + +	stats->b_debug_check = +		blockcheck_debugfs_create("blocks_checked", +					  stats->b_debug_dir, +					  &stats->b_check_count); + +	stats->b_debug_failure = +		blockcheck_debugfs_create("checksums_failed", +					  stats->b_debug_dir, +					  &stats->b_failure_count); + +	stats->b_debug_recover = +		blockcheck_debugfs_create("ecc_recoveries", +					  stats->b_debug_dir, +					  &stats->b_recover_count); +	if (stats->b_debug_check && stats->b_debug_failure && +	    stats->b_debug_recover) +		rc = 0; + +out: +	if (rc) +		ocfs2_blockcheck_debug_remove(stats); +	return rc; +} +#else +static inline int ocfs2_blockcheck_debug_install(struct ocfs2_blockcheck_stats *stats, +						 struct dentry *parent) +{ +	return 0; +} + +static inline void ocfs2_blockcheck_debug_remove(struct ocfs2_blockcheck_stats *stats) +{ +} +#endif  /* CONFIG_DEBUG_FS */ + +/* Always-called wrappers for starting and stopping the debugfs files */ +int ocfs2_blockcheck_stats_debugfs_install(struct ocfs2_blockcheck_stats *stats, +					   struct dentry *parent) +{ +	return ocfs2_blockcheck_debug_install(stats, parent); +} + +void ocfs2_blockcheck_stats_debugfs_remove(struct ocfs2_blockcheck_stats *stats) +{ +	ocfs2_blockcheck_debug_remove(stats); +} + +static void ocfs2_blockcheck_inc_check(struct ocfs2_blockcheck_stats *stats) +{ +	u64 new_count; + +	if (!stats) +		return; + +	spin_lock(&stats->b_lock); +	stats->b_check_count++; +	new_count = stats->b_check_count; +	spin_unlock(&stats->b_lock); + +	if (!new_count) +		mlog(ML_NOTICE, "Block check count has wrapped\n"); +} + +static void ocfs2_blockcheck_inc_failure(struct ocfs2_blockcheck_stats *stats) +{ +	u64 new_count; + +	if (!stats) +		return; + +	spin_lock(&stats->b_lock); +	stats->b_failure_count++; +	new_count = stats->b_failure_count; +	spin_unlock(&stats->b_lock); + +	if (!new_count) +		mlog(ML_NOTICE, "Checksum failure count has wrapped\n"); +} + +static void ocfs2_blockcheck_inc_recover(struct ocfs2_blockcheck_stats *stats) +{ +	u64 new_count; + +	if (!stats) +		return; + +	spin_lock(&stats->b_lock); +	stats->b_recover_count++; +	new_count = stats->b_recover_count; +	spin_unlock(&stats->b_lock); + +	if (!new_count) +		mlog(ML_NOTICE, "ECC recovery count has wrapped\n"); +} + + + +/* + * These are the low-level APIs for using the ocfs2_block_check structure. + */ +  /*   * This function generates check information for a block.   * data is the block to be checked.  bc is a pointer to the @@ -266,12 +418,15 @@ void ocfs2_block_check_compute(void *data, size_t blocksize,   * Again, the data passed in should be the on-disk endian.   */  int ocfs2_block_check_validate(void *data, size_t blocksize, -			       struct ocfs2_block_check *bc) +			       struct ocfs2_block_check *bc, +			       struct ocfs2_blockcheck_stats *stats)  {  	int rc = 0;  	struct ocfs2_block_check check;  	u32 crc, ecc; +	ocfs2_blockcheck_inc_check(stats); +  	check.bc_crc32e = le32_to_cpu(bc->bc_crc32e);  	check.bc_ecc = le16_to_cpu(bc->bc_ecc); @@ -282,6 +437,7 @@ int ocfs2_block_check_validate(void *data, size_t blocksize,  	if (crc == check.bc_crc32e)  		goto out; +	ocfs2_blockcheck_inc_failure(stats);  	mlog(ML_ERROR,  	     "CRC32 failed: stored: %u, computed %u.  Applying ECC.\n",  	     (unsigned int)check.bc_crc32e, (unsigned int)crc); @@ -292,8 +448,10 @@ int ocfs2_block_check_validate(void *data, size_t blocksize,  	/* And check the crc32 again */  	crc = crc32_le(~0, data, blocksize); -	if (crc == check.bc_crc32e) +	if (crc == check.bc_crc32e) { +		ocfs2_blockcheck_inc_recover(stats);  		goto out; +	}  	mlog(ML_ERROR, "Fixed CRC32 failed: stored: %u, computed %u\n",  	     (unsigned int)check.bc_crc32e, (unsigned int)crc); @@ -366,7 +524,8 @@ void ocfs2_block_check_compute_bhs(struct buffer_head **bhs, int nr,   * Again, the data passed in should be the on-disk endian.   */  int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr, -				   struct ocfs2_block_check *bc) +				   struct ocfs2_block_check *bc, +				   struct ocfs2_blockcheck_stats *stats)  {  	int i, rc = 0;  	struct ocfs2_block_check check; @@ -377,6 +536,8 @@ int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr,  	if (!nr)  		return 0; +	ocfs2_blockcheck_inc_check(stats); +  	check.bc_crc32e = le32_to_cpu(bc->bc_crc32e);  	check.bc_ecc = le16_to_cpu(bc->bc_ecc); @@ -388,6 +549,7 @@ int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr,  	if (crc == check.bc_crc32e)  		goto out; +	ocfs2_blockcheck_inc_failure(stats);  	mlog(ML_ERROR,  	     "CRC32 failed: stored: %u, computed %u.  Applying ECC.\n",  	     (unsigned int)check.bc_crc32e, (unsigned int)crc); @@ -416,8 +578,10 @@ int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr,  	/* And check the crc32 again */  	for (i = 0, crc = ~0; i < nr; i++)  		crc = crc32_le(crc, bhs[i]->b_data, bhs[i]->b_size); -	if (crc == check.bc_crc32e) +	if (crc == check.bc_crc32e) { +		ocfs2_blockcheck_inc_recover(stats);  		goto out; +	}  	mlog(ML_ERROR, "Fixed CRC32 failed: stored: %u, computed %u\n",  	     (unsigned int)check.bc_crc32e, (unsigned int)crc); @@ -448,9 +612,11 @@ int ocfs2_validate_meta_ecc(struct super_block *sb, void *data,  			    struct ocfs2_block_check *bc)  {  	int rc = 0; +	struct ocfs2_super *osb = OCFS2_SB(sb); -	if (ocfs2_meta_ecc(OCFS2_SB(sb))) -		rc = ocfs2_block_check_validate(data, sb->s_blocksize, bc); +	if (ocfs2_meta_ecc(osb)) +		rc = ocfs2_block_check_validate(data, sb->s_blocksize, bc, +						&osb->osb_ecc_stats);  	return rc;  } @@ -468,9 +634,11 @@ int ocfs2_validate_meta_ecc_bhs(struct super_block *sb,  				struct ocfs2_block_check *bc)  {  	int rc = 0; +	struct ocfs2_super *osb = OCFS2_SB(sb); -	if (ocfs2_meta_ecc(OCFS2_SB(sb))) -		rc = ocfs2_block_check_validate_bhs(bhs, nr, bc); +	if (ocfs2_meta_ecc(osb)) +		rc = ocfs2_block_check_validate_bhs(bhs, nr, bc, +						    &osb->osb_ecc_stats);  	return rc;  } diff --git a/fs/ocfs2/blockcheck.h b/fs/ocfs2/blockcheck.h index 70ec3feda32..d4b69febf70 100644 --- a/fs/ocfs2/blockcheck.h +++ b/fs/ocfs2/blockcheck.h @@ -21,6 +21,24 @@  #define OCFS2_BLOCKCHECK_H +/* Count errors and error correction from blockcheck.c */ +struct ocfs2_blockcheck_stats { +	spinlock_t b_lock; +	u64 b_check_count;	/* Number of blocks we've checked */ +	u64 b_failure_count;	/* Number of failed checksums */ +	u64 b_recover_count;	/* Number of blocks fixed by ecc */ + +	/* +	 * debugfs entries, used if this is passed to +	 * ocfs2_blockcheck_stats_debugfs_install() +	 */ +	struct dentry *b_debug_dir;	/* Parent of the debugfs  files */ +	struct dentry *b_debug_check;	/* Exposes b_check_count */ +	struct dentry *b_debug_failure;	/* Exposes b_failure_count */ +	struct dentry *b_debug_recover;	/* Exposes b_recover_count */ +}; + +  /* High level block API */  void ocfs2_compute_meta_ecc(struct super_block *sb, void *data,  			    struct ocfs2_block_check *bc); @@ -37,11 +55,18 @@ int ocfs2_validate_meta_ecc_bhs(struct super_block *sb,  void ocfs2_block_check_compute(void *data, size_t blocksize,  			       struct ocfs2_block_check *bc);  int ocfs2_block_check_validate(void *data, size_t blocksize, -			       struct ocfs2_block_check *bc); +			       struct ocfs2_block_check *bc, +			       struct ocfs2_blockcheck_stats *stats);  void ocfs2_block_check_compute_bhs(struct buffer_head **bhs, int nr,  				   struct ocfs2_block_check *bc);  int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr, -				   struct ocfs2_block_check *bc); +				   struct ocfs2_block_check *bc, +				   struct ocfs2_blockcheck_stats *stats); + +/* Debug Initialization */ +int ocfs2_blockcheck_stats_debugfs_install(struct ocfs2_blockcheck_stats *stats, +					   struct dentry *parent); +void ocfs2_blockcheck_stats_debugfs_remove(struct ocfs2_blockcheck_stats *stats);  /*   * Hamming code functions diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h index 7e72a81bc2d..696c32e5071 100644 --- a/fs/ocfs2/cluster/masklog.h +++ b/fs/ocfs2/cluster/masklog.h @@ -48,34 +48,33 @@   * only emit the appropriage printk() when the caller passes in a constant   * mask, as is almost always the case.   * - * All this bitmask nonsense is hidden from the /proc interface so that Joel - * doesn't have an aneurism.  Reading the file gives a straight forward - * indication of which bits are on or off: - * 	ENTRY off - * 	EXIT off + * All this bitmask nonsense is managed from the files under + * /sys/fs/o2cb/logmask/.  Reading the files gives a straightforward + * indication of which bits are allowed (allow) or denied (off/deny). + * 	ENTRY deny + * 	EXIT deny   * 	TCP off   * 	MSG off   * 	SOCKET off - * 	ERROR off - * 	NOTICE on + * 	ERROR allow + * 	NOTICE allow   *   * Writing changes the state of a given bit and requires a strictly formatted   * single write() call:   * - * 	write(fd, "ENTRY on", 8); + * 	write(fd, "allow", 5);   * - * would turn the entry bit on.  "1" is also accepted in the place of "on", and - * "off" and "0" behave as expected. + * Echoing allow/deny/off string into the logmask files can flip the bits + * on or off as expected; here is the bash script for example:   * - * Some trivial shell can flip all the bits on or off: + * log_mask="/sys/fs/o2cb/log_mask" + * for node in ENTRY EXIT TCP MSG SOCKET ERROR NOTICE; do + *	echo allow >"$log_mask"/"$node" + * done   * - * log_mask="/proc/fs/ocfs2_nodemanager/log_mask" - * cat $log_mask | ( - * 	while read bit status; do - * 		# $1 is "on" or "off", say - * 		echo "$bit $1" > $log_mask - * 	done - * ) + * The debugfs.ocfs2 tool can also flip the bits with the -l option: + * + * debugfs.ocfs2 -l TCP allow   */  /* for task_struct */ diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 9fbe849f634..334f231a422 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -974,7 +974,7 @@ static int o2net_tx_can_proceed(struct o2net_node *nn,  int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec,  			   size_t caller_veclen, u8 target_node, int *status)  { -	int ret, error = 0; +	int ret;  	struct o2net_msg *msg = NULL;  	size_t veclen, caller_bytes = 0;  	struct kvec *vec = NULL; @@ -1015,10 +1015,7 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec,  	o2net_set_nst_sock_time(&nst); -	ret = wait_event_interruptible(nn->nn_sc_wq, -				       o2net_tx_can_proceed(nn, &sc, &error)); -	if (!ret && error) -		ret = error; +	wait_event(nn->nn_sc_wq, o2net_tx_can_proceed(nn, &sc, &ret));  	if (ret)  		goto out; diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index c5752305627..b358f3bf896 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -2900,6 +2900,8 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,  	alloc = ocfs2_clusters_for_bytes(sb, bytes);  	dx_alloc = 0; +	down_write(&oi->ip_alloc_sem); +  	if (ocfs2_supports_indexed_dirs(osb)) {  		credits += ocfs2_add_dir_index_credits(sb); @@ -2940,8 +2942,6 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,  		goto out;  	} -	down_write(&oi->ip_alloc_sem); -  	/*  	 * Prepare for worst case allocation scenario of two separate  	 * extents in the unindexed tree. @@ -2953,7 +2953,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,  	if (IS_ERR(handle)) {  		ret = PTR_ERR(handle);  		mlog_errno(ret); -		goto out_sem; +		goto out;  	}  	if (vfs_dq_alloc_space_nodirty(dir, @@ -3172,10 +3172,8 @@ out_commit:  	ocfs2_commit_trans(osb, handle); -out_sem: -	up_write(&oi->ip_alloc_sem); -  out: +	up_write(&oi->ip_alloc_sem);  	if (data_ac)  		ocfs2_free_alloc_context(data_ac);  	if (meta_ac) @@ -3322,11 +3320,15 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,  		brelse(new_bh);  		new_bh = NULL; +		down_write(&OCFS2_I(dir)->ip_alloc_sem); +		drop_alloc_sem = 1;  		dir_i_size = i_size_read(dir);  		credits = OCFS2_SIMPLE_DIR_EXTEND_CREDITS;  		goto do_extend;  	} +	down_write(&OCFS2_I(dir)->ip_alloc_sem); +	drop_alloc_sem = 1;  	dir_i_size = i_size_read(dir);  	mlog(0, "extending dir %llu (i_size = %lld)\n",  	     (unsigned long long)OCFS2_I(dir)->ip_blkno, dir_i_size); @@ -3370,9 +3372,6 @@ do_extend:  		credits++; /* For attaching the new dirent block to the  			    * dx_root */ -	down_write(&OCFS2_I(dir)->ip_alloc_sem); -	drop_alloc_sem = 1; -  	handle = ocfs2_start_trans(osb, credits);  	if (IS_ERR(handle)) {  		status = PTR_ERR(handle); @@ -3435,10 +3434,10 @@ bail_bh:  	*new_de_bh = new_bh;  	get_bh(*new_de_bh);  bail: -	if (drop_alloc_sem) -		up_write(&OCFS2_I(dir)->ip_alloc_sem);  	if (handle)  		ocfs2_commit_trans(osb, handle); +	if (drop_alloc_sem) +		up_write(&OCFS2_I(dir)->ip_alloc_sem);  	if (data_ac)  		ocfs2_free_alloc_context(data_ac); diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index e15fc7d5082..6cdeaa76f27 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -248,6 +248,10 @@ static struct ocfs2_lock_res_ops ocfs2_nfs_sync_lops = {  	.flags		= 0,  }; +static struct ocfs2_lock_res_ops ocfs2_orphan_scan_lops = { +	.flags		= LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, +}; +  static struct ocfs2_lock_res_ops ocfs2_dentry_lops = {  	.get_osb	= ocfs2_get_dentry_osb,  	.post_unlock	= ocfs2_dentry_post_unlock, @@ -637,6 +641,19 @@ static void ocfs2_nfs_sync_lock_res_init(struct ocfs2_lock_res *res,  				   &ocfs2_nfs_sync_lops, osb);  } +static void ocfs2_orphan_scan_lock_res_init(struct ocfs2_lock_res *res, +					    struct ocfs2_super *osb) +{ +	struct ocfs2_orphan_scan_lvb *lvb; + +	ocfs2_lock_res_init_once(res); +	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_ORPHAN_SCAN, 0, 0, res->l_name); +	ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_ORPHAN_SCAN, +				   &ocfs2_orphan_scan_lops, osb); +	lvb = ocfs2_dlm_lvb(&res->l_lksb); +	lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION; +} +  void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres,  			      struct ocfs2_file_private *fp)  { @@ -2352,6 +2369,37 @@ void ocfs2_inode_unlock(struct inode *inode,  	mlog_exit_void();  } +int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno, int ex) +{ +	struct ocfs2_lock_res *lockres; +	struct ocfs2_orphan_scan_lvb *lvb; +	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; +	int status = 0; + +	lockres = &osb->osb_orphan_scan.os_lockres; +	status = ocfs2_cluster_lock(osb, lockres, level, 0, 0); +	if (status < 0) +		return status; + +	lvb = ocfs2_dlm_lvb(&lockres->l_lksb); +	if (lvb->lvb_version == OCFS2_ORPHAN_LVB_VERSION) +		*seqno = be32_to_cpu(lvb->lvb_os_seqno); +	return status; +} + +void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno, int ex) +{ +	struct ocfs2_lock_res *lockres; +	struct ocfs2_orphan_scan_lvb *lvb; +	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; + +	lockres = &osb->osb_orphan_scan.os_lockres; +	lvb = ocfs2_dlm_lvb(&lockres->l_lksb); +	lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION; +	lvb->lvb_os_seqno = cpu_to_be32(seqno); +	ocfs2_cluster_unlock(osb, lockres, level); +} +  int ocfs2_super_lock(struct ocfs2_super *osb,  		     int ex)  { @@ -2842,6 +2890,7 @@ local:  	ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb);  	ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb);  	ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb); +	ocfs2_orphan_scan_lock_res_init(&osb->osb_orphan_scan.os_lockres, osb);  	osb->cconn = conn; @@ -2878,6 +2927,7 @@ void ocfs2_dlm_shutdown(struct ocfs2_super *osb,  	ocfs2_lock_res_free(&osb->osb_super_lockres);  	ocfs2_lock_res_free(&osb->osb_rename_lockres);  	ocfs2_lock_res_free(&osb->osb_nfs_sync_lockres); +	ocfs2_lock_res_free(&osb->osb_orphan_scan.os_lockres);  	ocfs2_cluster_disconnect(osb->cconn, hangup_pending);  	osb->cconn = NULL; @@ -3061,6 +3111,7 @@ static void ocfs2_drop_osb_locks(struct ocfs2_super *osb)  	ocfs2_simple_drop_lockres(osb, &osb->osb_super_lockres);  	ocfs2_simple_drop_lockres(osb, &osb->osb_rename_lockres);  	ocfs2_simple_drop_lockres(osb, &osb->osb_nfs_sync_lockres); +	ocfs2_simple_drop_lockres(osb, &osb->osb_orphan_scan.os_lockres);  }  int ocfs2_drop_inode_locks(struct inode *inode) diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h index e1fd5721cd7..31b90d7b8f5 100644 --- a/fs/ocfs2/dlmglue.h +++ b/fs/ocfs2/dlmglue.h @@ -62,6 +62,14 @@ struct ocfs2_qinfo_lvb {  	__be32	lvb_free_entry;  }; +#define OCFS2_ORPHAN_LVB_VERSION 1 + +struct ocfs2_orphan_scan_lvb { +	__u8	lvb_version; +	__u8	lvb_reserved[3]; +	__be32	lvb_os_seqno; +}; +  /* ocfs2_inode_lock_full() 'arg_flags' flags */  /* don't wait on recovery. */  #define OCFS2_META_LOCK_RECOVERY	(0x01) @@ -113,6 +121,9 @@ int ocfs2_super_lock(struct ocfs2_super *osb,  		     int ex);  void ocfs2_super_unlock(struct ocfs2_super *osb,  			int ex); +int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno, int ex); +void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno, int ex); +  int ocfs2_rename_lock(struct ocfs2_super *osb);  void ocfs2_rename_unlock(struct ocfs2_super *osb);  int ocfs2_nfs_sync_lock(struct ocfs2_super *osb, int ex); diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index c2a87c885b7..07267e0da90 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -187,6 +187,9 @@ static int ocfs2_sync_file(struct file *file,  	if (err)  		goto bail; +	if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) +		goto bail; +  	journal = osb->journal->j_journal;  	err = jbd2_journal_force_commit(journal); @@ -894,9 +897,9 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)  	struct ocfs2_super *osb = OCFS2_SB(sb);  	struct buffer_head *bh = NULL;  	handle_t *handle = NULL; -	int locked[MAXQUOTAS] = {0, 0}; -	int credits, qtype; -	struct ocfs2_mem_dqinfo *oinfo; +	int qtype; +	struct dquot *transfer_from[MAXQUOTAS] = { }; +	struct dquot *transfer_to[MAXQUOTAS] = { };  	mlog_entry("(0x%p, '%.*s')\n", dentry,  	           dentry->d_name.len, dentry->d_name.name); @@ -969,30 +972,37 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)  	if ((attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||  	    (attr->ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { -		credits = OCFS2_INODE_UPDATE_CREDITS; +		/* +		 * Gather pointers to quota structures so that allocation / +		 * freeing of quota structures happens here and not inside +		 * vfs_dq_transfer() where we have problems with lock ordering +		 */  		if (attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid  		    && OCFS2_HAS_RO_COMPAT_FEATURE(sb,  		    OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) { -			oinfo = sb_dqinfo(sb, USRQUOTA)->dqi_priv; -			status = ocfs2_lock_global_qf(oinfo, 1); -			if (status < 0) +			transfer_to[USRQUOTA] = dqget(sb, attr->ia_uid, +						      USRQUOTA); +			transfer_from[USRQUOTA] = dqget(sb, inode->i_uid, +							USRQUOTA); +			if (!transfer_to[USRQUOTA] || !transfer_from[USRQUOTA]) { +				status = -ESRCH;  				goto bail_unlock; -			credits += ocfs2_calc_qinit_credits(sb, USRQUOTA) + -				ocfs2_calc_qdel_credits(sb, USRQUOTA); -			locked[USRQUOTA] = 1; +			}  		}  		if (attr->ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid  		    && OCFS2_HAS_RO_COMPAT_FEATURE(sb,  		    OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) { -			oinfo = sb_dqinfo(sb, GRPQUOTA)->dqi_priv; -			status = ocfs2_lock_global_qf(oinfo, 1); -			if (status < 0) +			transfer_to[GRPQUOTA] = dqget(sb, attr->ia_gid, +						      GRPQUOTA); +			transfer_from[GRPQUOTA] = dqget(sb, inode->i_gid, +							GRPQUOTA); +			if (!transfer_to[GRPQUOTA] || !transfer_from[GRPQUOTA]) { +				status = -ESRCH;  				goto bail_unlock; -			credits += ocfs2_calc_qinit_credits(sb, GRPQUOTA) + -				   ocfs2_calc_qdel_credits(sb, GRPQUOTA); -			locked[GRPQUOTA] = 1; +			}  		} -		handle = ocfs2_start_trans(osb, credits); +		handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS + +					   2 * ocfs2_quota_trans_credits(sb));  		if (IS_ERR(handle)) {  			status = PTR_ERR(handle);  			mlog_errno(status); @@ -1030,12 +1040,6 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)  bail_commit:  	ocfs2_commit_trans(osb, handle);  bail_unlock: -	for (qtype = 0; qtype < MAXQUOTAS; qtype++) { -		if (!locked[qtype]) -			continue; -		oinfo = sb_dqinfo(sb, qtype)->dqi_priv; -		ocfs2_unlock_global_qf(oinfo, 1); -	}  	ocfs2_inode_unlock(inode, 1);  bail_unlock_rw:  	if (size_change) @@ -1043,6 +1047,12 @@ bail_unlock_rw:  bail:  	brelse(bh); +	/* Release quota pointers in case we acquired them */ +	for (qtype = 0; qtype < MAXQUOTAS; qtype++) { +		dqput(transfer_to[qtype]); +		dqput(transfer_from[qtype]); +	} +  	if (!status && attr->ia_valid & ATTR_MODE) {  		status = ocfs2_acl_chmod(inode);  		if (status < 0) diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index a20a0f1e37f..4a3b9e6b31a 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -28,6 +28,8 @@  #include <linux/slab.h>  #include <linux/highmem.h>  #include <linux/kthread.h> +#include <linux/time.h> +#include <linux/random.h>  #define MLOG_MASK_PREFIX ML_JOURNAL  #include <cluster/masklog.h> @@ -52,6 +54,8 @@  DEFINE_SPINLOCK(trans_inc_lock); +#define ORPHAN_SCAN_SCHEDULE_TIMEOUT 300000 +  static int ocfs2_force_read_journal(struct inode *inode);  static int ocfs2_recover_node(struct ocfs2_super *osb,  			      int node_num, int slot_num); @@ -1841,6 +1845,113 @@ bail:  	return status;  } +/* + * Scan timer should get fired every ORPHAN_SCAN_SCHEDULE_TIMEOUT. Add some + * randomness to the timeout to minimize multple nodes firing the timer at the + * same time. + */ +static inline unsigned long ocfs2_orphan_scan_timeout(void) +{ +	unsigned long time; + +	get_random_bytes(&time, sizeof(time)); +	time = ORPHAN_SCAN_SCHEDULE_TIMEOUT + (time % 5000); +	return msecs_to_jiffies(time); +} + +/* + * ocfs2_queue_orphan_scan calls ocfs2_queue_recovery_completion for + * every slot, queuing a recovery of the slot on the ocfs2_wq thread. This + * is done to catch any orphans that are left over in orphan directories. + * + * ocfs2_queue_orphan_scan gets called every ORPHAN_SCAN_SCHEDULE_TIMEOUT + * seconds.  It gets an EX lock on os_lockres and checks sequence number + * stored in LVB. If the sequence number has changed, it means some other + * node has done the scan.  This node skips the scan and tracks the + * sequence number.  If the sequence number didn't change, it means a scan + * hasn't happened.  The node queues a scan and increments the + * sequence number in the LVB. + */ +void ocfs2_queue_orphan_scan(struct ocfs2_super *osb) +{ +	struct ocfs2_orphan_scan *os; +	int status, i; +	u32 seqno = 0; + +	os = &osb->osb_orphan_scan; + +	status = ocfs2_orphan_scan_lock(osb, &seqno, DLM_LOCK_EX); +	if (status < 0) { +		if (status != -EAGAIN) +			mlog_errno(status); +		goto out; +	} + +	if (os->os_seqno != seqno) { +		os->os_seqno = seqno; +		goto unlock; +	} + +	for (i = 0; i < osb->max_slots; i++) +		ocfs2_queue_recovery_completion(osb->journal, i, NULL, NULL, +						NULL); +	/* +	 * We queued a recovery on orphan slots, increment the sequence +	 * number and update LVB so other node will skip the scan for a while +	 */ +	seqno++; +	os->os_count++; +	os->os_scantime = CURRENT_TIME; +unlock: +	ocfs2_orphan_scan_unlock(osb, seqno, DLM_LOCK_EX); +out: +	return; +} + +/* Worker task that gets fired every ORPHAN_SCAN_SCHEDULE_TIMEOUT millsec */ +void ocfs2_orphan_scan_work(struct work_struct *work) +{ +	struct ocfs2_orphan_scan *os; +	struct ocfs2_super *osb; + +	os = container_of(work, struct ocfs2_orphan_scan, +			  os_orphan_scan_work.work); +	osb = os->os_osb; + +	mutex_lock(&os->os_lock); +	ocfs2_queue_orphan_scan(osb); +	schedule_delayed_work(&os->os_orphan_scan_work, +			      ocfs2_orphan_scan_timeout()); +	mutex_unlock(&os->os_lock); +} + +void ocfs2_orphan_scan_stop(struct ocfs2_super *osb) +{ +	struct ocfs2_orphan_scan *os; + +	os = &osb->osb_orphan_scan; +	mutex_lock(&os->os_lock); +	cancel_delayed_work(&os->os_orphan_scan_work); +	mutex_unlock(&os->os_lock); +} + +int ocfs2_orphan_scan_init(struct ocfs2_super *osb) +{ +	struct ocfs2_orphan_scan *os; + +	os = &osb->osb_orphan_scan; +	os->os_osb = osb; +	os->os_count = 0; +	os->os_scantime = CURRENT_TIME; +	mutex_init(&os->os_lock); + +	INIT_DELAYED_WORK(&os->os_orphan_scan_work, +			  ocfs2_orphan_scan_work); +	schedule_delayed_work(&os->os_orphan_scan_work, +			      ocfs2_orphan_scan_timeout()); +	return 0; +} +  struct ocfs2_orphan_filldir_priv {  	struct inode		*head;  	struct ocfs2_super	*osb; diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index eb7b76331eb..61045eeb3f6 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -144,6 +144,10 @@ static inline void ocfs2_inode_set_new(struct ocfs2_super *osb,  }  /* Exported only for the journal struct init code in super.c. Do not call. */ +int ocfs2_orphan_scan_init(struct ocfs2_super *osb); +void ocfs2_orphan_scan_stop(struct ocfs2_super *osb); +void ocfs2_orphan_scan_exit(struct ocfs2_super *osb); +  void ocfs2_complete_recovery(struct work_struct *work);  void ocfs2_wait_for_recovery(struct ocfs2_super *osb); diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 1386281950d..18c1d9ec1c9 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -47,6 +47,9 @@  #include "ocfs2_fs.h"  #include "ocfs2_lockid.h" +/* For struct ocfs2_blockcheck_stats */ +#include "blockcheck.h" +  /* Most user visible OCFS2 inodes will have very few pieces of   * metadata, but larger files (including bitmaps, etc) must be taken   * into account when designing an access scheme. We allow a small @@ -151,6 +154,16 @@ struct ocfs2_lock_res {  #endif  }; +struct ocfs2_orphan_scan { +	struct mutex 		os_lock; +	struct ocfs2_super 	*os_osb; +	struct ocfs2_lock_res 	os_lockres;     /* lock to synchronize scans */ +	struct delayed_work 	os_orphan_scan_work; +	struct timespec		os_scantime;  /* time this node ran the scan */ +	u32			os_count;      /* tracks node specific scans */ +	u32  			os_seqno;       /* tracks cluster wide scans */ +}; +  struct ocfs2_dlm_debug {  	struct kref d_refcnt;  	struct dentry *d_locking_state; @@ -295,6 +308,7 @@ struct ocfs2_super  	struct ocfs2_dinode *local_alloc_copy;  	struct ocfs2_quota_recovery *quota_rec; +	struct ocfs2_blockcheck_stats osb_ecc_stats;  	struct ocfs2_alloc_stats alloc_stats;  	char dev_str[20];		/* "major,minor" of the device */ @@ -341,6 +355,8 @@ struct ocfs2_super  	unsigned int			*osb_orphan_wipes;  	wait_queue_head_t		osb_wipe_event; +	struct ocfs2_orphan_scan	osb_orphan_scan; +  	/* used to protect metaecc calculation check of xattr. */  	spinlock_t osb_xattr_lock; diff --git a/fs/ocfs2/ocfs2_lockid.h b/fs/ocfs2/ocfs2_lockid.h index a53ce87481b..fcdba091af3 100644 --- a/fs/ocfs2/ocfs2_lockid.h +++ b/fs/ocfs2/ocfs2_lockid.h @@ -48,6 +48,7 @@ enum ocfs2_lock_type {  	OCFS2_LOCK_TYPE_FLOCK,  	OCFS2_LOCK_TYPE_QINFO,  	OCFS2_LOCK_TYPE_NFS_SYNC, +	OCFS2_LOCK_TYPE_ORPHAN_SCAN,  	OCFS2_NUM_LOCK_TYPES  }; @@ -85,6 +86,9 @@ static inline char ocfs2_lock_type_char(enum ocfs2_lock_type type)  		case OCFS2_LOCK_TYPE_NFS_SYNC:  			c = 'Y';  			break; +		case OCFS2_LOCK_TYPE_ORPHAN_SCAN: +			c = 'P'; +			break;  		default:  			c = '\0';  	} @@ -104,6 +108,7 @@ static char *ocfs2_lock_type_strings[] = {  	[OCFS2_LOCK_TYPE_OPEN] = "Open",  	[OCFS2_LOCK_TYPE_FLOCK] = "Flock",  	[OCFS2_LOCK_TYPE_QINFO] = "Quota", +	[OCFS2_LOCK_TYPE_ORPHAN_SCAN] = "OrphanScan",  };  static inline const char *ocfs2_lock_type_string(enum ocfs2_lock_type type) diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index 1ed0f7c8686..edfa60cd155 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -421,6 +421,7 @@ int ocfs2_global_read_dquot(struct dquot *dquot)  	OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes;  	if (!dquot->dq_off) {	/* No real quota entry? */  		/* Upgrade to exclusive lock for allocation */ +		ocfs2_qinfo_unlock(info, 0);  		err = ocfs2_qinfo_lock(info, 1);  		if (err < 0)  			goto out_qlock; @@ -435,7 +436,8 @@ int ocfs2_global_read_dquot(struct dquot *dquot)  out_qlock:  	if (ex)  		ocfs2_qinfo_unlock(info, 1); -	ocfs2_qinfo_unlock(info, 0); +	else +		ocfs2_qinfo_unlock(info, 0);  out:  	if (err < 0)  		mlog_errno(err); diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index 07deec5e972..5a460fa8255 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c @@ -444,10 +444,6 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode,  	mlog_entry("ino=%lu type=%u", (unsigned long)lqinode->i_ino, type); -	status = ocfs2_lock_global_qf(oinfo, 1); -	if (status < 0) -		goto out; -  	list_for_each_entry_safe(rchunk, next, &(rec->r_list[type]), rc_list) {  		chunk = rchunk->rc_chunk;  		hbh = NULL; @@ -480,12 +476,18 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode,  				     type);  				goto out_put_bh;  			} +			status = ocfs2_lock_global_qf(oinfo, 1); +			if (status < 0) { +				mlog_errno(status); +				goto out_put_dquot; +			} +  			handle = ocfs2_start_trans(OCFS2_SB(sb),  						   OCFS2_QSYNC_CREDITS);  			if (IS_ERR(handle)) {  				status = PTR_ERR(handle);  				mlog_errno(status); -				goto out_put_dquot; +				goto out_drop_lock;  			}  			mutex_lock(&sb_dqopt(sb)->dqio_mutex);  			spin_lock(&dq_data_lock); @@ -523,6 +525,8 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode,  out_commit:  			mutex_unlock(&sb_dqopt(sb)->dqio_mutex);  			ocfs2_commit_trans(OCFS2_SB(sb), handle); +out_drop_lock: +			ocfs2_unlock_global_qf(oinfo, 1);  out_put_dquot:  			dqput(dquot);  out_put_bh: @@ -537,8 +541,6 @@ out_put_bh:  		if (status < 0)  			break;  	} -	ocfs2_unlock_global_qf(oinfo, 1); -out:  	if (status < 0)  		free_recovery_list(&(rec->r_list[type]));  	mlog_exit(status); @@ -655,6 +657,9 @@ static int ocfs2_local_read_info(struct super_block *sb, int type)  	struct ocfs2_quota_recovery *rec;  	int locked = 0; +	/* We don't need the lock and we have to acquire quota file locks +	 * which will later depend on this lock */ +	mutex_unlock(&sb_dqopt(sb)->dqio_mutex);  	info->dqi_maxblimit = 0x7fffffffffffffffLL;  	info->dqi_maxilimit = 0x7fffffffffffffffLL;  	oinfo = kmalloc(sizeof(struct ocfs2_mem_dqinfo), GFP_NOFS); @@ -733,6 +738,7 @@ static int ocfs2_local_read_info(struct super_block *sb, int type)  		goto out_err;  	} +	mutex_lock(&sb_dqopt(sb)->dqio_mutex);  	return 0;  out_err:  	if (oinfo) { @@ -746,6 +752,7 @@ out_err:  		kfree(oinfo);  	}  	brelse(bh); +	mutex_lock(&sb_dqopt(sb)->dqio_mutex);  	return -1;  } diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 201b40a441f..d33767f17ba 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -119,10 +119,12 @@ static void ocfs2_release_system_inodes(struct ocfs2_super *osb);  static int ocfs2_check_volume(struct ocfs2_super *osb);  static int ocfs2_verify_volume(struct ocfs2_dinode *di,  			       struct buffer_head *bh, -			       u32 sectsize); +			       u32 sectsize, +			       struct ocfs2_blockcheck_stats *stats);  static int ocfs2_initialize_super(struct super_block *sb,  				  struct buffer_head *bh, -				  int sector_size); +				  int sector_size, +				  struct ocfs2_blockcheck_stats *stats);  static int ocfs2_get_sector(struct super_block *sb,  			    struct buffer_head **bh,  			    int block, @@ -207,6 +209,7 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len)  	int i;  	struct ocfs2_cluster_connection *cconn = osb->cconn;  	struct ocfs2_recovery_map *rm = osb->recovery_map; +	struct ocfs2_orphan_scan *os;  	out += snprintf(buf + out, len - out,  			"%10s => Id: %-s  Uuid: %-s  Gen: 0x%X  Label: %-s\n", @@ -308,6 +311,13 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len)  				i, osb->slot_recovery_generations[i]);  	} +	os = &osb->osb_orphan_scan; +	out += snprintf(buf + out, len - out, "Orphan Scan=> "); +	out += snprintf(buf + out, len - out, "Local: %u  Global: %u ", +			os->os_count, os->os_seqno); +	out += snprintf(buf + out, len - out, " Last Scan: %lu seconds ago\n", +			(get_seconds() - os->os_scantime.tv_sec)); +  	return out;  } @@ -693,7 +703,8 @@ out:  static int ocfs2_sb_probe(struct super_block *sb,  			  struct buffer_head **bh, -			  int *sector_size) +			  int *sector_size, +			  struct ocfs2_blockcheck_stats *stats)  {  	int status, tmpstat;  	struct ocfs1_vol_disk_hdr *hdr; @@ -759,7 +770,8 @@ static int ocfs2_sb_probe(struct super_block *sb,  			goto bail;  		}  		di = (struct ocfs2_dinode *) (*bh)->b_data; -		status = ocfs2_verify_volume(di, *bh, blksize); +		memset(stats, 0, sizeof(struct ocfs2_blockcheck_stats)); +		status = ocfs2_verify_volume(di, *bh, blksize, stats);  		if (status >= 0)  			goto bail;  		brelse(*bh); @@ -965,6 +977,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)  	struct ocfs2_super *osb = NULL;  	struct buffer_head *bh = NULL;  	char nodestr[8]; +	struct ocfs2_blockcheck_stats stats;  	mlog_entry("%p, %p, %i", sb, data, silent); @@ -974,13 +987,13 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)  	}  	/* probe for superblock */ -	status = ocfs2_sb_probe(sb, &bh, §or_size); +	status = ocfs2_sb_probe(sb, &bh, §or_size, &stats);  	if (status < 0) {  		mlog(ML_ERROR, "superblock probe failed!\n");  		goto read_super_error;  	} -	status = ocfs2_initialize_super(sb, bh, sector_size); +	status = ocfs2_initialize_super(sb, bh, sector_size, &stats);  	osb = OCFS2_SB(sb);  	if (status < 0) {  		mlog_errno(status); @@ -1090,6 +1103,18 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)  		goto read_super_error;  	} +	if (ocfs2_meta_ecc(osb)) { +		status = ocfs2_blockcheck_stats_debugfs_install( +						&osb->osb_ecc_stats, +						osb->osb_debug_root); +		if (status) { +			mlog(ML_ERROR, +			     "Unable to create blockcheck statistics " +			     "files\n"); +			goto read_super_error; +		} +	} +  	status = ocfs2_mount_volume(sb);  	if (osb->root_inode)  		inode = igrab(osb->root_inode); @@ -1760,13 +1785,8 @@ static int ocfs2_mount_volume(struct super_block *sb)  	}  	status = ocfs2_truncate_log_init(osb); -	if (status < 0) { +	if (status < 0)  		mlog_errno(status); -		goto leave; -	} - -	if (ocfs2_mount_local(osb)) -		goto leave;  leave:  	if (unlock_super) @@ -1796,6 +1816,8 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)  	ocfs2_truncate_log_shutdown(osb); +	ocfs2_orphan_scan_stop(osb); +  	/* This will disable recovery and flush any recovery work. */  	ocfs2_recovery_exit(osb); @@ -1833,6 +1855,7 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)  	if (osb->cconn)  		ocfs2_dlm_shutdown(osb, hangup_needed); +	ocfs2_blockcheck_stats_debugfs_remove(&osb->osb_ecc_stats);  	debugfs_remove(osb->osb_debug_root);  	if (hangup_needed) @@ -1880,7 +1903,8 @@ static int ocfs2_setup_osb_uuid(struct ocfs2_super *osb, const unsigned char *uu  static int ocfs2_initialize_super(struct super_block *sb,  				  struct buffer_head *bh, -				  int sector_size) +				  int sector_size, +				  struct ocfs2_blockcheck_stats *stats)  {  	int status;  	int i, cbits, bbits; @@ -1939,6 +1963,9 @@ static int ocfs2_initialize_super(struct super_block *sb,  	atomic_set(&osb->alloc_stats.bg_allocs, 0);  	atomic_set(&osb->alloc_stats.bg_extends, 0); +	/* Copy the blockcheck stats from the superblock probe */ +	osb->osb_ecc_stats = *stats; +  	ocfs2_init_node_maps(osb);  	snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u", @@ -1951,6 +1978,13 @@ static int ocfs2_initialize_super(struct super_block *sb,  		goto bail;  	} +	status = ocfs2_orphan_scan_init(osb); +	if (status) { +		mlog(ML_ERROR, "Unable to initialize delayed orphan scan\n"); +		mlog_errno(status); +		goto bail; +	} +  	init_waitqueue_head(&osb->checkpoint_event);  	atomic_set(&osb->needs_checkpoint, 0); @@ -2169,7 +2203,8 @@ bail:   */  static int ocfs2_verify_volume(struct ocfs2_dinode *di,  			       struct buffer_head *bh, -			       u32 blksz) +			       u32 blksz, +			       struct ocfs2_blockcheck_stats *stats)  {  	int status = -EAGAIN; @@ -2182,7 +2217,8 @@ static int ocfs2_verify_volume(struct ocfs2_dinode *di,  		    OCFS2_FEATURE_INCOMPAT_META_ECC) {  			status = ocfs2_block_check_validate(bh->b_data,  							    bh->b_size, -							    &di->i_check); +							    &di->i_check, +							    stats);  			if (status)  				goto out;  		} diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 15631019dc6..ba320e25074 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -3154,7 +3154,7 @@ static int ocfs2_iterate_xattr_buckets(struct inode *inode,  		     le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash));  		if (func) {  			ret = func(inode, bucket, para); -			if (ret) +			if (ret && ret != -ERANGE)  				mlog_errno(ret);  			/* Fall through to bucket_relse() */  		} @@ -3261,7 +3261,8 @@ static int ocfs2_xattr_tree_list_index_block(struct inode *inode,  						  ocfs2_list_xattr_bucket,  						  &xl);  		if (ret) { -			mlog_errno(ret); +			if (ret != -ERANGE) +				mlog_errno(ret);  			goto out;  		} diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 3589eab02a2..3260b73abe2 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1937,6 +1937,9 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)  	err  = bdi_init(&c->bdi);  	if (err)  		goto out_close; +	err = bdi_register(&c->bdi, NULL, "ubifs"); +	if (err) +		goto out_bdi;  	err = ubifs_parse_options(c, data, 0);  	if (err) | 
