diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-04-11 15:45:47 -0700 | 
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-04-11 15:45:47 -0700 | 
| commit | a97b52022a73ec12e43f0b2c7d4bd1f40f89c81d (patch) | |
| tree | 1a35544915a5704fa59c63b43e9f46e20be1e296 | |
| parent | 18770c7c3a0ccd60017ac76b5d2e7d1f71376b94 (diff) | |
| parent | c8205636029fc869278c55b7336053b3e7ae3ef4 (diff) | |
Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  ext4: fix data corruption regression by reverting commit 6de9843dab3f
  ext4: Allow indirect-block file to grow the file size to max file size
  ext4: allow an active handle to be started when freezing
  ext4: sync the directory inode in ext4_sync_parent()
  ext4: init timer earlier to avoid a kernel panic in __save_error_info
  jbd2: fix potential memory leak on transaction commit
  ext4: fix a double free in ext4_register_li_request
  ext4: fix credits computing for indirect mapped files
  ext4: remove unnecessary [cm]time update of quota file
  jbd2: move bdget out of critical section
| -rw-r--r-- | fs/ext4/ext4_jbd2.h | 4 | ||||
| -rw-r--r-- | fs/ext4/fsync.c | 17 | ||||
| -rw-r--r-- | fs/ext4/inode.c | 35 | ||||
| -rw-r--r-- | fs/ext4/super.c | 74 | ||||
| -rw-r--r-- | fs/jbd2/commit.c | 4 | ||||
| -rw-r--r-- | fs/jbd2/journal.c | 3 | 
6 files changed, 102 insertions, 35 deletions
| diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index e25e99bf7ee..d0f53538a57 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -86,8 +86,8 @@  #ifdef CONFIG_QUOTA  /* Amount of blocks needed for quota update - we know that the structure was - * allocated so we need to update only inode+data */ -#define EXT4_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 2 : 0) + * allocated so we need to update only data block */ +#define EXT4_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 1 : 0)  /* Amount of blocks needed for quota insert/delete - we do some block writes   * but inode, sb and group updates are done only once */  #define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\ diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 4673bc05274..e9473cbe80d 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -125,9 +125,11 @@ extern int ext4_flush_completed_IO(struct inode *inode)   * the parent directory's parent as well, and so on recursively, if   * they are also freshly created.   */ -static void ext4_sync_parent(struct inode *inode) +static int ext4_sync_parent(struct inode *inode)  { +	struct writeback_control wbc;  	struct dentry *dentry = NULL; +	int ret = 0;  	while (inode && ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) {  		ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY); @@ -136,8 +138,17 @@ static void ext4_sync_parent(struct inode *inode)  		if (!dentry || !dentry->d_parent || !dentry->d_parent->d_inode)  			break;  		inode = dentry->d_parent->d_inode; -		sync_mapping_buffers(inode->i_mapping); +		ret = sync_mapping_buffers(inode->i_mapping); +		if (ret) +			break; +		memset(&wbc, 0, sizeof(wbc)); +		wbc.sync_mode = WB_SYNC_ALL; +		wbc.nr_to_write = 0;         /* only write out the inode */ +		ret = sync_inode(inode, &wbc); +		if (ret) +			break;  	} +	return ret;  }  /* @@ -176,7 +187,7 @@ int ext4_sync_file(struct file *file, int datasync)  	if (!journal) {  		ret = generic_file_fsync(file, datasync);  		if (!ret && !list_empty(&inode->i_dentry)) -			ext4_sync_parent(inode); +			ret = ext4_sync_parent(inode);  		goto out;  	} diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index ad8e303c0d2..f2fa5e8a582 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2502,6 +2502,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,  		 * for partial write.  		 */  		set_buffer_new(bh); +		set_buffer_mapped(bh);  	}  	return 0;  } @@ -4429,8 +4430,8 @@ void ext4_truncate(struct inode *inode)  	Indirect chain[4];  	Indirect *partial;  	__le32 nr = 0; -	int n; -	ext4_lblk_t last_block; +	int n = 0; +	ext4_lblk_t last_block, max_block;  	unsigned blocksize = inode->i_sb->s_blocksize;  	trace_ext4_truncate_enter(inode); @@ -4455,14 +4456,18 @@ void ext4_truncate(struct inode *inode)  	last_block = (inode->i_size + blocksize-1)  					>> EXT4_BLOCK_SIZE_BITS(inode->i_sb); +	max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1) +					>> EXT4_BLOCK_SIZE_BITS(inode->i_sb);  	if (inode->i_size & (blocksize - 1))  		if (ext4_block_truncate_page(handle, mapping, inode->i_size))  			goto out_stop; -	n = ext4_block_to_path(inode, last_block, offsets, NULL); -	if (n == 0) -		goto out_stop;	/* error */ +	if (last_block != max_block) { +		n = ext4_block_to_path(inode, last_block, offsets, NULL); +		if (n == 0) +			goto out_stop;	/* error */ +	}  	/*  	 * OK.  This truncate is going to happen.  We add the inode to the @@ -4493,7 +4498,13 @@ void ext4_truncate(struct inode *inode)  	 */  	ei->i_disksize = inode->i_size; -	if (n == 1) {		/* direct blocks */ +	if (last_block == max_block) { +		/* +		 * It is unnecessary to free any data blocks if last_block is +		 * equal to the indirect block limit. +		 */ +		goto out_unlock; +	} else if (n == 1) {		/* direct blocks */  		ext4_free_data(handle, inode, NULL, i_data+offsets[0],  			       i_data + EXT4_NDIR_BLOCKS);  		goto do_indirects; @@ -4553,6 +4564,7 @@ do_indirects:  		;  	} +out_unlock:  	up_write(&ei->i_data_sem);  	inode->i_mtime = inode->i_ctime = ext4_current_time(inode);  	ext4_mark_inode_dirty(handle, inode); @@ -5398,13 +5410,12 @@ static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks,  	/* if nrblocks are contiguous */  	if (chunk) {  		/* -		 * With N contiguous data blocks, it need at most -		 * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) indirect blocks -		 * 2 dindirect blocks -		 * 1 tindirect block +		 * With N contiguous data blocks, we need at most +		 * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) + 1 indirect blocks, +		 * 2 dindirect blocks, and 1 tindirect block  		 */ -		indirects = nrblocks / EXT4_ADDR_PER_BLOCK(inode->i_sb); -		return indirects + 3; +		return DIV_ROUND_UP(nrblocks, +				    EXT4_ADDR_PER_BLOCK(inode->i_sb)) + 4;  	}  	/*  	 * if nrblocks are not contiguous, worse case, each block touch diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 056474b7b8e..8553dfb310a 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -242,27 +242,44 @@ static void ext4_put_nojournal(handle_t *handle)   * journal_end calls result in the superblock being marked dirty, so   * that sync() will call the filesystem's write_super callback if   * appropriate. + * + * To avoid j_barrier hold in userspace when a user calls freeze(), + * ext4 prevents a new handle from being started by s_frozen, which + * is in an upper layer.   */  handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)  {  	journal_t *journal; +	handle_t  *handle;  	if (sb->s_flags & MS_RDONLY)  		return ERR_PTR(-EROFS); -	vfs_check_frozen(sb, SB_FREEZE_TRANS); -	/* Special case here: if the journal has aborted behind our -	 * backs (eg. EIO in the commit thread), then we still need to -	 * take the FS itself readonly cleanly. */  	journal = EXT4_SB(sb)->s_journal; -	if (journal) { -		if (is_journal_aborted(journal)) { -			ext4_abort(sb, "Detected aborted journal"); -			return ERR_PTR(-EROFS); -		} -		return jbd2_journal_start(journal, nblocks); +	handle = ext4_journal_current_handle(); + +	/* +	 * If a handle has been started, it should be allowed to +	 * finish, otherwise deadlock could happen between freeze +	 * and others(e.g. truncate) due to the restart of the +	 * journal handle if the filesystem is forzen and active +	 * handles are not stopped. +	 */ +	if (!handle) +		vfs_check_frozen(sb, SB_FREEZE_TRANS); + +	if (!journal) +		return ext4_get_nojournal(); +	/* +	 * Special case here: if the journal has aborted behind our +	 * backs (eg. EIO in the commit thread), then we still need to +	 * take the FS itself readonly cleanly. +	 */ +	if (is_journal_aborted(journal)) { +		ext4_abort(sb, "Detected aborted journal"); +		return ERR_PTR(-EROFS);  	} -	return ext4_get_nojournal(); +	return jbd2_journal_start(journal, nblocks);  }  /* @@ -2975,6 +2992,12 @@ static int ext4_register_li_request(struct super_block *sb,  	mutex_unlock(&ext4_li_info->li_list_mtx);  	sbi->s_li_request = elr; +	/* +	 * set elr to NULL here since it has been inserted to +	 * the request_list and the removal and free of it is +	 * handled by ext4_clear_request_list from now on. +	 */ +	elr = NULL;  	if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) {  		ret = ext4_run_lazyinit_thread(); @@ -3385,6 +3408,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)  	get_random_bytes(&sbi->s_next_generation, sizeof(u32));  	spin_lock_init(&sbi->s_next_gen_lock); +	init_timer(&sbi->s_err_report); +	sbi->s_err_report.function = print_daily_error_info; +	sbi->s_err_report.data = (unsigned long) sb; +  	err = percpu_counter_init(&sbi->s_freeblocks_counter,  			ext4_count_free_blocks(sb));  	if (!err) { @@ -3646,9 +3673,6 @@ no_journal:  		 "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts,  		 *sbi->s_es->s_mount_opts ? "; " : "", orig_data); -	init_timer(&sbi->s_err_report); -	sbi->s_err_report.function = print_daily_error_info; -	sbi->s_err_report.data = (unsigned long) sb;  	if (es->s_error_count)  		mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */ @@ -3672,6 +3696,7 @@ failed_mount_wq:  		sbi->s_journal = NULL;  	}  failed_mount3: +	del_timer(&sbi->s_err_report);  	if (sbi->s_flex_groups) {  		if (is_vmalloc_addr(sbi->s_flex_groups))  			vfree(sbi->s_flex_groups); @@ -4138,6 +4163,11 @@ static int ext4_sync_fs(struct super_block *sb, int wait)  /*   * LVM calls this function before a (read-only) snapshot is created.  This   * gives us a chance to flush the journal completely and mark the fs clean. + * + * Note that only this function cannot bring a filesystem to be in a clean + * state independently, because ext4 prevents a new handle from being started + * by @sb->s_frozen, which stays in an upper layer.  It thus needs help from + * the upper layer.   */  static int ext4_freeze(struct super_block *sb)  { @@ -4614,11 +4644,24 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,  static int ext4_quota_off(struct super_block *sb, int type)  { +	struct inode *inode = sb_dqopt(sb)->files[type]; +	handle_t *handle; +  	/* Force all delayed allocation blocks to be allocated.  	 * Caller already holds s_umount sem */  	if (test_opt(sb, DELALLOC))  		sync_filesystem(sb); +	/* Update modification times of quota files when userspace can +	 * start looking at them */ +	handle = ext4_journal_start(inode, 1); +	if (IS_ERR(handle)) +		goto out; +	inode->i_mtime = inode->i_ctime = CURRENT_TIME; +	ext4_mark_inode_dirty(handle, inode); +	ext4_journal_stop(handle); + +out:  	return dquot_quota_off(sb, type);  } @@ -4714,9 +4757,8 @@ out:  	if (inode->i_size < off + len) {  		i_size_write(inode, off + len);  		EXT4_I(inode)->i_disksize = inode->i_size; +		ext4_mark_inode_dirty(handle, inode);  	} -	inode->i_mtime = inode->i_ctime = CURRENT_TIME; -	ext4_mark_inode_dirty(handle, inode);  	mutex_unlock(&inode->i_mutex);  	return len;  } diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 20af62f4304..6e28000a4b2 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -105,6 +105,8 @@ static int journal_submit_commit_record(journal_t *journal,  	int ret;  	struct timespec now = current_kernel_time(); +	*cbh = NULL; +  	if (is_journal_aborted(journal))  		return 0; @@ -806,7 +808,7 @@ wait_for_iobuf:  		if (err)  			__jbd2_journal_abort_hard(journal);  	} -	if (!err && !is_journal_aborted(journal)) +	if (cbh)  		err = journal_wait_on_commit_record(journal, cbh);  	if (JBD2_HAS_INCOMPAT_FEATURE(journal,  				      JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT) && diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index aba8ebaec25..e0ec3db1c39 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -2413,10 +2413,12 @@ const char *jbd2_dev_to_name(dev_t device)  	new_dev = kmalloc(sizeof(struct devname_cache), GFP_KERNEL);  	if (!new_dev)  		return "NODEV-ALLOCFAILURE"; /* Something non-NULL */ +	bd = bdget(device);  	spin_lock(&devname_cache_lock);  	if (devcache[i]) {  		if (devcache[i]->device == device) {  			kfree(new_dev); +			bdput(bd);  			ret = devcache[i]->devname;  			spin_unlock(&devname_cache_lock);  			return ret; @@ -2425,7 +2427,6 @@ const char *jbd2_dev_to_name(dev_t device)  	}  	devcache[i] = new_dev;  	devcache[i]->device = device; -	bd = bdget(device);  	if (bd) {  		bdevname(bd, devcache[i]->devname);  		bdput(bd); | 
