From bf068ee266f9dbaa6dacb8433a366bb399e7ae5b Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Tue, 19 Aug 2008 22:16:43 -0400
Subject: ext4: Handle unwritten extent properly with delayed allocation

When using fallocate the buffer_heads are marked unwritten and unmapped.
We need to map them in the writepages after a get_block.  Otherwise we
split the uninit extents, but never write the content to disk.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/inode.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 59fbbe899acc..a1c7d7623213 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1741,6 +1741,13 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical,
 				if (buffer_delay(bh)) {
 					bh->b_blocknr = pblock;
 					clear_buffer_delay(bh);
+					bh->b_bdev = inode->i_sb->s_bdev;
+				} else if (buffer_unwritten(bh)) {
+					bh->b_blocknr = pblock;
+					clear_buffer_unwritten(bh);
+					set_buffer_mapped(bh);
+					set_buffer_new(bh);
+					bh->b_bdev = inode->i_sb->s_bdev;
 				} else if (buffer_mapped(bh))
 					BUG_ON(bh->b_blocknr != pblock);
 
@@ -1814,7 +1821,7 @@ static void mpage_da_map_blocks(struct mpage_da_data *mpd)
 		 * If blocks are delayed marked, we need to
 		 * put actual blocknr and drop delayed bit
 		 */
-		if (buffer_delay(lbh))
+		if (buffer_delay(lbh) || buffer_unwritten(lbh))
 			mpage_put_bnr_to_bhs(mpd, next, &new);
 
 		/* go for the remaining blocks */
@@ -1823,7 +1830,8 @@ static void mpage_da_map_blocks(struct mpage_da_data *mpd)
 	}
 }
 
-#define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | (1 << BH_Delay))
+#define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \
+		(1 << BH_Delay) | (1 << BH_Unwritten))
 
 /*
  * mpage_add_bh_to_extent - try to add one more block to extent of blocks
-- 
cgit v1.2.3


From b4df2030858bde986cb6ff2e4b45945f84649e32 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Wed, 13 Aug 2008 21:44:34 -0400
Subject: ext4: Fix potential truncate BUG due to i_prealloc_list being
 non-empty

We need to call ext4_discard_reservation() earlier in ext4_truncate(),
to avoid a BUG() in ext4_mb_return_to_preallocation(), which is called
(ultimately) by ext4_free_blocks().  So we must ditch the blocks on
i_prealloc_list before we start freeing the data blocks.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/inode.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index a1c7d7623213..2d54c822c4c3 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3494,6 +3494,9 @@ void ext4_truncate(struct inode *inode)
 	 * modify the block allocation tree.
 	 */
 	down_write(&ei->i_data_sem);
+
+	ext4_discard_reservation(inode);
+
 	/*
 	 * The orphan list entry will now protect us from any crash which
 	 * occurs before the truncate completes, so it is now safe to propagate
@@ -3563,8 +3566,6 @@ do_indirects:
 		;
 	}
 
-	ext4_discard_reservation(inode);
-
 	up_write(&ei->i_data_sem);
 	inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
 	ext4_mark_inode_dirty(handle, inode);
-- 
cgit v1.2.3


From cd21322616c3af265d39bf15321d436e667a5dd1 Mon Sep 17 00:00:00 2001
From: Mingming Cao <cmm@us.ibm.com>
Date: Tue, 19 Aug 2008 22:16:59 -0400
Subject: ext4: Fix delalloc release block reservation for truncate

Ext4 will release the reserved blocks for delayed allocations when
inode is truncated/unlinked.  If there is no reserved block at all, we
shouldn't need to do so.  But current code still tries to release the
reserved blocks regardless whether the counters's value is 0.
Continue to do that causes the later calculation to go wrong and a
kernel BUG_ON() caught that. This doesn't happen for extent-based
files, as the calculation for 0 reserved blocks was right for extent
based file.

This patch fixed the kernel BUG() due to above reason.  It adds checks
for 0 to avoid unnecessary release and fix calculation for non-extent
files.

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/inode.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'fs')

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 2d54c822c4c3..5e17d5f22a7e 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1005,6 +1005,9 @@ static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks)
  */
 static int ext4_calc_metadata_amount(struct inode *inode, int blocks)
 {
+	if (!blocks)
+		return 0;
+
 	if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
 		return ext4_ext_calc_metadata_amount(inode, blocks);
 
@@ -1559,7 +1562,25 @@ static void ext4_da_release_space(struct inode *inode, int to_free)
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	int total, mdb, mdb_free, release;
 
+	if (!to_free)
+		return;		/* Nothing to release, exit */
+
 	spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
+
+	if (!EXT4_I(inode)->i_reserved_data_blocks) {
+		/*
+		 * if there is no reserved blocks, but we try to free some
+		 * then the counter is messed up somewhere.
+		 * but since this function is called from invalidate
+		 * page, it's harmless to return without any action
+		 */
+		printk(KERN_INFO "ext4 delalloc try to release %d reserved "
+			    "blocks for inode %lu, but there is no reserved "
+			    "data blocks\n", to_free, inode->i_ino);
+		spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+		return;
+	}
+
 	/* recalculate the number of metablocks still need to be reserved */
 	total = EXT4_I(inode)->i_reserved_data_blocks - to_free;
 	mdb = ext4_calc_metadata_amount(inode, total);
-- 
cgit v1.2.3


From d015641734cde55d2fce48a6db3983c8a029fe05 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Tue, 19 Aug 2008 21:57:43 -0400
Subject: ext4: Fix ext4_dx_readdir hash collision handling

This fixes a bug where readdir() would return a directory entry twice
if there was a hash collision in an hash tree indexed directory.

Signed-off-by: Eugene Dashevsky <eugene@ibrix.com>
Signed-off-by: Mike Snitzer <msnitzer@ibrix.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/dir.c | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index d3d23d73c08b..ec8e33b45219 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -411,7 +411,7 @@ static int call_filldir(struct file * filp, void * dirent,
 				get_dtype(sb, fname->file_type));
 		if (error) {
 			filp->f_pos = curr_pos;
-			info->extra_fname = fname->next;
+			info->extra_fname = fname;
 			return error;
 		}
 		fname = fname->next;
@@ -450,11 +450,21 @@ static int ext4_dx_readdir(struct file * filp,
 	 * If there are any leftover names on the hash collision
 	 * chain, return them first.
 	 */
-	if (info->extra_fname &&
-	    call_filldir(filp, dirent, filldir, info->extra_fname))
-		goto finished;
+	if (info->extra_fname) {
+		if (call_filldir(filp, dirent, filldir, info->extra_fname))
+			goto finished;
 
-	if (!info->curr_node)
+		info->extra_fname = NULL;
+		info->curr_node = rb_next(info->curr_node);
+		if (!info->curr_node) {
+			if (info->next_hash == ~0) {
+				filp->f_pos = EXT4_HTREE_EOF;
+				goto finished;
+			}
+			info->curr_hash = info->next_hash;
+			info->curr_minor_hash = 0;
+		}
+	} else if (!info->curr_node)
 		info->curr_node = rb_first(&info->root);
 
 	while (1) {
-- 
cgit v1.2.3


From 88aa3cff4e9a38b953de9fbc54c96e619a2bb9f9 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sat, 16 Aug 2008 07:57:35 -0400
Subject: ext4: Use ext4_discard_reservations instead of mballoc-specific call

In ext4_ext_truncate(), we should use the more generic
ext4_discard_reservations() call so we do the right thing when the
filesystem is mounted with the nomballoc option.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Reviewed-by: Mingming Cao <cmm@us.ibm.com>
---
 fs/ext4/extents.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 612c3d2c3824..7212947a8ca3 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2819,7 +2819,7 @@ void ext4_ext_truncate(struct inode *inode)
 	down_write(&EXT4_I(inode)->i_data_sem);
 	ext4_ext_invalidate_cache(inode);
 
-	ext4_mb_discard_inode_preallocations(inode);
+	ext4_discard_reservation(inode);
 
 	/*
 	 * TODO: optimization is possible here.
-- 
cgit v1.2.3


From 37609fd5ae62db75026d9f53096a1fbc35e040d9 Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@redhat.com>
Date: Tue, 19 Aug 2008 22:13:41 -0400
Subject: ext4: don't try to resize if there are no reserved gdt blocks left

When trying to resize an ext4 fs and you run out of reserved gdt blocks,
you get an error that doesn't actually tell you what went wrong, it just
says that the gdb it picked is not correct, which is the case since you
don't have any reserved gdt blocks left.  This patch adds a check to make
sure you have reserved gdt blocks to use, and if not prints out a more
relevant error.

Signed-off-by: Josef Bacik <jbacik@redhat.com>
Cc: <linux-ext4@vger.kernel.org>
Cc: Andreas Dilger <adilger@sun.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/resize.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 0a9265164265..b3d35604ea18 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -773,7 +773,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
 
 	if (reserved_gdb || gdb_off == 0) {
 		if (!EXT4_HAS_COMPAT_FEATURE(sb,
-					     EXT4_FEATURE_COMPAT_RESIZE_INODE)){
+					     EXT4_FEATURE_COMPAT_RESIZE_INODE)
+		    || !le16_to_cpu(es->s_reserved_gdt_blocks)) {
 			ext4_warning(sb, __func__,
 				     "No reserved GDT blocks, can't resize");
 			return -EPERM;
-- 
cgit v1.2.3


From c001077f4003fa75793bb62979baa6241dd8eb19 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Tue, 19 Aug 2008 22:19:50 -0400
Subject: ext4: Fix bug where we return ENOSPC even though we have plenty of
 inodes

The find_group_flex() function starts with best_flex as the
parent_fbg_group, which happens to have 0 inodes free.  Some of the
flex groups searched have free blocks and free inodes, but the
flex_freeb_ratio is < 10, so they're skipped.  Then when a group is
compared to the current "best" flex group, it does not have more free
blocks than "best", so it is skipped as well.

This continues until no flex group with free inodes is found which has
a proper ratio or which has more free blocks than the "best" group,
and we're left with a "best" group that has 0 inodes free, and we
return -ENOSPC.

We fix this by changing the logic so that if the current "best" flex
group has no inodes free, and the current one does have room, it is
promoted to the next "best."

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/ialloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 655e760212b8..f344834bbf58 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -351,7 +351,7 @@ find_close_to_parent:
 			goto found_flexbg;
 		}
 
-		if (best_flex < 0 ||
+		if (flex_group[best_flex].free_inodes == 0 ||
 		    (flex_group[i].free_blocks >
 		     flex_group[best_flex].free_blocks &&
 		     flex_group[i].free_inodes))
-- 
cgit v1.2.3


From a02908f19c819aeec5e3dcf238adaa6deddd70b0 Mon Sep 17 00:00:00 2001
From: Mingming Cao <cmm@us.ibm.com>
Date: Tue, 19 Aug 2008 22:16:07 -0400
Subject: ext4: journal credits calulation cleanup and fix for non-extent
 writepage

When considering how many journal credits are needed for modifying a
chunk of data, we need to account for the super block, inode block,
quota blocks and xattr block, indirect/index blocks, also, group bitmap
and group descriptor blocks for new allocation (including data and
indirect/index blocks). There are many places in ext4 do the calculation
on their own and often missed one or two meta blocks, and often they
assume single block allocation, and did not considering the multile
chunk of allocation case.

This patch is trying to cleanup current journal credit code, provides
some common helper funtion to calculate the journal credits, to be used
for writepage, writepages, DIO, fallocate, migration, defrag, and for
both nonextent and extent files.

This patch modified the writepage/write_begin credit caculation for
nonextent files, to use the new helper function. It also fixed the
problem that writepage on nonextent files did not consider the case
blocksize <pagesize, thus could possibelly need multiple block
allocation in a single transaction.

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/ext4.h      |   3 ++
 fs/ext4/ext4_jbd2.h |   8 ++++
 fs/ext4/inode.c     | 131 ++++++++++++++++++++++++++++++++++++++--------------
 3 files changed, 108 insertions(+), 34 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 6c7924d9e358..38e661b0ea88 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1072,6 +1072,7 @@ extern void ext4_set_inode_flags(struct inode *);
 extern void ext4_get_inode_flags(struct ext4_inode_info *);
 extern void ext4_set_aops(struct inode *inode);
 extern int ext4_writepage_trans_blocks(struct inode *);
+extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int idxblocks);
 extern int ext4_block_truncate_page(handle_t *handle,
 		struct address_space *mapping, loff_t from);
 extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page);
@@ -1227,6 +1228,8 @@ extern const struct inode_operations ext4_fast_symlink_inode_operations;
 /* extents.c */
 extern int ext4_ext_tree_init(handle_t *handle, struct inode *);
 extern int ext4_ext_writepage_trans_blocks(struct inode *, int);
+extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks,
+				       int chunk);
 extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 			ext4_lblk_t iblock,
 			unsigned long max_blocks, struct buffer_head *bh_result,
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index eb8bc3afe6e9..b455c685a98b 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -51,6 +51,14 @@
 					 EXT4_XATTR_TRANS_BLOCKS - 2 + \
 					 2*EXT4_QUOTA_TRANS_BLOCKS(sb))
 
+/*
+ * Define the number of metadata blocks we need to account to modify data.
+ *
+ * This include super block, inode block, quota blocks and xattr blocks
+ */
+#define EXT4_META_TRANS_BLOCKS(sb)	(EXT4_XATTR_TRANS_BLOCKS + \
+					2*EXT4_QUOTA_TRANS_BLOCKS(sb))
+
 /* Delete operations potentially hit one directory's namespace plus an
  * entire inode, plus arbitrary amounts of bitmap/indirection data.  Be
  * generous.  We can grow the delete transaction later if necessary. */
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 5e17d5f22a7e..a27129065144 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4354,56 +4354,119 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
 	return 0;
 }
 
+static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks,
+				      int chunk)
+{
+	int indirects;
+
+	/* if nrblocks are contiguous */
+	if (chunk) {
+		/*
+		 * With N contiguous data blocks, it need at most
+		 * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) indirect blocks
+		 * 2 dindirect blocks
+		 * 1 tindirect block
+		 */
+		indirects = nrblocks / EXT4_ADDR_PER_BLOCK(inode->i_sb);
+		return indirects + 3;
+	}
+	/*
+	 * if nrblocks are not contiguous, worse case, each block touch
+	 * a indirect block, and each indirect block touch a double indirect
+	 * block, plus a triple indirect block
+	 */
+	indirects = nrblocks * 2 + 1;
+	return indirects;
+}
+
+static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
+{
+	if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
+		return ext4_indirect_trans_blocks(inode, nrblocks, 0);
+	return ext4_ext_index_trans_blocks(inode, nrblocks, 0);
+}
 /*
- * How many blocks doth make a writepage()?
+ * Account for index blocks, block groups bitmaps and block group
+ * descriptor blocks if modify datablocks and index blocks
+ * worse case, the indexs blocks spread over different block groups
  *
- * With N blocks per page, it may be:
- * N data blocks
- * 2 indirect block
- * 2 dindirect
- * 1 tindirect
- * N+5 bitmap blocks (from the above)
- * N+5 group descriptor summary blocks
- * 1 inode block
- * 1 superblock.
- * 2 * EXT4_SINGLEDATA_TRANS_BLOCKS for the quote files
+ * If datablocks are discontiguous, they are possible to spread over
+ * different block groups too. If they are contiugous, with flexbg,
+ * they could still across block group boundary.
  *
- * 3 * (N + 5) + 2 + 2 * EXT4_SINGLEDATA_TRANS_BLOCKS
+ * Also account for superblock, inode, quota and xattr blocks
+ */
+int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
+{
+	int groups, gdpblocks;
+	int idxblocks;
+	int ret = 0;
+
+	/*
+	 * How many index blocks need to touch to modify nrblocks?
+	 * The "Chunk" flag indicating whether the nrblocks is
+	 * physically contiguous on disk
+	 *
+	 * For Direct IO and fallocate, they calls get_block to allocate
+	 * one single extent at a time, so they could set the "Chunk" flag
+	 */
+	idxblocks = ext4_index_trans_blocks(inode, nrblocks, chunk);
+
+	ret = idxblocks;
+
+	/*
+	 * Now let's see how many group bitmaps and group descriptors need
+	 * to account
+	 */
+	groups = idxblocks;
+	if (chunk)
+		groups += 1;
+	else
+		groups += nrblocks;
+
+	gdpblocks = groups;
+	if (groups > EXT4_SB(inode->i_sb)->s_groups_count)
+		groups = EXT4_SB(inode->i_sb)->s_groups_count;
+	if (groups > EXT4_SB(inode->i_sb)->s_gdb_count)
+		gdpblocks = EXT4_SB(inode->i_sb)->s_gdb_count;
+
+	/* bitmaps and block group descriptor blocks */
+	ret += groups + gdpblocks;
+
+	/* Blocks for super block, inode, quota and xattr blocks */
+	ret += EXT4_META_TRANS_BLOCKS(inode->i_sb);
+
+	return ret;
+}
+
+/*
+ * Calulate the total number of credits to reserve to fit
+ * the modification of a single pages into a single transaction
  *
- * With ordered or writeback data it's the same, less the N data blocks.
+ * This could be called via ext4_write_begin() or later
+ * ext4_da_writepages() in delalyed allocation case.
  *
- * If the inode's direct blocks can hold an integral number of pages then a
- * page cannot straddle two indirect blocks, and we can only touch one indirect
- * and dindirect block, and the "5" above becomes "3".
+ * In both case it's possible that we could allocating multiple
+ * chunks of blocks. We need to consider the worse case, when
+ * one new block per extent.
  *
- * This still overestimates under most circumstances.  If we were to pass the
- * start and end offsets in here as well we could do block_to_path() on each
- * block and work out the exact number of indirects which are touched.  Pah.
+ * For Direct IO and fallocate, the journal credits reservation
+ * is based on one single extent allocation, so they could use
+ * EXT4_DATA_TRANS_BLOCKS to get the needed credit to log a single
+ * chunk of allocation needs.
  */
-
 int ext4_writepage_trans_blocks(struct inode *inode)
 {
 	int bpp = ext4_journal_blocks_per_page(inode);
-	int indirects = (EXT4_NDIR_BLOCKS % bpp) ? 5 : 3;
 	int ret;
 
-	if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
-		return ext4_ext_writepage_trans_blocks(inode, bpp);
+	ret = ext4_meta_trans_blocks(inode, bpp, 0);
 
+	/* Account for data blocks for journalled mode */
 	if (ext4_should_journal_data(inode))
-		ret = 3 * (bpp + indirects) + 2;
-	else
-		ret = 2 * (bpp + indirects) + 2;
-
-#ifdef CONFIG_QUOTA
-	/* We know that structure was already allocated during DQUOT_INIT so
-	 * we will be updating only the data blocks + inodes */
-	ret += 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
-#endif
-
+		ret += bpp;
 	return ret;
 }
-
 /*
  * The caller must have previously called ext4_reserve_inode_write().
  * Give this, we know that the caller already has write access to iloc->bh.
-- 
cgit v1.2.3


From ee12b630687d510f6f4b6d4acdc4e267fd4adeda Mon Sep 17 00:00:00 2001
From: Mingming Cao <cmm@us.ibm.com>
Date: Tue, 19 Aug 2008 22:16:05 -0400
Subject: ext4: journal credits reservation fixes for extent file writepage

This patch modified the writepage/write_begin credit calculation for
extent files, to use the credits caculation helper function.

The current calculation of how many index/leaf blocks should be
accounted is too conservetive, it always considered the worse case,
where the tree level is 5, and in the case of multiple chunk
allocations, it always assumed no blocks were dirtied in common across
the allocations. This path uses the accurate depth of the inode with
some extras to calculate the index blocks, and also less conservative in
the case of multiple allocation accounting.

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/ext4_extents.h |   4 +-
 fs/ext4/extents.c      | 104 +++++++++++++++++++++----------------------------
 fs/ext4/migrate.c      |   3 +-
 3 files changed, 49 insertions(+), 62 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 6c166c0a54b7..d33dc56d6986 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -216,7 +216,9 @@ extern int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks);
 extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *);
 extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t);
 extern int ext4_extent_tree_init(handle_t *, struct inode *);
-extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *);
+extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode,
+						   int num,
+						   struct ext4_ext_path *path);
 extern int ext4_ext_try_to_merge(struct inode *inode,
 				 struct ext4_ext_path *path,
 				 struct ext4_extent *);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 7212947a8ca3..5c5dd3a1d657 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1747,54 +1747,61 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
 }
 
 /*
- * ext4_ext_calc_credits_for_insert:
- * This routine returns max. credits that the extent tree can consume.
- * It should be OK for low-performance paths like ->writepage()
- * To allow many writing processes to fit into a single transaction,
- * the caller should calculate credits under i_data_sem and
- * pass the actual path.
+ * ext4_ext_calc_credits_for_single_extent:
+ * This routine returns max. credits that needed to insert an extent
+ * to the extent tree.
+ * When pass the actual path, the caller should calculate credits
+ * under i_data_sem.
  */
-int ext4_ext_calc_credits_for_insert(struct inode *inode,
+int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int num,
 						struct ext4_ext_path *path)
 {
-	int depth, needed;
-
 	if (path) {
+		int depth = ext_depth(inode);
+		int ret;
+
 		/* probably there is space in leaf? */
-		depth = ext_depth(inode);
 		if (le16_to_cpu(path[depth].p_hdr->eh_entries)
-				< le16_to_cpu(path[depth].p_hdr->eh_max))
-			return 1;
-	}
+				< le16_to_cpu(path[depth].p_hdr->eh_max)) {
 
-	/*
-	 * given 32-bit logical block (4294967296 blocks), max. tree
-	 * can be 4 levels in depth -- 4 * 340^4 == 53453440000.
-	 * Let's also add one more level for imbalance.
-	 */
-	depth = 5;
-
-	/* allocation of new data block(s) */
-	needed = 2;
+			/*
+			 *  There are some space in the leaf tree, no
+			 *  need to account for leaf block credit
+			 *
+			 *  bitmaps and block group descriptor blocks
+			 *  and other metadat blocks still need to be
+			 *  accounted.
+			 */
+			/* 1 one bitmap, 1 block group descriptor */
+			ret = 2 + EXT4_META_TRANS_BLOCKS(inode->i_sb);
+		}
+	}
 
-	/*
-	 * tree can be full, so it would need to grow in depth:
-	 * we need one credit to modify old root, credits for
-	 * new root will be added in split accounting
-	 */
-	needed += 1;
+	return ext4_meta_trans_blocks(inode, num, 1);
+}
 
-	/*
-	 * Index split can happen, we would need:
-	 *    allocate intermediate indexes (bitmap + group)
-	 *  + change two blocks at each level, but root (already included)
-	 */
-	needed += (depth * 2) + (depth * 2);
+/*
+ * How many index/leaf blocks need to change/allocate to modify nrblocks?
+ *
+ * if nrblocks are fit in a single extent (chunk flag is 1), then
+ * in the worse case, each tree level index/leaf need to be changed
+ * if the tree split due to insert a new extent, then the old tree
+ * index/leaf need to be updated too
+ *
+ * If the nrblocks are discontiguous, they could cause
+ * the whole tree split more than once, but this is really rare.
+ */
+int ext4_ext_index_trans_blocks(struct inode *inode, int num, int chunk)
+{
+	int index;
+	int depth = ext_depth(inode);
 
-	/* any allocation modifies superblock */
-	needed += 1;
+	if (chunk)
+		index = depth * 2;
+	else
+		index = depth * 3;
 
-	return needed;
+	return index;
 }
 
 static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
@@ -1921,9 +1928,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 			correct_index = 1;
 			credits += (ext_depth(inode)) + 1;
 		}
-#ifdef CONFIG_QUOTA
 		credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
-#endif
 
 		err = ext4_ext_journal_restart(handle, credits);
 		if (err)
@@ -2858,27 +2863,6 @@ out_stop:
 	ext4_journal_stop(handle);
 }
 
-/*
- * ext4_ext_writepage_trans_blocks:
- * calculate max number of blocks we could modify
- * in order to allocate new block for an inode
- */
-int ext4_ext_writepage_trans_blocks(struct inode *inode, int num)
-{
-	int needed;
-
-	needed = ext4_ext_calc_credits_for_insert(inode, NULL);
-
-	/* caller wants to allocate num blocks, but note it includes sb */
-	needed = needed * num - (num - 1);
-
-#ifdef CONFIG_QUOTA
-	needed += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
-#endif
-
-	return needed;
-}
-
 static void ext4_falloc_update_inode(struct inode *inode,
 				int mode, loff_t new_size, int update_ctime)
 {
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index b9e077ba07e9..46fc0b5b12ba 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -53,7 +53,8 @@ static int finish_range(handle_t *handle, struct inode *inode,
 	 * credit. But below we try to not accumalate too much
 	 * of them by restarting the journal.
 	 */
-	needed = ext4_ext_calc_credits_for_insert(inode, path);
+	needed = ext4_ext_calc_credits_for_single_extent(inode,
+		    lb->last_block - lb->first_block + 1, path);
 
 	/*
 	 * Make sure the credit we accumalated is not really high
-- 
cgit v1.2.3


From f3bd1f3fa8ca7ec70cfd87aa94dc5e1a260901f2 Mon Sep 17 00:00:00 2001
From: Mingming Cao <cmm@us.ibm.com>
Date: Tue, 19 Aug 2008 22:16:03 -0400
Subject: ext4: journal credits reservation fixes for DIO, fallocate

DIO and fallocate credit calculation is different than writepage, as
they do start a new journal right for each call to ext4_get_blocks_wrap().
This patch uses the helper function in DIO and fallocate case, passing
a flag indicating that the modified data are contigous thus could account
less indirect/index blocks.

This patch also fixed the journal credit reservation for direct I/O
(DIO).  Previously the estimated credits for DIO only was calculated for
non-extent files, which was not enough if the file is extent-based.

Also fixed was fallocate double-counting credits for modifying the the
superblock.

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/ext4.h    |  1 +
 fs/ext4/extents.c | 11 +++++------
 fs/ext4/inode.c   | 45 ++++++++++++++++++++++++---------------------
 3 files changed, 30 insertions(+), 27 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 38e661b0ea88..295003241d3d 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1073,6 +1073,7 @@ extern void ext4_get_inode_flags(struct ext4_inode_info *);
 extern void ext4_set_aops(struct inode *inode);
 extern int ext4_writepage_trans_blocks(struct inode *);
 extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int idxblocks);
+extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
 extern int ext4_block_truncate_page(handle_t *handle,
 		struct address_space *mapping, loff_t from);
 extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 5c5dd3a1d657..5596b70efa20 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1758,7 +1758,7 @@ int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int num,
 {
 	if (path) {
 		int depth = ext_depth(inode);
-		int ret;
+		int ret = 0;
 
 		/* probably there is space in leaf? */
 		if (le16_to_cpu(path[depth].p_hdr->eh_entries)
@@ -1777,7 +1777,7 @@ int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int num,
 		}
 	}
 
-	return ext4_meta_trans_blocks(inode, num, 1);
+	return ext4_chunk_trans_blocks(inode, num);
 }
 
 /*
@@ -2810,7 +2810,7 @@ void ext4_ext_truncate(struct inode *inode)
 	/*
 	 * probably first extent we're gonna free will be last in block
 	 */
-	err = ext4_writepage_trans_blocks(inode) + 3;
+	err = ext4_writepage_trans_blocks(inode);
 	handle = ext4_journal_start(inode, err);
 	if (IS_ERR(handle))
 		return;
@@ -2923,10 +2923,9 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
 	max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits)
 							- block;
 	/*
-	 * credits to insert 1 extent into extent tree + buffers to be able to
-	 * modify 1 super block, 1 block bitmap and 1 group descriptor.
+	 * credits to insert 1 extent into extent tree
 	 */
-	credits = EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + 3;
+	credits = ext4_chunk_trans_blocks(inode, max_blocks);
 	mutex_lock(&inode->i_mutex);
 retry:
 	while (ret >= 0 && ret < max_blocks) {
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index a27129065144..ffc95ba48859 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1044,18 +1044,6 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
 	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
 }
 
-/* Maximum number of blocks we map for direct IO at once. */
-#define DIO_MAX_BLOCKS 4096
-/*
- * Number of credits we need for writing DIO_MAX_BLOCKS:
- * We need sb + group descriptor + bitmap + inode -> 4
- * For B blocks with A block pointers per block we need:
- * 1 (triple ind.) + (B/A/A + 2) (doubly ind.) + (B/A + 2) (indirect).
- * If we plug in 4096 for B and 256 for A (for 1KB block size), we get 25.
- */
-#define DIO_CREDITS 25
-
-
 /*
  * The ext4_get_blocks_wrap() function try to look up the requested blocks,
  * and returns if the blocks are already mapped.
@@ -1167,19 +1155,23 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
 	return retval;
 }
 
+/* Maximum number of blocks we map for direct IO at once. */
+#define DIO_MAX_BLOCKS 4096
+
 static int ext4_get_block(struct inode *inode, sector_t iblock,
 			struct buffer_head *bh_result, int create)
 {
 	handle_t *handle = ext4_journal_current_handle();
 	int ret = 0, started = 0;
 	unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
+	int dio_credits;
 
 	if (create && !handle) {
 		/* Direct IO write... */
 		if (max_blocks > DIO_MAX_BLOCKS)
 			max_blocks = DIO_MAX_BLOCKS;
-		handle = ext4_journal_start(inode, DIO_CREDITS +
-			      2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb));
+		dio_credits = ext4_chunk_trans_blocks(inode, max_blocks);
+		handle = ext4_journal_start(inode, dio_credits);
 		if (IS_ERR(handle)) {
 			ret = PTR_ERR(handle);
 			goto out;
@@ -2243,7 +2235,7 @@ static int ext4_da_writepage(struct page *page,
  * for DIO, writepages, and truncate
  */
 #define EXT4_MAX_WRITEBACK_PAGES      DIO_MAX_BLOCKS
-#define EXT4_MAX_WRITEBACK_CREDITS    DIO_CREDITS
+#define EXT4_MAX_WRITEBACK_CREDITS    25
 
 static int ext4_da_writepages(struct address_space *mapping,
 				struct writeback_control *wbc)
@@ -4441,7 +4433,8 @@ int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
 
 /*
  * Calulate the total number of credits to reserve to fit
- * the modification of a single pages into a single transaction
+ * the modification of a single pages into a single transaction,
+ * which may include multiple chunks of block allocations.
  *
  * This could be called via ext4_write_begin() or later
  * ext4_da_writepages() in delalyed allocation case.
@@ -4449,11 +4442,6 @@ int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
  * In both case it's possible that we could allocating multiple
  * chunks of blocks. We need to consider the worse case, when
  * one new block per extent.
- *
- * For Direct IO and fallocate, the journal credits reservation
- * is based on one single extent allocation, so they could use
- * EXT4_DATA_TRANS_BLOCKS to get the needed credit to log a single
- * chunk of allocation needs.
  */
 int ext4_writepage_trans_blocks(struct inode *inode)
 {
@@ -4467,6 +4455,21 @@ int ext4_writepage_trans_blocks(struct inode *inode)
 		ret += bpp;
 	return ret;
 }
+
+/*
+ * Calculate the journal credits for a chunk of data modification.
+ *
+ * This is called from DIO, fallocate or whoever calling
+ * ext4_get_blocks_wrap() to map/allocate a chunk of contigous disk blocks.
+ *
+ * journal buffers for data blocks are not included here, as DIO
+ * and fallocate do no need to journal data buffers.
+ */
+int ext4_chunk_trans_blocks(struct inode *inode, int nrblocks)
+{
+	return ext4_meta_trans_blocks(inode, nrblocks, 1);
+}
+
 /*
  * The caller must have previously called ext4_reserve_inode_write().
  * Give this, we know that the caller already has write access to iloc->bh.
-- 
cgit v1.2.3


From a1d6cc563bfdf1bf2829d3e6ce4d8b774251796b Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Tue, 19 Aug 2008 21:55:02 -0400
Subject: ext4: Rework the ext4_da_writepages() function

With the below changes we reserve credit needed to insert only one
extent resulting from a call to single get_block.  This makes sure we
don't take too much journal credits during writeout.  We also don't
limit the pages to write.  That means we loop through the dirty pages
building largest possible contiguous block request.  Then we issue a
single get_block request.  We may get less block that we requested.  If
so we would end up not mapping some of the buffer_heads.  That means
those buffer_heads are still marked delay.  Later in the writepage
callback via __mpage_writepage we redirty those pages.

We should also not limit/throttle wbc->nr_to_write in the filesystem
writepages callback. That cause wrong behaviour in
generic_sync_sb_inodes caused by wbc->nr_to_write being <= 0

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Reviewed-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/inode.c | 201 +++++++++++++++++++++++++++++++-------------------------
 1 file changed, 113 insertions(+), 88 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index ffc95ba48859..8dd22eade42c 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -41,6 +41,8 @@
 #include "acl.h"
 #include "ext4_extents.h"
 
+#define MPAGE_DA_EXTENT_TAIL 0x01
+
 static inline int ext4_begin_ordered_truncate(struct inode *inode,
 					      loff_t new_size)
 {
@@ -1626,11 +1628,13 @@ struct mpage_da_data {
 	unsigned long first_page, next_page;	/* extent of pages */
 	get_block_t *get_block;
 	struct writeback_control *wbc;
+	int io_done;
+	long pages_written;
 };
 
 /*
  * mpage_da_submit_io - walks through extent of pages and try to write
- * them with __mpage_writepage()
+ * them with writepage() call back
  *
  * @mpd->inode: inode
  * @mpd->first_page: first page of the extent
@@ -1645,18 +1649,11 @@ struct mpage_da_data {
 static int mpage_da_submit_io(struct mpage_da_data *mpd)
 {
 	struct address_space *mapping = mpd->inode->i_mapping;
-	struct mpage_data mpd_pp = {
-		.bio = NULL,
-		.last_block_in_bio = 0,
-		.get_block = mpd->get_block,
-		.use_writepage = 1,
-	};
 	int ret = 0, err, nr_pages, i;
 	unsigned long index, end;
 	struct pagevec pvec;
 
 	BUG_ON(mpd->next_page <= mpd->first_page);
-
 	pagevec_init(&pvec, 0);
 	index = mpd->first_page;
 	end = mpd->next_page - 1;
@@ -1674,8 +1671,9 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
 				break;
 			index++;
 
-			err = __mpage_writepage(page, mpd->wbc, &mpd_pp);
-
+			err = mapping->a_ops->writepage(page, mpd->wbc);
+			if (!err)
+				mpd->pages_written++;
 			/*
 			 * In error case, we have to continue because
 			 * remaining pages are still locked
@@ -1686,9 +1684,6 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
 		}
 		pagevec_release(&pvec);
 	}
-	if (mpd_pp.bio)
-		mpage_bio_submit(WRITE, mpd_pp.bio);
-
 	return ret;
 }
 
@@ -1711,7 +1706,7 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical,
 	int blocks = exbh->b_size >> inode->i_blkbits;
 	sector_t pblock = exbh->b_blocknr, cur_logical;
 	struct buffer_head *head, *bh;
-	unsigned long index, end;
+	pgoff_t index, end;
 	struct pagevec pvec;
 	int nr_pages, i;
 
@@ -1796,13 +1791,11 @@ static inline void __unmap_underlying_blocks(struct inode *inode,
  *
  * The function skips space we know is already mapped to disk blocks.
  *
- * The function ignores errors ->get_block() returns, thus real
- * error handling is postponed to __mpage_writepage()
  */
 static void mpage_da_map_blocks(struct mpage_da_data *mpd)
 {
+	int err = 0;
 	struct buffer_head *lbh = &mpd->lbh;
-	int err = 0, remain = lbh->b_size;
 	sector_t next = lbh->b_blocknr;
 	struct buffer_head new;
 
@@ -1812,35 +1805,32 @@ static void mpage_da_map_blocks(struct mpage_da_data *mpd)
 	if (buffer_mapped(lbh) && !buffer_delay(lbh))
 		return;
 
-	while (remain) {
-		new.b_state = lbh->b_state;
-		new.b_blocknr = 0;
-		new.b_size = remain;
-		err = mpd->get_block(mpd->inode, next, &new, 1);
-		if (err) {
-			/*
-			 * Rather than implement own error handling
-			 * here, we just leave remaining blocks
-			 * unallocated and try again with ->writepage()
-			 */
-			break;
-		}
-		BUG_ON(new.b_size == 0);
+	new.b_state = lbh->b_state;
+	new.b_blocknr = 0;
+	new.b_size = lbh->b_size;
 
-		if (buffer_new(&new))
-			__unmap_underlying_blocks(mpd->inode, &new);
+	/*
+	 * If we didn't accumulate anything
+	 * to write simply return
+	 */
+	if (!new.b_size)
+		return;
+	err = mpd->get_block(mpd->inode, next, &new, 1);
+	if (err)
+		return;
+	BUG_ON(new.b_size == 0);
 
-		/*
-		 * If blocks are delayed marked, we need to
-		 * put actual blocknr and drop delayed bit
-		 */
-		if (buffer_delay(lbh) || buffer_unwritten(lbh))
-			mpage_put_bnr_to_bhs(mpd, next, &new);
+	if (buffer_new(&new))
+		__unmap_underlying_blocks(mpd->inode, &new);
 
-		/* go for the remaining blocks */
-		next += new.b_size >> mpd->inode->i_blkbits;
-		remain -= new.b_size;
-	}
+	/*
+	 * If blocks are delayed marked, we need to
+	 * put actual blocknr and drop delayed bit
+	 */
+	if (buffer_delay(lbh) || buffer_unwritten(lbh))
+		mpage_put_bnr_to_bhs(mpd, next, &new);
+
+	return;
 }
 
 #define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \
@@ -1886,13 +1876,9 @@ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
 	 * need to flush current  extent and start new one
 	 */
 	mpage_da_map_blocks(mpd);
-
-	/*
-	 * Now start a new extent
-	 */
-	lbh->b_size = bh->b_size;
-	lbh->b_state = bh->b_state & BH_FLAGS;
-	lbh->b_blocknr = logical;
+	mpage_da_submit_io(mpd);
+	mpd->io_done = 1;
+	return;
 }
 
 /*
@@ -1912,17 +1898,35 @@ static int __mpage_da_writepage(struct page *page,
 	struct buffer_head *bh, *head, fake;
 	sector_t logical;
 
+	if (mpd->io_done) {
+		/*
+		 * Rest of the page in the page_vec
+		 * redirty then and skip then. We will
+		 * try to to write them again after
+		 * starting a new transaction
+		 */
+		redirty_page_for_writepage(wbc, page);
+		unlock_page(page);
+		return MPAGE_DA_EXTENT_TAIL;
+	}
 	/*
 	 * Can we merge this page to current extent?
 	 */
 	if (mpd->next_page != page->index) {
 		/*
 		 * Nope, we can't. So, we map non-allocated blocks
-		 * and start IO on them using __mpage_writepage()
+		 * and start IO on them using writepage()
 		 */
 		if (mpd->next_page != mpd->first_page) {
 			mpage_da_map_blocks(mpd);
 			mpage_da_submit_io(mpd);
+			/*
+			 * skip rest of the page in the page_vec
+			 */
+			mpd->io_done = 1;
+			redirty_page_for_writepage(wbc, page);
+			unlock_page(page);
+			return MPAGE_DA_EXTENT_TAIL;
 		}
 
 		/*
@@ -1953,6 +1957,8 @@ static int __mpage_da_writepage(struct page *page,
 		set_buffer_dirty(bh);
 		set_buffer_uptodate(bh);
 		mpage_add_bh_to_extent(mpd, logical, bh);
+		if (mpd->io_done)
+			return MPAGE_DA_EXTENT_TAIL;
 	} else {
 		/*
 		 * Page with regular buffer heads, just add all dirty ones
@@ -1961,8 +1967,12 @@ static int __mpage_da_writepage(struct page *page,
 		bh = head;
 		do {
 			BUG_ON(buffer_locked(bh));
-			if (buffer_dirty(bh))
+			if (buffer_dirty(bh) &&
+				(!buffer_mapped(bh) || buffer_delay(bh))) {
 				mpage_add_bh_to_extent(mpd, logical, bh);
+				if (mpd->io_done)
+					return MPAGE_DA_EXTENT_TAIL;
+			}
 			logical++;
 		} while ((bh = bh->b_this_page) != head);
 	}
@@ -1981,22 +1991,13 @@ static int __mpage_da_writepage(struct page *page,
  *
  * This is a library function, which implements the writepages()
  * address_space_operation.
- *
- * In order to avoid duplication of logic that deals with partial pages,
- * multiple bio per page, etc, we find non-allocated blocks, allocate
- * them with minimal calls to ->get_block() and re-use __mpage_writepage()
- *
- * It's important that we call __mpage_writepage() only once for each
- * involved page, otherwise we'd have to implement more complicated logic
- * to deal with pages w/o PG_lock or w/ PG_writeback and so on.
- *
- * See comments to mpage_writepages()
  */
 static int mpage_da_writepages(struct address_space *mapping,
 			       struct writeback_control *wbc,
 			       get_block_t get_block)
 {
 	struct mpage_da_data mpd;
+	long to_write;
 	int ret;
 
 	if (!get_block)
@@ -2010,17 +2011,22 @@ static int mpage_da_writepages(struct address_space *mapping,
 	mpd.first_page = 0;
 	mpd.next_page = 0;
 	mpd.get_block = get_block;
+	mpd.io_done = 0;
+	mpd.pages_written = 0;
+
+	to_write = wbc->nr_to_write;
 
 	ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, &mpd);
 
 	/*
 	 * Handle last extent of pages
 	 */
-	if (mpd.next_page != mpd.first_page) {
+	if (!mpd.io_done && mpd.next_page != mpd.first_page) {
 		mpage_da_map_blocks(&mpd);
 		mpage_da_submit_io(&mpd);
 	}
 
+	wbc->nr_to_write = to_write - mpd.pages_written;
 	return ret;
 }
 
@@ -2238,7 +2244,7 @@ static int ext4_da_writepage(struct page *page,
 #define EXT4_MAX_WRITEBACK_CREDITS    25
 
 static int ext4_da_writepages(struct address_space *mapping,
-				struct writeback_control *wbc)
+			      struct writeback_control *wbc)
 {
 	struct inode *inode = mapping->host;
 	handle_t *handle = NULL;
@@ -2246,42 +2252,53 @@ static int ext4_da_writepages(struct address_space *mapping,
 	int ret = 0;
 	long to_write;
 	loff_t range_start = 0;
+	long pages_skipped = 0;
 
 	/*
 	 * No pages to write? This is mainly a kludge to avoid starting
 	 * a transaction for special inodes like journal inode on last iput()
 	 * because that could violate lock ordering on umount
 	 */
-	if (!mapping->nrpages)
+	if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
 		return 0;
 
-	/*
-	 * Estimate the worse case needed credits to write out
-	 * EXT4_MAX_BUF_BLOCKS pages
-	 */
-	needed_blocks = EXT4_MAX_WRITEBACK_CREDITS;
-
-	to_write = wbc->nr_to_write;
-	if (!wbc->range_cyclic) {
+	if (!wbc->range_cyclic)
 		/*
 		 * If range_cyclic is not set force range_cont
 		 * and save the old writeback_index
 		 */
 		wbc->range_cont = 1;
-		range_start =  wbc->range_start;
-	}
 
-	while (!ret && to_write) {
+	range_start =  wbc->range_start;
+	pages_skipped = wbc->pages_skipped;
+
+restart_loop:
+	to_write = wbc->nr_to_write;
+	while (!ret && to_write > 0) {
+
+		/*
+		 * we  insert one extent at a time. So we need
+		 * credit needed for single extent allocation.
+		 * journalled mode is currently not supported
+		 * by delalloc
+		 */
+		BUG_ON(ext4_should_journal_data(inode));
+		needed_blocks = EXT4_DATA_TRANS_BLOCKS(inode->i_sb);
+
 		/* start a new transaction*/
 		handle = ext4_journal_start(inode, needed_blocks);
 		if (IS_ERR(handle)) {
 			ret = PTR_ERR(handle);
+			printk(KERN_EMERG "%s: jbd2_start: "
+			       "%ld pages, ino %lu; err %d\n", __func__,
+				wbc->nr_to_write, inode->i_ino, ret);
+			dump_stack();
 			goto out_writepages;
 		}
 		if (ext4_should_order_data(inode)) {
 			/*
 			 * With ordered mode we need to add
-			 * the inode to the journal handle
+			 * the inode to the journal handl
 			 * when we do block allocation.
 			 */
 			ret = ext4_jbd2_file_inode(handle, inode);
@@ -2289,20 +2306,20 @@ static int ext4_da_writepages(struct address_space *mapping,
 				ext4_journal_stop(handle);
 				goto out_writepages;
 			}
-
 		}
-		/*
-		 * set the max dirty pages could be write at a time
-		 * to fit into the reserved transaction credits
-		 */
-		if (wbc->nr_to_write > EXT4_MAX_WRITEBACK_PAGES)
-			wbc->nr_to_write = EXT4_MAX_WRITEBACK_PAGES;
 
 		to_write -= wbc->nr_to_write;
 		ret = mpage_da_writepages(mapping, wbc,
-						ext4_da_get_block_write);
+					  ext4_da_get_block_write);
 		ext4_journal_stop(handle);
-		if (wbc->nr_to_write) {
+		if (ret == MPAGE_DA_EXTENT_TAIL) {
+			/*
+			 * got one extent now try with
+			 * rest of the pages
+			 */
+			to_write += wbc->nr_to_write;
+			ret = 0;
+		} else if (wbc->nr_to_write) {
 			/*
 			 * There is no more writeout needed
 			 * or we requested for a noblocking writeout
@@ -2314,10 +2331,18 @@ static int ext4_da_writepages(struct address_space *mapping,
 		wbc->nr_to_write = to_write;
 	}
 
+	if (wbc->range_cont && (pages_skipped != wbc->pages_skipped)) {
+		/* We skipped pages in this loop */
+		wbc->range_start = range_start;
+		wbc->nr_to_write = to_write +
+				wbc->pages_skipped - pages_skipped;
+		wbc->pages_skipped = pages_skipped;
+		goto restart_loop;
+	}
+
 out_writepages:
 	wbc->nr_to_write = to_write;
-	if (range_start)
-		wbc->range_start = range_start;
+	wbc->range_start = range_start;
 	return ret;
 }
 
-- 
cgit v1.2.3


From 525f4ed8dcb72c71b306a78ecbf06f41d08fe441 Mon Sep 17 00:00:00 2001
From: Mingming Cao <cmm@us.ibm.com>
Date: Tue, 19 Aug 2008 22:15:58 -0400
Subject: ext4: journal credit fix for the delayed allocation's writepages()
 function

Previous delalloc writepages implementation started a new transaction
outside of a loop which called get_block() to do the block allocation.
Since we didn't know exactly how many blocks would need to be allocated,
the estimated journal credits required was very conservative and caused
many issues.

With the reworked delayed allocation, a new transaction is created for
each get_block(), thus we don't need to guess how many credits for the
multiple chunk of allocation.  We start every transaction with enough
credits for inserting a single exent.  When estimate the credits for
indirect blocks to allocate a chunk of blocks, we need to know the
number of data blocks to allocate.  We use the total number of reserved
delalloc datablocks; if that is too big, for non-extent files, we need
to limit the number of blocks to EXT4_MAX_TRANS_BLOCKS.

Code cleanup from Aneesh.

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Reviewed-off-by:  Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/extents.c |  8 +++---
 fs/ext4/inode.c   | 74 ++++++++++++++++++++++++++++++++++++++++---------------
 2 files changed, 58 insertions(+), 24 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 5596b70efa20..b24d3c53f20c 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1753,7 +1753,7 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
  * When pass the actual path, the caller should calculate credits
  * under i_data_sem.
  */
-int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int num,
+int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks,
 						struct ext4_ext_path *path)
 {
 	if (path) {
@@ -1772,12 +1772,12 @@ int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int num,
 			 *  and other metadat blocks still need to be
 			 *  accounted.
 			 */
-			/* 1 one bitmap, 1 block group descriptor */
+			/* 1 bitmap, 1 block group descriptor */
 			ret = 2 + EXT4_META_TRANS_BLOCKS(inode->i_sb);
 		}
 	}
 
-	return ext4_chunk_trans_blocks(inode, num);
+	return ext4_chunk_trans_blocks(inode, nrblocks);
 }
 
 /*
@@ -1791,7 +1791,7 @@ int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int num,
  * If the nrblocks are discontiguous, they could cause
  * the whole tree split more than once, but this is really rare.
  */
-int ext4_ext_index_trans_blocks(struct inode *inode, int num, int chunk)
+int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
 {
 	int index;
 	int depth = ext_depth(inode);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 8dd22eade42c..d1906d9a22de 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1848,29 +1848,53 @@ static void mpage_da_map_blocks(struct mpage_da_data *mpd)
 static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
 				   sector_t logical, struct buffer_head *bh)
 {
-	struct buffer_head *lbh = &mpd->lbh;
 	sector_t next;
+	size_t b_size = bh->b_size;
+	struct buffer_head *lbh = &mpd->lbh;
+	int nrblocks = lbh->b_size >> mpd->inode->i_blkbits;
 
-	next = lbh->b_blocknr + (lbh->b_size >> mpd->inode->i_blkbits);
-
+	/* check if thereserved journal credits might overflow */
+	if (!(EXT4_I(mpd->inode)->i_flags & EXT4_EXTENTS_FL)) {
+		if (nrblocks >= EXT4_MAX_TRANS_DATA) {
+			/*
+			 * With non-extent format we are limited by the journal
+			 * credit available.  Total credit needed to insert
+			 * nrblocks contiguous blocks is dependent on the
+			 * nrblocks.  So limit nrblocks.
+			 */
+			goto flush_it;
+		} else if ((nrblocks + (b_size >> mpd->inode->i_blkbits)) >
+				EXT4_MAX_TRANS_DATA) {
+			/*
+			 * Adding the new buffer_head would make it cross the
+			 * allowed limit for which we have journal credit
+			 * reserved. So limit the new bh->b_size
+			 */
+			b_size = (EXT4_MAX_TRANS_DATA - nrblocks) <<
+						mpd->inode->i_blkbits;
+			/* we will do mpage_da_submit_io in the next loop */
+		}
+	}
 	/*
 	 * First block in the extent
 	 */
 	if (lbh->b_size == 0) {
 		lbh->b_blocknr = logical;
-		lbh->b_size = bh->b_size;
+		lbh->b_size = b_size;
 		lbh->b_state = bh->b_state & BH_FLAGS;
 		return;
 	}
 
+	next = lbh->b_blocknr + nrblocks;
 	/*
 	 * Can we merge the block to our big extent?
 	 */
 	if (logical == next && (bh->b_state & BH_FLAGS) == lbh->b_state) {
-		lbh->b_size += bh->b_size;
+		lbh->b_size += b_size;
 		return;
 	}
 
+flush_it:
 	/*
 	 * We couldn't merge the block to our extent, so we
 	 * need to flush current  extent and start new one
@@ -2231,17 +2255,29 @@ static int ext4_da_writepage(struct page *page,
 }
 
 /*
- * For now just follow the DIO way to estimate the max credits
- * needed to write out EXT4_MAX_WRITEBACK_PAGES.
- * todo: need to calculate the max credits need for
- * extent based files, currently the DIO credits is based on
- * indirect-blocks mapping way.
- *
- * Probably should have a generic way to calculate credits
- * for DIO, writepages, and truncate
+ * This is called via ext4_da_writepages() to
+ * calulate the total number of credits to reserve to fit
+ * a single extent allocation into a single transaction,
+ * ext4_da_writpeages() will loop calling this before
+ * the block allocation.
  */
-#define EXT4_MAX_WRITEBACK_PAGES      DIO_MAX_BLOCKS
-#define EXT4_MAX_WRITEBACK_CREDITS    25
+
+static int ext4_da_writepages_trans_blocks(struct inode *inode)
+{
+	int max_blocks = EXT4_I(inode)->i_reserved_data_blocks;
+
+	/*
+	 * With non-extent format the journal credit needed to
+	 * insert nrblocks contiguous block is dependent on
+	 * number of contiguous block. So we will limit
+	 * number of contiguous block to a sane value
+	 */
+	if (!(inode->i_flags & EXT4_EXTENTS_FL) &&
+	    (max_blocks > EXT4_MAX_TRANS_DATA))
+		max_blocks = EXT4_MAX_TRANS_DATA;
+
+	return ext4_chunk_trans_blocks(inode, max_blocks);
+}
 
 static int ext4_da_writepages(struct address_space *mapping,
 			      struct writeback_control *wbc)
@@ -2283,7 +2319,7 @@ restart_loop:
 		 * by delalloc
 		 */
 		BUG_ON(ext4_should_journal_data(inode));
-		needed_blocks = EXT4_DATA_TRANS_BLOCKS(inode->i_sb);
+		needed_blocks = ext4_da_writepages_trans_blocks(inode);
 
 		/* start a new transaction*/
 		handle = ext4_journal_start(inode, needed_blocks);
@@ -4461,11 +4497,9 @@ int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
  * the modification of a single pages into a single transaction,
  * which may include multiple chunks of block allocations.
  *
- * This could be called via ext4_write_begin() or later
- * ext4_da_writepages() in delalyed allocation case.
+ * This could be called via ext4_write_begin()
  *
- * In both case it's possible that we could allocating multiple
- * chunks of blocks. We need to consider the worse case, when
+ * We need to consider the worse case, when
  * one new block per extent.
  */
 int ext4_writepage_trans_blocks(struct inode *inode)
-- 
cgit v1.2.3


From 16eb72956496594d023a7d7cd14a86404ad195ad Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Tue, 19 Aug 2008 21:16:54 -0400
Subject: ext4: make sure ext4_has_free_blocks returns 0 for ENOSPC

Fix ext4_has_free_blocks() to return 0 when we don't have enough space.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/balloc.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'fs')

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 1ae5004e93fc..e9fa960ba6da 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -1626,6 +1626,9 @@ ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi,
 		free_blocks =
 			percpu_counter_sum_and_set(&sbi->s_freeblocks_counter);
 #endif
+	if (free_blocks <= root_blocks)
+		/* we don't have free space */
+		return 0;
 	if (free_blocks - root_blocks < nblocks)
 		return free_blocks - root_blocks;
 	return nblocks;
-- 
cgit v1.2.3


From 91246c009094142f95ecc7573b7caed2bcef52c7 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Tue, 19 Aug 2008 21:14:52 -0400
Subject: ext4: Initialize writeback_index to 0 when allocating a new inode

The write_cache_pages() function uses the mapping->writeback_index as
the starting index to write out when range_cyclic is set.  Properly
initialize writeback_index so that we start the writeout at index 0.

This was found when debugging the small file fragmentation on ext4.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/super.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index d5d77958b861..566344b926b7 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -568,6 +568,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
 #endif
 	ei->i_block_alloc_info = NULL;
 	ei->vfs_inode.i_version = 1;
+	ei->vfs_inode.i_data.writeback_index = 0;
 	memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
 	INIT_LIST_HEAD(&ei->i_prealloc_list);
 	spin_lock_init(&ei->i_prealloc_lock);
-- 
cgit v1.2.3


From 5e745b041f2ccad63077118b40468521306f3962 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Mon, 18 Aug 2008 18:00:57 -0400
Subject: ext4: Fix small file fragmentation

For small file block allocations, mballoc uses per cpu prealloc
space.  Use goal block when searching for the right prealloc
space.  Also make sure ext4_da_writepages tries to write
all the pages for small files in single attempt

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/inode.c   | 21 +++++++++++++++------
 fs/ext4/mballoc.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++-------
 2 files changed, 61 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index d1906d9a22de..7e91913e325b 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2282,13 +2282,12 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
 static int ext4_da_writepages(struct address_space *mapping,
 			      struct writeback_control *wbc)
 {
-	struct inode *inode = mapping->host;
 	handle_t *handle = NULL;
-	int needed_blocks;
-	int ret = 0;
-	long to_write;
 	loff_t range_start = 0;
-	long pages_skipped = 0;
+	struct inode *inode = mapping->host;
+	int needed_blocks, ret = 0, nr_to_writebump = 0;
+	long to_write, pages_skipped = 0;
+	struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
 
 	/*
 	 * No pages to write? This is mainly a kludge to avoid starting
@@ -2297,6 +2296,16 @@ static int ext4_da_writepages(struct address_space *mapping,
 	 */
 	if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
 		return 0;
+	/*
+	 * Make sure nr_to_write is >= sbi->s_mb_stream_request
+	 * This make sure small files blocks are allocated in
+	 * single attempt. This ensure that small files
+	 * get less fragmented.
+	 */
+	if (wbc->nr_to_write < sbi->s_mb_stream_request) {
+		nr_to_writebump = sbi->s_mb_stream_request - wbc->nr_to_write;
+		wbc->nr_to_write = sbi->s_mb_stream_request;
+	}
 
 	if (!wbc->range_cyclic)
 		/*
@@ -2377,7 +2386,7 @@ restart_loop:
 	}
 
 out_writepages:
-	wbc->nr_to_write = to_write;
+	wbc->nr_to_write = to_write - nr_to_writebump;
 	wbc->range_start = range_start;
 	return ret;
 }
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 865e9ddb44d4..e0e3a5eb1ddb 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3281,6 +3281,35 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
 	mb_debug("use %u/%u from group pa %p\n", pa->pa_lstart-len, len, pa);
 }
 
+/*
+ * Return the prealloc space that have minimal distance
+ * from the goal block. @cpa is the prealloc
+ * space that is having currently known minimal distance
+ * from the goal block.
+ */
+static struct ext4_prealloc_space *
+ext4_mb_check_group_pa(ext4_fsblk_t goal_block,
+			struct ext4_prealloc_space *pa,
+			struct ext4_prealloc_space *cpa)
+{
+	ext4_fsblk_t cur_distance, new_distance;
+
+	if (cpa == NULL) {
+		atomic_inc(&pa->pa_count);
+		return pa;
+	}
+	cur_distance = abs(goal_block - cpa->pa_pstart);
+	new_distance = abs(goal_block - pa->pa_pstart);
+
+	if (cur_distance < new_distance)
+		return cpa;
+
+	/* drop the previous reference */
+	atomic_dec(&cpa->pa_count);
+	atomic_inc(&pa->pa_count);
+	return pa;
+}
+
 /*
  * search goal blocks in preallocated space
  */
@@ -3290,7 +3319,8 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
 	int order, i;
 	struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
 	struct ext4_locality_group *lg;
-	struct ext4_prealloc_space *pa;
+	struct ext4_prealloc_space *pa, *cpa = NULL;
+	ext4_fsblk_t goal_block;
 
 	/* only data can be preallocated */
 	if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
@@ -3333,6 +3363,13 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
 		/* The max size of hash table is PREALLOC_TB_SIZE */
 		order = PREALLOC_TB_SIZE - 1;
 
+	goal_block = ac->ac_g_ex.fe_group * EXT4_BLOCKS_PER_GROUP(ac->ac_sb) +
+		     ac->ac_g_ex.fe_start +
+		     le32_to_cpu(EXT4_SB(ac->ac_sb)->s_es->s_first_data_block);
+	/*
+	 * search for the prealloc space that is having
+	 * minimal distance from the goal block.
+	 */
 	for (i = order; i < PREALLOC_TB_SIZE; i++) {
 		rcu_read_lock();
 		list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i],
@@ -3340,17 +3377,19 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
 			spin_lock(&pa->pa_lock);
 			if (pa->pa_deleted == 0 &&
 					pa->pa_free >= ac->ac_o_ex.fe_len) {
-				atomic_inc(&pa->pa_count);
-				ext4_mb_use_group_pa(ac, pa);
-				spin_unlock(&pa->pa_lock);
-				ac->ac_criteria = 20;
-				rcu_read_unlock();
-				return 1;
+
+				cpa = ext4_mb_check_group_pa(goal_block,
+								pa, cpa);
 			}
 			spin_unlock(&pa->pa_lock);
 		}
 		rcu_read_unlock();
 	}
+	if (cpa) {
+		ext4_mb_use_group_pa(ac, cpa);
+		ac->ac_criteria = 20;
+		return 1;
+	}
 	return 0;
 }
 
-- 
cgit v1.2.3


From aab3a8c7a3a6a001dd439ed00d4db17a1059803e Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Tue, 19 Aug 2008 14:23:37 +0000
Subject: [CIFS] reindent misindented statement
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/inode.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 848286861c31..9c548f110102 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -546,7 +546,8 @@ int cifs_get_inode_info(struct inode **pinode,
 		if ((inode->i_mode & S_IWUGO) == 0 &&
 		    (attr & ATTR_READONLY) == 0)
 			inode->i_mode |= (S_IWUGO & default_mode);
-			inode->i_mode &= ~S_IFMT;
+
+		inode->i_mode &= ~S_IFMT;
 	}
 	/* clear write bits if ATTR_READONLY is set */
 	if (attr & ATTR_READONLY)
-- 
cgit v1.2.3


From cb7691b648bddbfaf6dd8d8068273dbb18d2484c Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Mon, 18 Aug 2008 15:41:05 -0400
Subject: cifs: add local server pointer to cifs_setup_session

cifs_setup_session references pSesInfo->server several times. That
pointer shouldn't change during the life of the function so grab it
once and store it in a local var. This makes the code look a little
cleaner too.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/connect.c | 33 +++++++++++++++++----------------
 1 file changed, 17 insertions(+), 16 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 0711db65afe8..4c13bcdb92a5 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -3598,19 +3598,21 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo,
 	char ntlm_session_key[CIFS_SESS_KEY_SIZE];
 	bool ntlmv2_flag = false;
 	int first_time = 0;
+	struct TCP_Server_Info *server = pSesInfo->server;
 
 	/* what if server changes its buffer size after dropping the session? */
-	if (pSesInfo->server->maxBuf == 0) /* no need to send on reconnect */ {
+	if (server->maxBuf == 0) /* no need to send on reconnect */ {
 		rc = CIFSSMBNegotiate(xid, pSesInfo);
-		if (rc == -EAGAIN) /* retry only once on 1st time connection */ {
+		if (rc == -EAGAIN) {
+			/* retry only once on 1st time connection */
 			rc = CIFSSMBNegotiate(xid, pSesInfo);
 			if (rc == -EAGAIN)
 				rc = -EHOSTDOWN;
 		}
 		if (rc == 0) {
 			spin_lock(&GlobalMid_Lock);
-			if (pSesInfo->server->tcpStatus != CifsExiting)
-				pSesInfo->server->tcpStatus = CifsGood;
+			if (server->tcpStatus != CifsExiting)
+				server->tcpStatus = CifsGood;
 			else
 				rc = -EHOSTDOWN;
 			spin_unlock(&GlobalMid_Lock);
@@ -3623,23 +3625,22 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo,
 		goto ss_err_exit;
 
 	pSesInfo->flags = 0;
-	pSesInfo->capabilities = pSesInfo->server->capabilities;
+	pSesInfo->capabilities = server->capabilities;
 	if (linuxExtEnabled == 0)
 		pSesInfo->capabilities &= (~CAP_UNIX);
 	/*	pSesInfo->sequence_number = 0;*/
 	cFYI(1, ("Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d",
-		 pSesInfo->server->secMode,
-		 pSesInfo->server->capabilities,
-		 pSesInfo->server->timeAdj));
+		 server->secMode, server->capabilities, server->timeAdj));
+
 	if (experimEnabled < 2)
 		rc = CIFS_SessSetup(xid, pSesInfo, first_time, nls_info);
 	else if (extended_security
 			&& (pSesInfo->capabilities & CAP_EXTENDED_SECURITY)
-			&& (pSesInfo->server->secType == NTLMSSP)) {
+			&& (server->secType == NTLMSSP)) {
 		rc = -EOPNOTSUPP;
 	} else if (extended_security
 			&& (pSesInfo->capabilities & CAP_EXTENDED_SECURITY)
-			&& (pSesInfo->server->secType == RawNTLMSSP)) {
+			&& (server->secType == RawNTLMSSP)) {
 		cFYI(1, ("NTLMSSP sesssetup"));
 		rc = CIFSNTLMSSPNegotiateSessSetup(xid, pSesInfo, &ntlmv2_flag,
 						   nls_info);
@@ -3668,12 +3669,12 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo,
 
 			} else {
 				SMBNTencrypt(pSesInfo->password,
-					     pSesInfo->server->cryptKey,
+					     server->cryptKey,
 					     ntlm_session_key);
 
 				if (first_time)
 					cifs_calculate_mac_key(
-					     &pSesInfo->server->mac_signing_key,
+					     &server->mac_signing_key,
 					     ntlm_session_key,
 					     pSesInfo->password);
 			}
@@ -3686,13 +3687,13 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo,
 						      nls_info);
 		}
 	} else { /* old style NTLM 0.12 session setup */
-		SMBNTencrypt(pSesInfo->password, pSesInfo->server->cryptKey,
+		SMBNTencrypt(pSesInfo->password, server->cryptKey,
 			     ntlm_session_key);
 
 		if (first_time)
-			cifs_calculate_mac_key(
-					&pSesInfo->server->mac_signing_key,
-					ntlm_session_key, pSesInfo->password);
+			cifs_calculate_mac_key(&server->mac_signing_key,
+						ntlm_session_key,
+						pSesInfo->password);
 
 		rc = CIFSSessSetup(xid, pSesInfo, ntlm_session_key, nls_info);
 	}
-- 
cgit v1.2.3


From c16fefa56334e8d0197492607e473fdbb813073f Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Tue, 19 Aug 2008 19:35:33 +0000
Subject: [CIFS] distinguish between Kerberos and MSKerberos in upcall

Properly handle MSKRB5 by passing sec=mskrb5 to the upcall so that the
spengo blob can be generated appropriately. Also, make
decode_negTokenInit prefer whichever mechanism is first in the list.

Needed for some NetApp servers, and possibly some older
versions of Windows which treat the two KRB5 mechanisms differently.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/asn1.c        | 11 ++++++++---
 fs/cifs/cifs_spnego.c |  4 +++-
 fs/cifs/cifsglob.h    |  3 ++-
 fs/cifs/sess.c        |  2 +-
 4 files changed, 14 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c
index 5fabd2caf93c..1b09f1670061 100644
--- a/fs/cifs/asn1.c
+++ b/fs/cifs/asn1.c
@@ -476,6 +476,7 @@ decode_negTokenInit(unsigned char *security_blob, int length,
 	unsigned int cls, con, tag, oidlen, rc;
 	bool use_ntlmssp = false;
 	bool use_kerberos = false;
+	bool use_mskerberos = false;
 
 	*secType = NTLM; /* BB eventually make Kerberos or NLTMSSP the default*/
 
@@ -574,10 +575,12 @@ decode_negTokenInit(unsigned char *security_blob, int length,
 					 *(oid + 1), *(oid + 2), *(oid + 3)));
 
 				if (compare_oid(oid, oidlen, MSKRB5_OID,
-						MSKRB5_OID_LEN))
-					use_kerberos = true;
+						MSKRB5_OID_LEN) &&
+						!use_kerberos)
+					use_mskerberos = true;
 				else if (compare_oid(oid, oidlen, KRB5_OID,
-						     KRB5_OID_LEN))
+						     KRB5_OID_LEN) &&
+						     !use_mskerberos)
 					use_kerberos = true;
 				else if (compare_oid(oid, oidlen, NTLMSSP_OID,
 						     NTLMSSP_OID_LEN))
@@ -630,6 +633,8 @@ decode_negTokenInit(unsigned char *security_blob, int length,
 
 	if (use_kerberos)
 		*secType = Kerberos;
+	else if (use_mskerberos)
+		*secType = MSKerberos;
 	else if (use_ntlmssp)
 		*secType = NTLMSSP;
 
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index 2434ab0e8791..117ef4bba68e 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -114,9 +114,11 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
 
 	dp = description + strlen(description);
 
-	/* for now, only sec=krb5 is valid */
+	/* for now, only sec=krb5 and sec=mskrb5 are valid */
 	if (server->secType == Kerberos)
 		sprintf(dp, ";sec=krb5");
+	else if (server->secType == MSKerberos)
+		sprintf(dp, ";sec=mskrb5");
 	else
 		goto out;
 
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 7e1cf262effe..8dfd6f24d488 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -80,7 +80,8 @@ enum securityEnum {
 	NTLMv2,			/* Legacy NTLM auth with NTLMv2 hash */
 	RawNTLMSSP,		/* NTLMSSP without SPNEGO */
 	NTLMSSP,		/* NTLMSSP via SPNEGO */
-	Kerberos		/* Kerberos via SPNEGO */
+	Kerberos,		/* Kerberos via SPNEGO */
+	MSKerberos,		/* MS Kerberos via SPNEGO */
 };
 
 enum protocolEnum {
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index ed150efbe27c..3188e4d9cddb 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -505,7 +505,7 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time,
 			unicode_ssetup_strings(&bcc_ptr, ses, nls_cp);
 		} else
 			ascii_ssetup_strings(&bcc_ptr, ses, nls_cp);
-	} else if (type == Kerberos) {
+	} else if (type == Kerberos || type == MSKerberos) {
 #ifdef CONFIG_CIFS_UPCALL
 		struct cifs_spnego_msg *msg;
 		spnego_key = cifs_get_spnego_key(ses);
-- 
cgit v1.2.3


From 3d2af3465e91335bd1dbf36b19e92079d901409f Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Tue, 19 Aug 2008 20:51:09 +0000
Subject: [CIFS] Kerberos support not considered experimental anymore

Acked-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/Kconfig     |  1 -
 fs/cifs/README | 30 ++++++++++++++++++++++++++----
 2 files changed, 26 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/Kconfig b/fs/Kconfig
index d3873583360b..f0427105a619 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1984,7 +1984,6 @@ config CIFS_EXPERIMENTAL
 
 config CIFS_UPCALL
 	  bool "Kerberos/SPNEGO advanced session setup (EXPERIMENTAL)"
-	  depends on CIFS_EXPERIMENTAL
 	  depends on KEYS
 	  help
 	    Enables an upcall mechanism for CIFS which accesses
diff --git a/fs/cifs/README b/fs/cifs/README
index 2bd6fe556f88..68b5c1169d9d 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -642,8 +642,30 @@ The statistics for the number of total SMBs and oplock breaks are different in
 that they represent all for that share, not just those for which the server
 returned success.
 	
-Also note that "cat /proc/fs/cifs/DebugData" will display information about 
+Also note that "cat /proc/fs/cifs/DebugData" will display information about
 the active sessions and the shares that are mounted.
-Enabling Kerberos (extended security) works when CONFIG_CIFS_EXPERIMENTAL is
-on but requires a user space helper (from the Samba project). NTLM and NTLMv2 and
-LANMAN support do not require this helper.
+
+Enabling Kerberos (extended security) works but requires version 1.2 or later
+of the helper program cifs.upcall to be present and to be configured in the
+/etc/request-key.conf file.  The cifs.upcall helper program is from the Samba
+project(http://www.samba.org). NTLM and NTLMv2 and LANMAN support do not
+require this helper. Note that NTLMv2 security (which does not require the
+cifs.upcall helper program), instead of using Kerberos, is sufficient for
+some use cases.
+
+Enabling DFS support (used to access shares transparently in an MS-DFS
+global name space) requires that CONFIG_CIFS_EXPERIMENTAL be enabled.  In
+addition, DFS support for target shares which are specified as UNC
+names which begin with host names (rather than IP addresses) requires
+a user space helper (such as cifs.upcall) to be present in order to
+translate host names to ip address, and the user space helper must also
+be configured in the file /etc/request-key.conf
+
+To use cifs Kerberos and DFS support, the Linux keyutils package should be
+installed and something like the following lines should be added to the
+/etc/request-key.conf file:
+
+create cifs.spnego * * /usr/local/sbin/cifs.upcall %k
+create dns_resolver * * /usr/local/sbin/cifs.upcall %k
+
+
-- 
cgit v1.2.3


From 18496e80f729be5f536d0315751b3bbb95ca913e Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Thu, 7 Aug 2008 00:11:12 +0300
Subject: [PATCH] ocfs2/cluster/tcp.c: make some functions static

Commit 0f475b2abed6cbccee1da20a0bef2895eb2a0edd (ocfs2/net: Silence build
warnings) made sense as far as it fixed compile warnings, but it was not
required that it made the functions global.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
---
 fs/ocfs2/cluster/tcp.c          | 44 ++++++++++++++++++++++++++++++++++-------
 fs/ocfs2/cluster/tcp_internal.h | 32 ------------------------------
 2 files changed, 37 insertions(+), 39 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index a27d61581bd6..2bcf706d9dd3 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -143,8 +143,8 @@ static void o2net_sc_postpone_idle(struct o2net_sock_container *sc);
 static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc);
 
 #ifdef CONFIG_DEBUG_FS
-void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype,
-		    u32 msgkey, struct task_struct *task, u8 node)
+static void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype,
+			   u32 msgkey, struct task_struct *task, u8 node)
 {
 	INIT_LIST_HEAD(&nst->st_net_debug_item);
 	nst->st_task = task;
@@ -153,31 +153,61 @@ void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype,
 	nst->st_node = node;
 }
 
-void o2net_set_nst_sock_time(struct o2net_send_tracking *nst)
+static void o2net_set_nst_sock_time(struct o2net_send_tracking *nst)
 {
 	do_gettimeofday(&nst->st_sock_time);
 }
 
-void o2net_set_nst_send_time(struct o2net_send_tracking *nst)
+static void o2net_set_nst_send_time(struct o2net_send_tracking *nst)
 {
 	do_gettimeofday(&nst->st_send_time);
 }
 
-void o2net_set_nst_status_time(struct o2net_send_tracking *nst)
+static void o2net_set_nst_status_time(struct o2net_send_tracking *nst)
 {
 	do_gettimeofday(&nst->st_status_time);
 }
 
-void o2net_set_nst_sock_container(struct o2net_send_tracking *nst,
+static void o2net_set_nst_sock_container(struct o2net_send_tracking *nst,
 					 struct o2net_sock_container *sc)
 {
 	nst->st_sc = sc;
 }
 
-void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id)
+static void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id)
 {
 	nst->st_id = msg_id;
 }
+
+#else  /* CONFIG_DEBUG_FS */
+
+static inline void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype,
+				  u32 msgkey, struct task_struct *task, u8 node)
+{
+}
+
+static inline void o2net_set_nst_sock_time(struct o2net_send_tracking *nst)
+{
+}
+
+static inline void o2net_set_nst_send_time(struct o2net_send_tracking *nst)
+{
+}
+
+static inline void o2net_set_nst_status_time(struct o2net_send_tracking *nst)
+{
+}
+
+static inline void o2net_set_nst_sock_container(struct o2net_send_tracking *nst,
+						struct o2net_sock_container *sc)
+{
+}
+
+static inline void o2net_set_nst_msg_id(struct o2net_send_tracking *nst,
+					u32 msg_id)
+{
+}
+
 #endif /* CONFIG_DEBUG_FS */
 
 static inline int o2net_reconnect_delay(void)
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h
index 18307ff81b77..8d58cfe410b1 100644
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -224,42 +224,10 @@ struct o2net_send_tracking {
 	struct timeval			st_send_time;
 	struct timeval			st_status_time;
 };
-
-void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype,
-		    u32 msgkey, struct task_struct *task, u8 node);
-void o2net_set_nst_sock_time(struct o2net_send_tracking *nst);
-void o2net_set_nst_send_time(struct o2net_send_tracking *nst);
-void o2net_set_nst_status_time(struct o2net_send_tracking *nst);
-void o2net_set_nst_sock_container(struct o2net_send_tracking *nst,
-				  struct o2net_sock_container *sc);
-void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id);
-
 #else
 struct o2net_send_tracking {
 	u32	dummy;
 };
-
-static inline void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype,
-				  u32 msgkey, struct task_struct *task, u8 node)
-{
-}
-static inline void o2net_set_nst_sock_time(struct o2net_send_tracking *nst)
-{
-}
-static inline void o2net_set_nst_send_time(struct o2net_send_tracking *nst)
-{
-}
-static inline void o2net_set_nst_status_time(struct o2net_send_tracking *nst)
-{
-}
-static inline void o2net_set_nst_sock_container(struct o2net_send_tracking *nst,
-						struct o2net_sock_container *sc)
-{
-}
-static inline void o2net_set_nst_msg_id(struct o2net_send_tracking *nst,
-					u32 msg_id)
-{
-}
 #endif	/* CONFIG_DEBUG_FS */
 
 #endif /* O2CLUSTER_TCP_INTERNAL_H */
-- 
cgit v1.2.3


From a57a874b04e27cb530a0e18c244387452e73ccce Mon Sep 17 00:00:00 2001
From: Alexander Beregalov <a.beregalov@gmail.com>
Date: Wed, 6 Aug 2008 00:50:41 +0400
Subject: [PATCH] ocfs2/cluster/netdebug.c: fix warning

ocfs2/cluster/netdebug.c: fix warning

fs/ocfs2/cluster/netdebug.c:154: warning: format '%lu' expects
     type 'long unsigned int', but argument 17 has type 'suseconds_t'

Signed-off-by: Alexander Beregalov <a.beregalov@gmail.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
---
 fs/ocfs2/cluster/netdebug.c | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c
index d8bfa0eb41b2..52276c02f710 100644
--- a/fs/ocfs2/cluster/netdebug.c
+++ b/fs/ocfs2/cluster/netdebug.c
@@ -138,20 +138,20 @@ static int nst_seq_show(struct seq_file *seq, void *v)
 			   "  message id:   %d\n"
 			   "  message type: %u\n"
 			   "  message key:  0x%08x\n"
-			   "  sock acquiry: %lu.%lu\n"
-			   "  send start:   %lu.%lu\n"
-			   "  wait start:   %lu.%lu\n",
+			   "  sock acquiry: %lu.%ld\n"
+			   "  send start:   %lu.%ld\n"
+			   "  wait start:   %lu.%ld\n",
 			   nst, (unsigned long)nst->st_task->pid,
 			   (unsigned long)nst->st_task->tgid,
 			   nst->st_task->comm, nst->st_node,
 			   nst->st_sc, nst->st_id, nst->st_msg_type,
 			   nst->st_msg_key,
 			   nst->st_sock_time.tv_sec,
-			   (unsigned long)nst->st_sock_time.tv_usec,
+			   (long)nst->st_sock_time.tv_usec,
 			   nst->st_send_time.tv_sec,
-			   (unsigned long)nst->st_send_time.tv_usec,
+			   (long)nst->st_send_time.tv_usec,
 			   nst->st_status_time.tv_sec,
-			   nst->st_status_time.tv_usec);
+			   (long)nst->st_status_time.tv_usec);
 	}
 
 	spin_unlock(&o2net_debug_lock);
@@ -276,7 +276,7 @@ static void *sc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	return sc; /* unused, just needs to be null when done */
 }
 
-#define TV_SEC_USEC(TV) TV.tv_sec, (unsigned long)TV.tv_usec
+#define TV_SEC_USEC(TV) TV.tv_sec, (long)TV.tv_usec
 
 static int sc_seq_show(struct seq_file *seq, void *v)
 {
@@ -309,12 +309,12 @@ static int sc_seq_show(struct seq_file *seq, void *v)
 			   "  remote node:     %s\n"
 			   "  page off:        %zu\n"
 			   "  handshake ok:    %u\n"
-			   "  timer:           %lu.%lu\n"
-			   "  data ready:      %lu.%lu\n"
-			   "  advance start:   %lu.%lu\n"
-			   "  advance stop:    %lu.%lu\n"
-			   "  func start:      %lu.%lu\n"
-			   "  func stop:       %lu.%lu\n"
+			   "  timer:           %lu.%ld\n"
+			   "  data ready:      %lu.%ld\n"
+			   "  advance start:   %lu.%ld\n"
+			   "  advance stop:    %lu.%ld\n"
+			   "  func start:      %lu.%ld\n"
+			   "  func stop:       %lu.%ld\n"
 			   "  func key:        %u\n"
 			   "  func type:       %u\n",
 			   sc,
-- 
cgit v1.2.3


From a1af7d15a18d1e375b0a6fee93789a0bbfe088b4 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mfasheh@suse.com>
Date: Tue, 19 Aug 2008 17:20:28 -0700
Subject: ocfs2: Fix sleep-with-spinlock recovery regression

This fixes a bug introduced with 539d8264093560b917ee3afe4c7f74e5da09d6a5:
    [PATCH 2/2] ocfs2: Fix race between mount and recovery

ocfs2_mark_dead_nodes() was reading journal inodes while holding the
spinlock protecting our in-memory recovery state. The fix is very simple -
the disk state is protected by a cluster lock that's already held, so we
just move the spinlock down past the read.

Reviewed-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
---
 fs/ocfs2/journal.c | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 7a37240f7a31..c47bc2a809c2 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1418,13 +1418,13 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb)
 {
 	unsigned int node_num;
 	int status, i;
+	u32 gen;
 	struct buffer_head *bh = NULL;
 	struct ocfs2_dinode *di;
 
 	/* This is called with the super block cluster lock, so we
 	 * know that the slot map can't change underneath us. */
 
-	spin_lock(&osb->osb_lock);
 	for (i = 0; i < osb->max_slots; i++) {
 		/* Read journal inode to get the recovery generation */
 		status = ocfs2_read_journal_inode(osb, i, &bh, NULL);
@@ -1433,23 +1433,31 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb)
 			goto bail;
 		}
 		di = (struct ocfs2_dinode *)bh->b_data;
-		osb->slot_recovery_generations[i] =
-					ocfs2_get_recovery_generation(di);
+		gen = ocfs2_get_recovery_generation(di);
 		brelse(bh);
 		bh = NULL;
 
+		spin_lock(&osb->osb_lock);
+		osb->slot_recovery_generations[i] = gen;
+
 		mlog(0, "Slot %u recovery generation is %u\n", i,
 		     osb->slot_recovery_generations[i]);
 
-		if (i == osb->slot_num)
+		if (i == osb->slot_num) {
+			spin_unlock(&osb->osb_lock);
 			continue;
+		}
 
 		status = ocfs2_slot_to_node_num_locked(osb, i, &node_num);
-		if (status == -ENOENT)
+		if (status == -ENOENT) {
+			spin_unlock(&osb->osb_lock);
 			continue;
+		}
 
-		if (__ocfs2_recovery_map_test(osb, node_num))
+		if (__ocfs2_recovery_map_test(osb, node_num)) {
+			spin_unlock(&osb->osb_lock);
 			continue;
+		}
 		spin_unlock(&osb->osb_lock);
 
 		/* Ok, we have a slot occupied by another node which
@@ -1465,10 +1473,7 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb)
 			mlog_errno(status);
 			goto bail;
 		}
-
-		spin_lock(&osb->osb_lock);
 	}
-	spin_unlock(&osb->osb_lock);
 
 	status = 0;
 bail:
-- 
cgit v1.2.3


From 83cab5338fa8c74f979223698c8d4cc88f2ab68e Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Thu, 21 Aug 2008 14:14:27 +0800
Subject: ocfs2: Jump to correct label in ocfs2_expand_inline_dir()

When we fail to insert extent in ocfs2_expand_inline_dir(), we should go to
out_commit, not out.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
---
 fs/ocfs2/dir.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 8a1875848080..8e9c4a47d819 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -1310,7 +1310,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
 				  NULL);
 	if (ret) {
 		mlog_errno(ret);
-		goto out;
+		goto out_commit;
 	}
 
 	ret = ocfs2_journal_dirty(handle, di_bh);
@@ -1336,7 +1336,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
 					  len, 0, NULL);
 		if (ret) {
 			mlog_errno(ret);
-			goto out;
+			goto out_commit;
 		}
 	}
 
-- 
cgit v1.2.3


From 9780eb6cfaf7d2d5ccc061eaf94e7aec6a17791e Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mfasheh@suse.com>
Date: Tue, 5 Aug 2008 11:32:46 -0700
Subject: ocfs2: correctly set i_blocks after inline dir gets expanded

We were setting i_blocks based on allocation before the extent insert, which
is wrong as the value is a calculation based on ip_clusters which gets
updated as a result of the insert. This patch moves the line in question
to just after the call to ocfs2_insert_extent().

Without this fix, inline directories were temporarily having an i_blocks
value of zero immediately after expansion to extents.

Reported-and-tested-by: Tristan Ye <tristan.ye@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
---
 fs/ocfs2/dir.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 8e9c4a47d819..9cce563fd627 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -1300,7 +1300,6 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
 	di->i_size = cpu_to_le64(sb->s_blocksize);
 	di->i_ctime = di->i_mtime = cpu_to_le64(dir->i_ctime.tv_sec);
 	di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(dir->i_ctime.tv_nsec);
-	dir->i_blocks = ocfs2_inode_sector_count(dir);
 
 	/*
 	 * This should never fail as our extent list is empty and all
@@ -1313,6 +1312,12 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
 		goto out_commit;
 	}
 
+	/*
+	 * Set i_blocks after the extent insert for the most up to
+	 * date ip_clusters value.
+	 */
+	dir->i_blocks = ocfs2_inode_sector_count(dir);
+
 	ret = ocfs2_journal_dirty(handle, di_bh);
 	if (ret) {
 		mlog_errno(ret);
-- 
cgit v1.2.3


From de6bf18e9ce0df807dab08cff08751cac383429d Mon Sep 17 00:00:00 2001
From: Louis Rilling <louis.rilling@kerlabs.com>
Date: Fri, 15 Aug 2008 12:37:23 -0700
Subject: [PATCH] configfs: Consolidate locking around configfs_detach_prep()
 in configfs_rmdir()

It appears that configfs_rmdir() can protect configfs_detach_prep() retries with
less calls to {spin,mutex}_{lock,unlock}, and a cleaner code.

This patch does not change any behavior, except that it removes two useless
lock/unlock pairs having nothing inside to protect and providing a useless
barrier.

Signed-off-by: Louis Rilling <louis.rilling@kerlabs.com>
Signed-off-by: Joel Becker <Joel.Becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
---
 fs/configfs/dir.c | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 7a8db78a91d2..8e93341f3e82 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -1311,16 +1311,18 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
 	 * Ensure that no racing symlink() will make detach_prep() fail while
 	 * the new link is temporarily attached
 	 */
-	mutex_lock(&configfs_symlink_mutex);
-	spin_lock(&configfs_dirent_lock);
 	do {
 		struct mutex *wait_mutex;
 
+		mutex_lock(&configfs_symlink_mutex);
+		spin_lock(&configfs_dirent_lock);
 		ret = configfs_detach_prep(dentry, &wait_mutex);
-		if (ret) {
+		if (ret)
 			configfs_detach_rollback(dentry);
-			spin_unlock(&configfs_dirent_lock);
-			mutex_unlock(&configfs_symlink_mutex);
+		spin_unlock(&configfs_dirent_lock);
+		mutex_unlock(&configfs_symlink_mutex);
+
+		if (ret) {
 			if (ret != -EAGAIN) {
 				config_item_put(parent_item);
 				return ret;
@@ -1329,13 +1331,8 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
 			/* Wait until the racing operation terminates */
 			mutex_lock(wait_mutex);
 			mutex_unlock(wait_mutex);
-
-			mutex_lock(&configfs_symlink_mutex);
-			spin_lock(&configfs_dirent_lock);
 		}
 	} while (ret == -EAGAIN);
-	spin_unlock(&configfs_dirent_lock);
-	mutex_unlock(&configfs_symlink_mutex);
 
 	/* Get a working ref for the duration of this function */
 	item = configfs_get_config_item(dentry);
-- 
cgit v1.2.3


From 7a8fc9b248e77a4eab0613acf30a6811799786b3 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Sun, 17 Aug 2008 17:36:59 +0300
Subject: removed unused #include <linux/version.h>'s

This patch lets the files using linux/version.h match the files that
#include it.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/plat-omap/clock.c                   | 1 -
 arch/cris/arch-v32/kernel/fasttimer.c        | 2 --
 arch/mn10300/kernel/mn10300-serial.c         | 1 -
 arch/powerpc/sysdev/bestcomm/gen_bd.c        | 1 -
 arch/x86/mach-rdc321x/platform.c             | 1 -
 drivers/atm/adummy.c                         | 1 -
 drivers/char/xilinx_hwicap/buffer_icap.h     | 1 -
 drivers/char/xilinx_hwicap/fifo_icap.h       | 1 -
 drivers/char/xilinx_hwicap/xilinx_hwicap.h   | 1 -
 drivers/edac/edac_core.h                     | 1 -
 drivers/i2c/busses/i2c-at91.c                | 1 -
 drivers/infiniband/hw/ehca/ehca_tools.h      | 1 -
 drivers/infiniband/hw/ipath/ipath_fs.c       | 1 -
 drivers/infiniband/hw/nes/nes.h              | 1 -
 drivers/infiniband/ulp/iser/iser_verbs.c     | 1 -
 drivers/input/keyboard/bf54x-keys.c          | 1 -
 drivers/input/touchscreen/mainstone-wm97xx.c | 1 -
 drivers/mfd/asic3.c                          | 1 -
 drivers/misc/eeprom_93cx6.c                  | 1 -
 drivers/mtd/maps/amd76xrom.c                 | 1 -
 drivers/mtd/maps/ck804xrom.c                 | 1 -
 drivers/mtd/maps/esb2rom.c                   | 1 -
 drivers/mtd/nand/au1550nd.c                  | 1 -
 drivers/net/myri10ge/myri10ge.c              | 1 -
 drivers/net/netxen/netxen_nic.h              | 1 -
 drivers/net/netxen/netxen_nic_ethtool.c      | 1 -
 drivers/net/netxen/netxen_nic_hdr.h          | 2 --
 drivers/net/tokenring/lanstreamer.c          | 1 -
 drivers/net/tokenring/lanstreamer.h          | 2 --
 drivers/net/wireless/b43legacy/main.c        | 1 -
 drivers/net/wireless/iwlwifi/iwl-3945-led.c  | 1 -
 drivers/net/wireless/iwlwifi/iwl-led.c       | 1 -
 drivers/net/wireless/iwlwifi/iwl-rfkill.c    | 1 -
 drivers/rtc/rtc-max6902.c                    | 2 --
 drivers/rtc/rtc-r9701.c                      | 1 -
 drivers/s390/net/ctcm_mpc.c                  | 1 -
 drivers/scsi/dpt/dpti_i2o.h                  | 1 -
 drivers/scsi/ips.c                           | 1 -
 drivers/scsi/ips.h                           | 1 -
 drivers/scsi/lpfc/lpfc_debugfs.c             | 1 -
 drivers/scsi/nsp32.c                         | 1 -
 drivers/scsi/nsp32.h                         | 1 -
 drivers/scsi/pcmcia/nsp_cs.c                 | 1 -
 drivers/scsi/qla2xxx/qla_mid.c               | 1 -
 drivers/usb/atm/ueagle-atm.c                 | 1 -
 drivers/usb/gadget/amd5536udc.c              | 1 -
 drivers/usb/gadget/s3c2410_udc.c             | 1 -
 drivers/usb/misc/iowarrior.c                 | 1 -
 drivers/usb/serial/garmin_gps.c              | 2 --
 drivers/video/arkfb.c                        | 1 -
 drivers/video/s3fb.c                         | 1 -
 drivers/video/vermilion/vermilion.h          | 1 -
 drivers/video/vt8623fb.c                     | 1 -
 drivers/video/xilinxfb.c                     | 1 -
 fs/jffs2/jffs2_fs_i.h                        | 1 -
 fs/xfs/xfs_dmapi.h                           | 1 -
 include/asm-x86/xen/hypervisor.h             | 1 -
 include/linux/fs_uart_pd.h                   | 1 -
 kernel/nsproxy.c                             | 1 -
 kernel/power/swap.c                          | 1 -
 kernel/user_namespace.c                      | 1 -
 kernel/utsname.c                             | 1 -
 kernel/utsname_sysctl.c                      | 1 -
 sound/mips/au1x00.c                          | 1 -
 sound/soc/at91/eti_b1_wm8731.c               | 1 -
 sound/soc/codecs/wm8753.c                    | 1 -
 sound/soc/codecs/wm9712.c                    | 1 -
 67 files changed, 72 deletions(-)

(limited to 'fs')

diff --git a/arch/arm/plat-omap/clock.c b/arch/arm/plat-omap/clock.c
index 23a070599993..197974defbe4 100644
--- a/arch/arm/plat-omap/clock.c
+++ b/arch/arm/plat-omap/clock.c
@@ -10,7 +10,6 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
-#include <linux/version.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/module.h>
diff --git a/arch/cris/arch-v32/kernel/fasttimer.c b/arch/cris/arch-v32/kernel/fasttimer.c
index 2de9d5849ef0..111caa1a2efb 100644
--- a/arch/cris/arch-v32/kernel/fasttimer.c
+++ b/arch/cris/arch-v32/kernel/fasttimer.c
@@ -19,8 +19,6 @@
 #include <asm/irq.h>
 #include <asm/system.h>
 
-#include <linux/version.h>
-
 #include <hwregs/reg_map.h>
 #include <hwregs/reg_rdwr.h>
 #include <hwregs/timer_defs.h>
diff --git a/arch/mn10300/kernel/mn10300-serial.c b/arch/mn10300/kernel/mn10300-serial.c
index 8b054e7a8ae8..aa07d0cd1905 100644
--- a/arch/mn10300/kernel/mn10300-serial.c
+++ b/arch/mn10300/kernel/mn10300-serial.c
@@ -17,7 +17,6 @@ static const char serial_revdate[] = "2007-11-06";
 #define SUPPORT_SYSRQ
 #endif
 
-#include <linux/version.h>
 #include <linux/module.h>
 #include <linux/serial.h>
 #include <linux/circ_buf.h>
diff --git a/arch/powerpc/sysdev/bestcomm/gen_bd.c b/arch/powerpc/sysdev/bestcomm/gen_bd.c
index a3a134c35b0a..e0a53e3147b2 100644
--- a/arch/powerpc/sysdev/bestcomm/gen_bd.c
+++ b/arch/powerpc/sysdev/bestcomm/gen_bd.c
@@ -11,7 +11,6 @@
  *
  */
 
-#include <linux/version.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
diff --git a/arch/x86/mach-rdc321x/platform.c b/arch/x86/mach-rdc321x/platform.c
index a037041817c7..4f4e50c3ad3b 100644
--- a/arch/x86/mach-rdc321x/platform.c
+++ b/arch/x86/mach-rdc321x/platform.c
@@ -25,7 +25,6 @@
 #include <linux/list.h>
 #include <linux/device.h>
 #include <linux/platform_device.h>
-#include <linux/version.h>
 #include <linux/leds.h>
 
 #include <asm/gpio.h>
diff --git a/drivers/atm/adummy.c b/drivers/atm/adummy.c
index 2ebd07f2ef81..5effec6f5458 100644
--- a/drivers/atm/adummy.c
+++ b/drivers/atm/adummy.c
@@ -3,7 +3,6 @@
  */
 
 #include <linux/module.h>
-#include <linux/version.h>
 #include <linux/kernel.h>
 #include <linux/skbuff.h>
 #include <linux/errno.h>
diff --git a/drivers/char/xilinx_hwicap/buffer_icap.h b/drivers/char/xilinx_hwicap/buffer_icap.h
index c5b1840906b2..8b0252bf06e2 100644
--- a/drivers/char/xilinx_hwicap/buffer_icap.h
+++ b/drivers/char/xilinx_hwicap/buffer_icap.h
@@ -38,7 +38,6 @@
 
 #include <linux/types.h>
 #include <linux/cdev.h>
-#include <linux/version.h>
 #include <linux/platform_device.h>
 
 #include <asm/io.h>
diff --git a/drivers/char/xilinx_hwicap/fifo_icap.h b/drivers/char/xilinx_hwicap/fifo_icap.h
index ffabd3ba2bd8..62bda453c90b 100644
--- a/drivers/char/xilinx_hwicap/fifo_icap.h
+++ b/drivers/char/xilinx_hwicap/fifo_icap.h
@@ -38,7 +38,6 @@
 
 #include <linux/types.h>
 #include <linux/cdev.h>
-#include <linux/version.h>
 #include <linux/platform_device.h>
 
 #include <asm/io.h>
diff --git a/drivers/char/xilinx_hwicap/xilinx_hwicap.h b/drivers/char/xilinx_hwicap/xilinx_hwicap.h
index 1f9c8b082dbe..24d0d9b938fb 100644
--- a/drivers/char/xilinx_hwicap/xilinx_hwicap.h
+++ b/drivers/char/xilinx_hwicap/xilinx_hwicap.h
@@ -38,7 +38,6 @@
 
 #include <linux/types.h>
 #include <linux/cdev.h>
-#include <linux/version.h>
 #include <linux/platform_device.h>
 
 #include <asm/io.h>
diff --git a/drivers/edac/edac_core.h b/drivers/edac/edac_core.h
index b27b13c5eb5a..4b55ec607a88 100644
--- a/drivers/edac/edac_core.h
+++ b/drivers/edac/edac_core.h
@@ -34,7 +34,6 @@
 #include <linux/platform_device.h>
 #include <linux/sysdev.h>
 #include <linux/workqueue.h>
-#include <linux/version.h>
 
 #define EDAC_MC_LABEL_LEN	31
 #define EDAC_DEVICE_NAME_LEN	31
diff --git a/drivers/i2c/busses/i2c-at91.c b/drivers/i2c/busses/i2c-at91.c
index c1adcdbf7979..9efb02137254 100644
--- a/drivers/i2c/busses/i2c-at91.c
+++ b/drivers/i2c/busses/i2c-at91.c
@@ -14,7 +14,6 @@
 */
 
 #include <linux/module.h>
-#include <linux/version.h>
 #include <linux/kernel.h>
 #include <linux/err.h>
 #include <linux/slab.h>
diff --git a/drivers/infiniband/hw/ehca/ehca_tools.h b/drivers/infiniband/hw/ehca/ehca_tools.h
index ec950bf8c479..21f7d06f14ad 100644
--- a/drivers/infiniband/hw/ehca/ehca_tools.h
+++ b/drivers/infiniband/hw/ehca/ehca_tools.h
@@ -54,7 +54,6 @@
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/vmalloc.h>
-#include <linux/version.h>
 #include <linux/notifier.h>
 #include <linux/cpu.h>
 #include <linux/device.h>
diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c
index 23faba9d21eb..8bb5170b4e41 100644
--- a/drivers/infiniband/hw/ipath/ipath_fs.c
+++ b/drivers/infiniband/hw/ipath/ipath_fs.c
@@ -31,7 +31,6 @@
  * SOFTWARE.
  */
 
-#include <linux/version.h>
 #include <linux/module.h>
 #include <linux/fs.h>
 #include <linux/mount.h>
diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h
index 39bd897b40c6..8eb7ae96974d 100644
--- a/drivers/infiniband/hw/nes/nes.h
+++ b/drivers/infiniband/hw/nes/nes.h
@@ -43,7 +43,6 @@
 #include <linux/dma-mapping.h>
 #include <linux/workqueue.h>
 #include <linux/slab.h>
-#include <linux/version.h>
 #include <asm/io.h>
 #include <linux/crc32c.h>
 
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 63462ecca147..26ff6214a81f 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -33,7 +33,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/delay.h>
-#include <linux/version.h>
 
 #include "iscsi_iser.h"
 
diff --git a/drivers/input/keyboard/bf54x-keys.c b/drivers/input/keyboard/bf54x-keys.c
index 54ed8e2e1c02..6f227d3dbda1 100644
--- a/drivers/input/keyboard/bf54x-keys.c
+++ b/drivers/input/keyboard/bf54x-keys.c
@@ -29,7 +29,6 @@
  */
 
 #include <linux/module.h>
-#include <linux/version.h>
 
 #include <linux/init.h>
 #include <linux/fs.h>
diff --git a/drivers/input/touchscreen/mainstone-wm97xx.c b/drivers/input/touchscreen/mainstone-wm97xx.c
index 283f93a0cee2..37a555f37306 100644
--- a/drivers/input/touchscreen/mainstone-wm97xx.c
+++ b/drivers/input/touchscreen/mainstone-wm97xx.c
@@ -25,7 +25,6 @@
 
 #include <linux/module.h>
 #include <linux/moduleparam.h>
-#include <linux/version.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/delay.h>
diff --git a/drivers/mfd/asic3.c b/drivers/mfd/asic3.c
index c6408a62d95e..bc2a807f210d 100644
--- a/drivers/mfd/asic3.c
+++ b/drivers/mfd/asic3.c
@@ -16,7 +16,6 @@
  *
  */
 
-#include <linux/version.h>
 #include <linux/kernel.h>
 #include <linux/irq.h>
 #include <linux/gpio.h>
diff --git a/drivers/misc/eeprom_93cx6.c b/drivers/misc/eeprom_93cx6.c
index ea55654e5948..15b1780025c8 100644
--- a/drivers/misc/eeprom_93cx6.c
+++ b/drivers/misc/eeprom_93cx6.c
@@ -26,7 +26,6 @@
 
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/version.h>
 #include <linux/delay.h>
 #include <linux/eeprom_93cx6.h>
 
diff --git a/drivers/mtd/maps/amd76xrom.c b/drivers/mtd/maps/amd76xrom.c
index 948b86f35ef4..d1eec7d3243f 100644
--- a/drivers/mtd/maps/amd76xrom.c
+++ b/drivers/mtd/maps/amd76xrom.c
@@ -6,7 +6,6 @@
 
 #include <linux/module.h>
 #include <linux/types.h>
-#include <linux/version.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <asm/io.h>
diff --git a/drivers/mtd/maps/ck804xrom.c b/drivers/mtd/maps/ck804xrom.c
index effaf7cdefab..1a6feb4474de 100644
--- a/drivers/mtd/maps/ck804xrom.c
+++ b/drivers/mtd/maps/ck804xrom.c
@@ -9,7 +9,6 @@
 
 #include <linux/module.h>
 #include <linux/types.h>
-#include <linux/version.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <asm/io.h>
diff --git a/drivers/mtd/maps/esb2rom.c b/drivers/mtd/maps/esb2rom.c
index aa64a4752781..bbbcdd4c8d13 100644
--- a/drivers/mtd/maps/esb2rom.c
+++ b/drivers/mtd/maps/esb2rom.c
@@ -12,7 +12,6 @@
 
 #include <linux/module.h>
 #include <linux/types.h>
-#include <linux/version.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <asm/io.h>
diff --git a/drivers/mtd/nand/au1550nd.c b/drivers/mtd/nand/au1550nd.c
index 761946ea45b1..92c334ff4508 100644
--- a/drivers/mtd/nand/au1550nd.c
+++ b/drivers/mtd/nand/au1550nd.c
@@ -16,7 +16,6 @@
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/nand.h>
 #include <linux/mtd/partitions.h>
-#include <linux/version.h>
 #include <asm/io.h>
 
 #include <asm/mach-au1x00/au1xxx.h>
diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c
index 5d76cd09e246..54cd89cb0838 100644
--- a/drivers/net/myri10ge/myri10ge.c
+++ b/drivers/net/myri10ge/myri10ge.c
@@ -56,7 +56,6 @@
 #include <linux/ethtool.h>
 #include <linux/firmware.h>
 #include <linux/delay.h>
-#include <linux/version.h>
 #include <linux/timer.h>
 #include <linux/vmalloc.h>
 #include <linux/crc32.h>
diff --git a/drivers/net/netxen/netxen_nic.h b/drivers/net/netxen/netxen_nic.h
index ab871df6b1db..244ab49c4337 100644
--- a/drivers/net/netxen/netxen_nic.h
+++ b/drivers/net/netxen/netxen_nic.h
@@ -45,7 +45,6 @@
 #include <linux/in.h>
 #include <linux/tcp.h>
 #include <linux/skbuff.h>
-#include <linux/version.h>
 
 #include <linux/ethtool.h>
 #include <linux/mii.h>
diff --git a/drivers/net/netxen/netxen_nic_ethtool.c b/drivers/net/netxen/netxen_nic_ethtool.c
index 4ad3e0844b99..b974ca0fc530 100644
--- a/drivers/net/netxen/netxen_nic_ethtool.c
+++ b/drivers/net/netxen/netxen_nic_ethtool.c
@@ -38,7 +38,6 @@
 #include <asm/io.h>
 #include <linux/netdevice.h>
 #include <linux/ethtool.h>
-#include <linux/version.h>
 
 #include "netxen_nic.h"
 #include "netxen_nic_hw.h"
diff --git a/drivers/net/netxen/netxen_nic_hdr.h b/drivers/net/netxen/netxen_nic_hdr.h
index e8e8d73f6ed7..e80f9e3e5973 100644
--- a/drivers/net/netxen/netxen_nic_hdr.h
+++ b/drivers/net/netxen/netxen_nic_hdr.h
@@ -32,8 +32,6 @@
 
 #include <linux/module.h>
 #include <linux/kernel.h>
-#include <linux/version.h>
-
 #include <linux/spinlock.h>
 #include <asm/irq.h>
 #include <linux/init.h>
diff --git a/drivers/net/tokenring/lanstreamer.c b/drivers/net/tokenring/lanstreamer.c
index 47d84cd28097..59d1673f9387 100644
--- a/drivers/net/tokenring/lanstreamer.c
+++ b/drivers/net/tokenring/lanstreamer.c
@@ -119,7 +119,6 @@
 #include <linux/pci.h>
 #include <linux/dma-mapping.h>
 #include <linux/spinlock.h>
-#include <linux/version.h>
 #include <linux/bitops.h>
 #include <linux/jiffies.h>
 
diff --git a/drivers/net/tokenring/lanstreamer.h b/drivers/net/tokenring/lanstreamer.h
index e7bb3494afc7..13ccee6449c1 100644
--- a/drivers/net/tokenring/lanstreamer.h
+++ b/drivers/net/tokenring/lanstreamer.h
@@ -60,8 +60,6 @@
  *
  */
 
-#include <linux/version.h>
-
 /* MAX_INTR - the maximum number of times we can loop
  * inside the interrupt function before returning
  * control to the OS (maximum value is 256)
diff --git a/drivers/net/wireless/b43legacy/main.c b/drivers/net/wireless/b43legacy/main.c
index 2541c81932f0..1cb77db5c292 100644
--- a/drivers/net/wireless/b43legacy/main.c
+++ b/drivers/net/wireless/b43legacy/main.c
@@ -34,7 +34,6 @@
 #include <linux/moduleparam.h>
 #include <linux/if_arp.h>
 #include <linux/etherdevice.h>
-#include <linux/version.h>
 #include <linux/firmware.h>
 #include <linux/wireless.h>
 #include <linux/workqueue.h>
diff --git a/drivers/net/wireless/iwlwifi/iwl-3945-led.c b/drivers/net/wireless/iwlwifi/iwl-3945-led.c
index d3336966b6b5..705c65bed9fd 100644
--- a/drivers/net/wireless/iwlwifi/iwl-3945-led.c
+++ b/drivers/net/wireless/iwlwifi/iwl-3945-led.c
@@ -27,7 +27,6 @@
 
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/version.h>
 #include <linux/init.h>
 #include <linux/pci.h>
 #include <linux/dma-mapping.h>
diff --git a/drivers/net/wireless/iwlwifi/iwl-led.c b/drivers/net/wireless/iwlwifi/iwl-led.c
index cb11c4a4d691..4eee1b163cd2 100644
--- a/drivers/net/wireless/iwlwifi/iwl-led.c
+++ b/drivers/net/wireless/iwlwifi/iwl-led.c
@@ -27,7 +27,6 @@
 
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/version.h>
 #include <linux/init.h>
 #include <linux/pci.h>
 #include <linux/dma-mapping.h>
diff --git a/drivers/net/wireless/iwlwifi/iwl-rfkill.c b/drivers/net/wireless/iwlwifi/iwl-rfkill.c
index e5e5846e9f25..5d642298f04c 100644
--- a/drivers/net/wireless/iwlwifi/iwl-rfkill.c
+++ b/drivers/net/wireless/iwlwifi/iwl-rfkill.c
@@ -27,7 +27,6 @@
  *****************************************************************************/
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/version.h>
 #include <linux/init.h>
 
 #include <net/mac80211.h>
diff --git a/drivers/rtc/rtc-max6902.c b/drivers/rtc/rtc-max6902.c
index 12f0310ae89c..78b2551fb19d 100644
--- a/drivers/rtc/rtc-max6902.c
+++ b/drivers/rtc/rtc-max6902.c
@@ -20,8 +20,6 @@
  */
 
 #include <linux/module.h>
-#include <linux/version.h>
-
 #include <linux/kernel.h>
 #include <linux/platform_device.h>
 #include <linux/init.h>
diff --git a/drivers/rtc/rtc-r9701.c b/drivers/rtc/rtc-r9701.c
index b35f9bfa2af4..395985b339c9 100644
--- a/drivers/rtc/rtc-r9701.c
+++ b/drivers/rtc/rtc-r9701.c
@@ -14,7 +14,6 @@
  */
 
 #include <linux/module.h>
-#include <linux/version.h>
 #include <linux/kernel.h>
 #include <linux/platform_device.h>
 #include <linux/device.h>
diff --git a/drivers/s390/net/ctcm_mpc.c b/drivers/s390/net/ctcm_mpc.c
index 49ae1cd25caa..2de1e2fccbf9 100644
--- a/drivers/s390/net/ctcm_mpc.c
+++ b/drivers/s390/net/ctcm_mpc.c
@@ -19,7 +19,6 @@
 #undef DEBUGDATA
 #undef DEBUGCCW
 
-#include <linux/version.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
diff --git a/drivers/scsi/dpt/dpti_i2o.h b/drivers/scsi/dpt/dpti_i2o.h
index 19406cea6d6a..179ad77f6cc9 100644
--- a/drivers/scsi/dpt/dpti_i2o.h
+++ b/drivers/scsi/dpt/dpti_i2o.h
@@ -21,7 +21,6 @@
 
 #include <linux/i2o-dev.h>
 
-#include <linux/version.h>
 #include <linux/notifier.h>
 #include <asm/atomic.h>
 
diff --git a/drivers/scsi/ips.c b/drivers/scsi/ips.c
index 7c615c70ec5c..bc9e6ddf41df 100644
--- a/drivers/scsi/ips.c
+++ b/drivers/scsi/ips.c
@@ -165,7 +165,6 @@
 #include <asm/byteorder.h>
 #include <asm/page.h>
 #include <linux/stddef.h>
-#include <linux/version.h>
 #include <linux/string.h>
 #include <linux/errno.h>
 #include <linux/kernel.h>
diff --git a/drivers/scsi/ips.h b/drivers/scsi/ips.h
index e0657b6f009c..4e49fbcfe8af 100644
--- a/drivers/scsi/ips.h
+++ b/drivers/scsi/ips.h
@@ -50,7 +50,6 @@
 #ifndef _IPS_H_
    #define _IPS_H_
 
-#include <linux/version.h>
 #include <linux/nmi.h>
    #include <asm/uaccess.h>
    #include <asm/io.h>
diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c
index 90272e65957a..094b47e94b29 100644
--- a/drivers/scsi/lpfc/lpfc_debugfs.c
+++ b/drivers/scsi/lpfc/lpfc_debugfs.c
@@ -27,7 +27,6 @@
 #include <linux/pci.h>
 #include <linux/spinlock.h>
 #include <linux/ctype.h>
-#include <linux/version.h>
 
 #include <scsi/scsi.h>
 #include <scsi/scsi_device.h>
diff --git a/drivers/scsi/nsp32.c b/drivers/scsi/nsp32.c
index edf9fdb3cb3c..22052bb7becb 100644
--- a/drivers/scsi/nsp32.c
+++ b/drivers/scsi/nsp32.c
@@ -23,7 +23,6 @@
  *   1.2: PowerPC (big endian) support.
  */
 
-#include <linux/version.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
diff --git a/drivers/scsi/nsp32.h b/drivers/scsi/nsp32.h
index 6715ecb3bfca..9565acf1aa72 100644
--- a/drivers/scsi/nsp32.h
+++ b/drivers/scsi/nsp32.h
@@ -16,7 +16,6 @@
 #ifndef _NSP32_H
 #define _NSP32_H
 
-#include <linux/version.h>
 //#define NSP32_DEBUG 9
 
 /*
diff --git a/drivers/scsi/pcmcia/nsp_cs.c b/drivers/scsi/pcmcia/nsp_cs.c
index a221b6ef9fa9..24e6cb8396e3 100644
--- a/drivers/scsi/pcmcia/nsp_cs.c
+++ b/drivers/scsi/pcmcia/nsp_cs.c
@@ -25,7 +25,6 @@
 
 ***********************************************************************/
 
-#include <linux/version.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c
index 50baf6a1d67c..93560cd72784 100644
--- a/drivers/scsi/qla2xxx/qla_mid.c
+++ b/drivers/scsi/qla2xxx/qla_mid.c
@@ -6,7 +6,6 @@
  */
 #include "qla_def.h"
 
-#include <linux/version.h>
 #include <linux/moduleparam.h>
 #include <linux/vmalloc.h>
 #include <linux/smp_lock.h>
diff --git a/drivers/usb/atm/ueagle-atm.c b/drivers/usb/atm/ueagle-atm.c
index cb01b5106efd..b6483dd98acc 100644
--- a/drivers/usb/atm/ueagle-atm.c
+++ b/drivers/usb/atm/ueagle-atm.c
@@ -64,7 +64,6 @@
 #include <linux/ctype.h>
 #include <linux/sched.h>
 #include <linux/kthread.h>
-#include <linux/version.h>
 #include <linux/mutex.h>
 #include <linux/freezer.h>
 
diff --git a/drivers/usb/gadget/amd5536udc.c b/drivers/usb/gadget/amd5536udc.c
index 1500e1b3c302..abf8192f89e8 100644
--- a/drivers/usb/gadget/amd5536udc.c
+++ b/drivers/usb/gadget/amd5536udc.c
@@ -44,7 +44,6 @@
 #include <linux/module.h>
 #include <linux/pci.h>
 #include <linux/kernel.h>
-#include <linux/version.h>
 #include <linux/delay.h>
 #include <linux/ioport.h>
 #include <linux/sched.h>
diff --git a/drivers/usb/gadget/s3c2410_udc.c b/drivers/usb/gadget/s3c2410_udc.c
index 538807384592..29d13ebe7500 100644
--- a/drivers/usb/gadget/s3c2410_udc.c
+++ b/drivers/usb/gadget/s3c2410_udc.c
@@ -35,7 +35,6 @@
 #include <linux/list.h>
 #include <linux/interrupt.h>
 #include <linux/platform_device.h>
-#include <linux/version.h>
 #include <linux/clk.h>
 
 #include <linux/debugfs.h>
diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c
index e6ca9979e3ae..a4ef77ef917d 100644
--- a/drivers/usb/misc/iowarrior.c
+++ b/drivers/usb/misc/iowarrior.c
@@ -19,7 +19,6 @@
 #include <linux/slab.h>
 #include <linux/sched.h>
 #include <linux/poll.h>
-#include <linux/version.h>
 #include <linux/usb/iowarrior.h>
 
 /* Version Information */
diff --git a/drivers/usb/serial/garmin_gps.c b/drivers/usb/serial/garmin_gps.c
index 2e663f1afd5e..d95382088075 100644
--- a/drivers/usb/serial/garmin_gps.c
+++ b/drivers/usb/serial/garmin_gps.c
@@ -38,8 +38,6 @@
 #include <linux/usb.h>
 #include <linux/usb/serial.h>
 
-#include <linux/version.h>
-
 /* the mode to be set when the port ist opened */
 static int initial_mode = 1;
 
diff --git a/drivers/video/arkfb.c b/drivers/video/arkfb.c
index 4bd569e479a7..314d18694b6a 100644
--- a/drivers/video/arkfb.c
+++ b/drivers/video/arkfb.c
@@ -11,7 +11,6 @@
  *  Code is based on s3fb
  */
 
-#include <linux/version.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/errno.h>
diff --git a/drivers/video/s3fb.c b/drivers/video/s3fb.c
index 8361bd0e3df1..4dcec48a1d78 100644
--- a/drivers/video/s3fb.c
+++ b/drivers/video/s3fb.c
@@ -11,7 +11,6 @@
  * which is based on the code of neofb.
  */
 
-#include <linux/version.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/errno.h>
diff --git a/drivers/video/vermilion/vermilion.h b/drivers/video/vermilion/vermilion.h
index c4aba59d4809..7491abfcf1fc 100644
--- a/drivers/video/vermilion/vermilion.h
+++ b/drivers/video/vermilion/vermilion.h
@@ -30,7 +30,6 @@
 #define _VERMILION_H_
 
 #include <linux/kernel.h>
-#include <linux/version.h>
 #include <linux/pci.h>
 #include <asm/atomic.h>
 #include <linux/mutex.h>
diff --git a/drivers/video/vt8623fb.c b/drivers/video/vt8623fb.c
index 34aae7a2a62b..3df17dc8c3d7 100644
--- a/drivers/video/vt8623fb.c
+++ b/drivers/video/vt8623fb.c
@@ -12,7 +12,6 @@
  * (http://davesdomain.org.uk/viafb/)
  */
 
-#include <linux/version.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/errno.h>
diff --git a/drivers/video/xilinxfb.c b/drivers/video/xilinxfb.c
index 7b3a8423f485..5da3d2423cc0 100644
--- a/drivers/video/xilinxfb.c
+++ b/drivers/video/xilinxfb.c
@@ -24,7 +24,6 @@
 #include <linux/device.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
-#include <linux/version.h>
 #include <linux/errno.h>
 #include <linux/string.h>
 #include <linux/mm.h>
diff --git a/fs/jffs2/jffs2_fs_i.h b/fs/jffs2/jffs2_fs_i.h
index 31559f45fdde..4c41db91eaa4 100644
--- a/fs/jffs2/jffs2_fs_i.h
+++ b/fs/jffs2/jffs2_fs_i.h
@@ -12,7 +12,6 @@
 #ifndef _JFFS2_FS_I
 #define _JFFS2_FS_I
 
-#include <linux/version.h>
 #include <linux/rbtree.h>
 #include <linux/posix_acl.h>
 #include <linux/mutex.h>
diff --git a/fs/xfs/xfs_dmapi.h b/fs/xfs/xfs_dmapi.h
index cdc2d3464a1a..2813cdd72375 100644
--- a/fs/xfs/xfs_dmapi.h
+++ b/fs/xfs/xfs_dmapi.h
@@ -18,7 +18,6 @@
 #ifndef __XFS_DMAPI_H__
 #define __XFS_DMAPI_H__
 
-#include <linux/version.h>
 /*	Values used to define the on-disk version of dm_attrname_t. All
  *	on-disk attribute names start with the 8-byte string "SGI_DMI_".
  *
diff --git a/include/asm-x86/xen/hypervisor.h b/include/asm-x86/xen/hypervisor.h
index 8e15dd28c91f..04ee0610014a 100644
--- a/include/asm-x86/xen/hypervisor.h
+++ b/include/asm-x86/xen/hypervisor.h
@@ -35,7 +35,6 @@
 
 #include <linux/types.h>
 #include <linux/kernel.h>
-#include <linux/version.h>
 
 #include <xen/interface/xen.h>
 #include <xen/interface/version.h>
diff --git a/include/linux/fs_uart_pd.h b/include/linux/fs_uart_pd.h
index 809bb9ffc788..36b61ff39277 100644
--- a/include/linux/fs_uart_pd.h
+++ b/include/linux/fs_uart_pd.h
@@ -12,7 +12,6 @@
 #ifndef FS_UART_PD_H
 #define FS_UART_PD_H
 
-#include <linux/version.h>
 #include <asm/types.h>
 
 enum fs_uart_id {
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 21575fc46d05..1d3ef29a2583 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -14,7 +14,6 @@
  */
 
 #include <linux/module.h>
-#include <linux/version.h>
 #include <linux/nsproxy.h>
 #include <linux/init_task.h>
 #include <linux/mnt_namespace.h>
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index a0abf9a463f9..80ccac849e46 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -14,7 +14,6 @@
 #include <linux/module.h>
 #include <linux/file.h>
 #include <linux/utsname.h>
-#include <linux/version.h>
 #include <linux/delay.h>
 #include <linux/bitops.h>
 #include <linux/genhd.h>
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index a9ab0596de44..532858fa5b88 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -6,7 +6,6 @@
  */
 
 #include <linux/module.h>
-#include <linux/version.h>
 #include <linux/nsproxy.h>
 #include <linux/slab.h>
 #include <linux/user_namespace.h>
diff --git a/kernel/utsname.c b/kernel/utsname.c
index 64d398f12444..815237a55af8 100644
--- a/kernel/utsname.c
+++ b/kernel/utsname.c
@@ -12,7 +12,6 @@
 #include <linux/module.h>
 #include <linux/uts.h>
 #include <linux/utsname.h>
-#include <linux/version.h>
 #include <linux/err.h>
 #include <linux/slab.h>
 
diff --git a/kernel/utsname_sysctl.c b/kernel/utsname_sysctl.c
index fe3a56c2256d..4ab9659d269e 100644
--- a/kernel/utsname_sysctl.c
+++ b/kernel/utsname_sysctl.c
@@ -12,7 +12,6 @@
 #include <linux/module.h>
 #include <linux/uts.h>
 #include <linux/utsname.h>
-#include <linux/version.h>
 #include <linux/sysctl.h>
 
 static void *get_uts(ctl_table *table, int write)
diff --git a/sound/mips/au1x00.c b/sound/mips/au1x00.c
index ee0741f9eb53..fbef38a9604a 100644
--- a/sound/mips/au1x00.c
+++ b/sound/mips/au1x00.c
@@ -38,7 +38,6 @@
 #include <linux/interrupt.h>
 #include <linux/init.h>
 #include <linux/slab.h>
-#include <linux/version.h>
 #include <sound/core.h>
 #include <sound/initval.h>
 #include <sound/pcm.h>
diff --git a/sound/soc/at91/eti_b1_wm8731.c b/sound/soc/at91/eti_b1_wm8731.c
index b081e83766b7..b81d6b2cfa1d 100644
--- a/sound/soc/at91/eti_b1_wm8731.c
+++ b/sound/soc/at91/eti_b1_wm8731.c
@@ -22,7 +22,6 @@
 
 #include <linux/module.h>
 #include <linux/moduleparam.h>
-#include <linux/version.h>
 #include <linux/kernel.h>
 #include <linux/clk.h>
 #include <linux/timer.h>
diff --git a/sound/soc/codecs/wm8753.c b/sound/soc/codecs/wm8753.c
index 8604809f0c36..dc7b18fd2782 100644
--- a/sound/soc/codecs/wm8753.c
+++ b/sound/soc/codecs/wm8753.c
@@ -34,7 +34,6 @@
 
 #include <linux/module.h>
 #include <linux/moduleparam.h>
-#include <linux/version.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/delay.h>
diff --git a/sound/soc/codecs/wm9712.c b/sound/soc/codecs/wm9712.c
index 1fb7f9a7aecd..2f1c91b1d556 100644
--- a/sound/soc/codecs/wm9712.c
+++ b/sound/soc/codecs/wm9712.c
@@ -13,7 +13,6 @@
 
 #include <linux/init.h>
 #include <linux/module.h>
-#include <linux/version.h>
 #include <linux/kernel.h>
 #include <linux/device.h>
 #include <sound/core.h>
-- 
cgit v1.2.3


From cc996099174dc05b35b7a29301026987990e7f8c Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Sat, 2 Aug 2008 07:30:48 +0400
Subject: [PATCH] proc: inode number fixlet

Ouch, if number taken from IDA is too big, the intent was to signal an
error, not check for overflow and still do overflowing addition.

One still needs 2^28 proc entries to notice this.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/proc/generic.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 4fb81e9c94e3..bca0f81eb687 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -330,6 +330,7 @@ retry:
 		spin_lock(&proc_inum_lock);
 		ida_remove(&proc_inum_ida, i);
 		spin_unlock(&proc_inum_lock);
+		return 0;
 	}
 	return PROC_DYNAMIC_FIRST + i;
 }
-- 
cgit v1.2.3


From 2d8a10cd1760e7ecc07a21e409485947c68a3291 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 11 Aug 2008 11:33:57 -0400
Subject: [PATCH] fix efs_lookup()

it needs to use d_splice_alias(), not d_add()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/efs/namei.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/efs/namei.c b/fs/efs/namei.c
index 3a404e7fad53..291abb11e20e 100644
--- a/fs/efs/namei.c
+++ b/fs/efs/namei.c
@@ -74,8 +74,7 @@ struct dentry *efs_lookup(struct inode *dir, struct dentry *dentry, struct namei
 	}
 	unlock_kernel();
 
-	d_add(dentry, inode);
-	return NULL;
+	return d_splice_alias(inode, dentry);
 }
 
 static struct inode *efs_nfs_get_inode(struct super_block *sb, u64 ino,
-- 
cgit v1.2.3


From e45b590b976465c258f3e2a6cc84573fc19e16d3 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 7 Aug 2008 23:49:07 +0200
Subject: [PATCH] change d_add_ci argument ordering

As pointed out during review d_add_ci argument order should match d_add,
so switch the dentry and inode arguments.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/dcache.c                 | 2 +-
 fs/xfs/linux-2.6/xfs_iops.c | 2 +-
 include/linux/dcache.h      | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/dcache.c b/fs/dcache.c
index 101663d15e9f..80e93956aced 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1236,7 +1236,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
  * If no entry exists with the exact case name, allocate new dentry with
  * the exact case, and return the spliced entry.
  */
-struct dentry *d_add_ci(struct inode *inode, struct dentry *dentry,
+struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
 			struct qstr *name)
 {
 	int error;
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 91bcd979242c..095d271f3434 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -355,7 +355,7 @@ xfs_vn_ci_lookup(
 	/* else case-insensitive match... */
 	dname.name = ci_name.name;
 	dname.len = ci_name.len;
-	dentry = d_add_ci(VFS_I(ip), dentry, &dname);
+	dentry = d_add_ci(dentry, VFS_I(ip), &dname);
 	kmem_free(ci_name.name);
 	return dentry;
 }
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 07aa198f19ed..efba1de629ac 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -230,7 +230,7 @@ extern void d_delete(struct dentry *);
 extern struct dentry * d_alloc(struct dentry *, const struct qstr *);
 extern struct dentry * d_alloc_anon(struct inode *);
 extern struct dentry * d_splice_alias(struct inode *, struct dentry *);
-extern struct dentry * d_add_ci(struct inode *, struct dentry *, struct qstr *);
+extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *);
 extern void shrink_dcache_sb(struct super_block *);
 extern void shrink_dcache_parent(struct dentry *);
 extern void shrink_dcache_for_umount(struct super_block *);
-- 
cgit v1.2.3


From 2690421743b03c9be05d8e44c3b827986d1329a7 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 7 Aug 2008 23:50:21 +0200
Subject: [PATCH] ntfs: use d_add_ci

d_add_ci was lifted 1:1 from ntfs.  Change ntfs to use the common
version.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ntfs/namei.c | 89 ++-------------------------------------------------------
 1 file changed, 2 insertions(+), 87 deletions(-)

(limited to 'fs')

diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c
index e1781c8b1650..9e8a95be7a1e 100644
--- a/fs/ntfs/namei.c
+++ b/fs/ntfs/namei.c
@@ -174,7 +174,6 @@ static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent,
 	// TODO: Consider moving this lot to a separate function! (AIA)
 handle_name:
    {
-	struct dentry *real_dent, *new_dent;
 	MFT_RECORD *m;
 	ntfs_attr_search_ctx *ctx;
 	ntfs_inode *ni = NTFS_I(dent_inode);
@@ -255,93 +254,9 @@ handle_name:
 	}
 	nls_name.hash = full_name_hash(nls_name.name, nls_name.len);
 
-	/*
-	 * Note: No need for dent->d_lock lock as i_mutex is held on the
-	 * parent inode.
-	 */
-
-	/* Does a dentry matching the nls_name exist already? */
-	real_dent = d_lookup(dent->d_parent, &nls_name);
-	/* If not, create it now. */
-	if (!real_dent) {
-		real_dent = d_alloc(dent->d_parent, &nls_name);
-		kfree(nls_name.name);
-		if (!real_dent) {
-			err = -ENOMEM;
-			goto err_out;
-		}
-		new_dent = d_splice_alias(dent_inode, real_dent);
-		if (new_dent)
-			dput(real_dent);
-		else
-			new_dent = real_dent;
-		ntfs_debug("Done.  (Created new dentry.)");
-		return new_dent;
-	}
+	dent = d_add_ci(dent, dent_inode, &nls_name);
 	kfree(nls_name.name);
-	/* Matching dentry exists, check if it is negative. */
-	if (real_dent->d_inode) {
-		if (unlikely(real_dent->d_inode != dent_inode)) {
-			/* This can happen because bad inodes are unhashed. */
-			BUG_ON(!is_bad_inode(dent_inode));
-			BUG_ON(!is_bad_inode(real_dent->d_inode));
-		}
-		/*
-		 * Already have the inode and the dentry attached, decrement
-		 * the reference count to balance the ntfs_iget() we did
-		 * earlier on.  We found the dentry using d_lookup() so it
-		 * cannot be disconnected and thus we do not need to worry
-		 * about any NFS/disconnectedness issues here.
-		 */
-		iput(dent_inode);
-		ntfs_debug("Done.  (Already had inode and dentry.)");
-		return real_dent;
-	}
-	/*
-	 * Negative dentry: instantiate it unless the inode is a directory and
-	 * has a 'disconnected' dentry (i.e. IS_ROOT and DCACHE_DISCONNECTED),
-	 * in which case d_move() that in place of the found dentry.
-	 */
-	if (!S_ISDIR(dent_inode->i_mode)) {
-		/* Not a directory; everything is easy. */
-		d_instantiate(real_dent, dent_inode);
-		ntfs_debug("Done.  (Already had negative file dentry.)");
-		return real_dent;
-	}
-	spin_lock(&dcache_lock);
-	if (list_empty(&dent_inode->i_dentry)) {
-		/*
-		 * Directory without a 'disconnected' dentry; we need to do
-		 * d_instantiate() by hand because it takes dcache_lock which
-		 * we already hold.
-		 */
-		list_add(&real_dent->d_alias, &dent_inode->i_dentry);
-		real_dent->d_inode = dent_inode;
-		spin_unlock(&dcache_lock);
-		security_d_instantiate(real_dent, dent_inode);
-		ntfs_debug("Done.  (Already had negative directory dentry.)");
-		return real_dent;
-	}
-	/*
-	 * Directory with a 'disconnected' dentry; get a reference to the
-	 * 'disconnected' dentry.
-	 */
-	new_dent = list_entry(dent_inode->i_dentry.next, struct dentry,
-			d_alias);
-	dget_locked(new_dent);
-	spin_unlock(&dcache_lock);
-	/* Do security vodoo. */
-	security_d_instantiate(real_dent, dent_inode);
-	/* Move new_dent in place of real_dent. */
-	d_move(new_dent, real_dent);
-	/* Balance the ntfs_iget() we did above. */
-	iput(dent_inode);
-	/* Throw away real_dent. */
-	dput(real_dent);
-	/* Use new_dent as the actual dentry. */
-	ntfs_debug("Done.  (Already had negative, disconnected directory "
-			"dentry.)");
-	return new_dent;
+	return dent;
 
 eio_err_out:
 	ntfs_error(vol->sb, "Illegal file name attribute. Run chkdsk.");
-- 
cgit v1.2.3


From 8f3f655da7288504c1013621090ecc940173ae1c Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 12 Aug 2008 00:28:24 -0400
Subject: [PATCH] fix regular readdir() and friends

Handling of -EOVERFLOW.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/compat.c  | 8 ++++++--
 fs/readdir.c | 8 ++++++--
 2 files changed, 12 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/compat.c b/fs/compat.c
index c9d1472e65c5..075d0509970d 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -792,8 +792,10 @@ static int compat_fillonedir(void *__buf, const char *name, int namlen,
 	if (buf->result)
 		return -EINVAL;
 	d_ino = ino;
-	if (sizeof(d_ino) < sizeof(ino) && d_ino != ino)
+	if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
+		buf->result = -EOVERFLOW;
 		return -EOVERFLOW;
+	}
 	buf->result++;
 	dirent = buf->dirent;
 	if (!access_ok(VERIFY_WRITE, dirent,
@@ -862,8 +864,10 @@ static int compat_filldir(void *__buf, const char *name, int namlen,
 	if (reclen > buf->count)
 		return -EINVAL;
 	d_ino = ino;
-	if (sizeof(d_ino) < sizeof(ino) && d_ino != ino)
+	if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
+		buf->error = -EOVERFLOW;
 		return -EOVERFLOW;
+	}
 	dirent = buf->previous;
 	if (dirent) {
 		if (__put_user(offset, &dirent->d_off))
diff --git a/fs/readdir.c b/fs/readdir.c
index 4e026e5407fb..93a7559bbfd8 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -80,8 +80,10 @@ static int fillonedir(void * __buf, const char * name, int namlen, loff_t offset
 	if (buf->result)
 		return -EINVAL;
 	d_ino = ino;
-	if (sizeof(d_ino) < sizeof(ino) && d_ino != ino)
+	if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
+		buf->result = -EOVERFLOW;
 		return -EOVERFLOW;
+	}
 	buf->result++;
 	dirent = buf->dirent;
 	if (!access_ok(VERIFY_WRITE, dirent,
@@ -155,8 +157,10 @@ static int filldir(void * __buf, const char * name, int namlen, loff_t offset,
 	if (reclen > buf->count)
 		return -EINVAL;
 	d_ino = ino;
-	if (sizeof(d_ino) < sizeof(ino) && d_ino != ino)
+	if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
+		buf->error = -EOVERFLOW;
 		return -EOVERFLOW;
+	}
 	dirent = buf->previous;
 	if (dirent) {
 		if (__put_user(offset, &dirent->d_off))
-- 
cgit v1.2.3


From 59af1584bf33810639cb98d79856021253e2177c Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 24 Aug 2008 07:24:41 -0400
Subject: [PATCH] fix ->llseek() for a bunch of directories

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/9p/vfs_dir.c             | 1 +
 fs/adfs/dir.c               | 1 +
 fs/affs/dir.c               | 1 +
 fs/autofs4/root.c           | 2 ++
 fs/befs/linuxvfs.c          | 1 +
 fs/xfs/linux-2.6/xfs_file.c | 1 +
 6 files changed, 7 insertions(+)

(limited to 'fs')

diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 88e3787c6ea9..e298fe194093 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -119,6 +119,7 @@ int v9fs_dir_release(struct inode *inode, struct file *filp)
 
 const struct file_operations v9fs_dir_operations = {
 	.read = generic_read_dir,
+	.llseek = generic_file_llseek,
 	.readdir = v9fs_dir_readdir,
 	.open = v9fs_file_open,
 	.release = v9fs_dir_release,
diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c
index fc1a8dc64d78..85a30e929800 100644
--- a/fs/adfs/dir.c
+++ b/fs/adfs/dir.c
@@ -197,6 +197,7 @@ out:
 
 const struct file_operations adfs_dir_operations = {
 	.read		= generic_read_dir,
+	.llseek		= generic_file_llseek,
 	.readdir	= adfs_readdir,
 	.fsync		= file_fsync,
 };
diff --git a/fs/affs/dir.c b/fs/affs/dir.c
index 6e3f282424b0..7b36904dbeac 100644
--- a/fs/affs/dir.c
+++ b/fs/affs/dir.c
@@ -19,6 +19,7 @@ static int affs_readdir(struct file *, void *, filldir_t);
 
 const struct file_operations affs_dir_operations = {
 	.read		= generic_read_dir,
+	.llseek		= generic_file_llseek,
 	.readdir	= affs_readdir,
 	.fsync		= file_fsync,
 };
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index bcfb2dc0a61b..2a41c2a7fc52 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -36,6 +36,7 @@ const struct file_operations autofs4_root_operations = {
 	.release	= dcache_dir_close,
 	.read		= generic_read_dir,
 	.readdir	= dcache_readdir,
+	.llseek		= dcache_dir_lseek,
 	.ioctl		= autofs4_root_ioctl,
 };
 
@@ -44,6 +45,7 @@ const struct file_operations autofs4_dir_operations = {
 	.release	= dcache_dir_close,
 	.read		= generic_read_dir,
 	.readdir	= dcache_readdir,
+	.llseek		= dcache_dir_lseek,
 };
 
 const struct inode_operations autofs4_indirect_root_inode_operations = {
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 02c6e62b72f8..740f53672a8a 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -66,6 +66,7 @@ static struct kmem_cache *befs_inode_cachep;
 static const struct file_operations befs_dir_operations = {
 	.read		= generic_read_dir,
 	.readdir	= befs_readdir,
+	.llseek		= generic_file_llseek,
 };
 
 static const struct inode_operations befs_dir_inode_operations = {
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 5f60363b9343..5311c1acdd40 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -475,6 +475,7 @@ const struct file_operations xfs_invis_file_operations = {
 const struct file_operations xfs_dir_file_operations = {
 	.read		= generic_read_dir,
 	.readdir	= xfs_file_readdir,
+	.llseek		= generic_file_llseek,
 	.unlocked_ioctl	= xfs_file_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= xfs_file_compat_ioctl,
-- 
cgit v1.2.3


From 4cdfe84b51420c9ac95c7133da2d4c8a191094af Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 24 Aug 2008 07:45:33 -0400
Subject: [PATCH] deal with the first call of ->show() generating no output

seq_read() has a subtle bug - we want the first loop there to go
until at least one *non-empty* record had fit entirely into buffer.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/seq_file.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/seq_file.c b/fs/seq_file.c
index 5d54205e486b..bd20f7f5a933 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -108,9 +108,9 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
 			goto Done;
 	}
 	/* we need at least one record in buffer */
+	pos = m->index;
+	p = m->op->start(m, &pos);
 	while (1) {
-		pos = m->index;
-		p = m->op->start(m, &pos);
 		err = PTR_ERR(p);
 		if (!p || IS_ERR(p))
 			break;
@@ -119,6 +119,11 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
 			break;
 		if (unlikely(err))
 			m->count = 0;
+		if (unlikely(!m->count)) {
+			p = m->op->next(m, p, &pos);
+			m->index = pos;
+			continue;
+		}
 		if (m->count < m->size)
 			goto Fill;
 		m->op->stop(m, p);
@@ -128,6 +133,8 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
 			goto Enomem;
 		m->count = 0;
 		m->version = 0;
+		pos = m->index;
+		p = m->op->start(m, &pos);
 	}
 	m->op->stop(m, p);
 	m->count = 0;
-- 
cgit v1.2.3


From d6817cdbd143f87f9d7c59a4c3194091190eeb84 Mon Sep 17 00:00:00 2001
From: Joel Becker <Joel.Becker@oracle.com>
Date: Fri, 22 Aug 2008 14:30:10 -0700
Subject: ocfs2: Increment the reference count of an already-active stack.

The ocfs2_stack_driver_request() function failed to increment the
refcount of an already-active stack.  It only did the increment on the
first reference.  Whoops.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Tested-by: Marcos Matsunaga <marcos.matsunaga@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
---
 fs/ocfs2/stackglue.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c
index 10e149ae5e3a..07f348b8d721 100644
--- a/fs/ocfs2/stackglue.c
+++ b/fs/ocfs2/stackglue.c
@@ -97,13 +97,14 @@ static int ocfs2_stack_driver_request(const char *stack_name,
 		goto out;
 	}
 
-	/* Ok, the stack is pinned */
-	p->sp_count++;
 	active_stack = p;
-
 	rc = 0;
 
 out:
+	/* If we found it, pin it */
+	if (!rc)
+		active_stack->sp_count++;
+
 	spin_unlock(&ocfs2_stack_lock);
 	return rc;
 }
-- 
cgit v1.2.3


From 6ce5eecb9cd3ac97b952c50309b87c31488a45e9 Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Tue, 26 Aug 2008 00:37:14 +0000
Subject: [CIFS] check version in spnego upcall response

Currently, we don't check the version in the SPNEGO upcall response
even though one is provided. Jeff and Q have made the corresponding
change to the Samba client (cifs.upcall).

Acked-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/CHANGES       | 6 +++++-
 fs/cifs/cifs_spnego.h | 2 +-
 fs/cifs/sess.c        | 9 +++++++++
 3 files changed, 15 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index f5d0083e09fa..526041a52d35 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -4,7 +4,11 @@ Fix premature write failure on congested networks (we would give up
 on EAGAIN from the socket too quickly on large writes).
 Cifs_mkdir and cifs_create now respect the setgid bit on parent dir.
 Fix endian problems in acl (mode from/to cifs acl) on bigendian
-architectures.
+architectures.  Fix problems with preserving timestamps on copying open
+files (e.g. "cp -a") to Windows servers.  For mkdir and create honor setgid bit
+on parent directory when server supports Unix Extensions but not POSIX
+create. Update cifs.upcall version to handle new Kerberos sec flags
+(this requires update of cifs.upcall program from Samba).
 
 Version 1.53
 ------------
diff --git a/fs/cifs/cifs_spnego.h b/fs/cifs/cifs_spnego.h
index 05a34b17a1ab..e4041ec4d712 100644
--- a/fs/cifs/cifs_spnego.h
+++ b/fs/cifs/cifs_spnego.h
@@ -23,7 +23,7 @@
 #ifndef _CIFS_SPNEGO_H
 #define _CIFS_SPNEGO_H
 
-#define CIFS_SPNEGO_UPCALL_VERSION 1
+#define CIFS_SPNEGO_UPCALL_VERSION 2
 
 /*
  * The version field should always be set to CIFS_SPNEGO_UPCALL_VERSION.
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 3188e4d9cddb..b537fad3bf50 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -516,6 +516,15 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time,
 		}
 
 		msg = spnego_key->payload.data;
+		/* check version field to make sure that cifs.upcall is
+		   sending us a response in an expected form */
+		if (msg->version != CIFS_SPNEGO_UPCALL_VERSION) {
+			cERROR(1, ("incorrect version of cifs.upcall (expected"
+				   " %d but got %d)",
+				   CIFS_SPNEGO_UPCALL_VERSION, msg->version));
+			rc = -EKEYREJECTED;
+			goto ssetup_exit;
+		}
 		/* bail out if key is too long */
 		if (msg->sesskey_len >
 		    sizeof(ses->server->mac_signing_key.data.krb5)) {
-- 
cgit v1.2.3


From e9775843ecb039318dbc9ded6da9c762bff28a0b Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Tue, 26 Aug 2008 18:22:50 +0000
Subject: [CIFS] Correct keys dependency for cifs kerberos support

Must also depend on CIFS ...

Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/Kconfig b/fs/Kconfig
index f0427105a619..3fab3901e0ef 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1984,7 +1984,7 @@ config CIFS_EXPERIMENTAL
 
 config CIFS_UPCALL
 	  bool "Kerberos/SPNEGO advanced session setup (EXPERIMENTAL)"
-	  depends on KEYS
+	  depends on CIFS && KEYS
 	  help
 	    Enables an upcall mechanism for CIFS which accesses
 	    userspace helper utilities to provide SPNEGO packaged (RFC 4178)
-- 
cgit v1.2.3


From 96c2a1137b9e00bcdbe3a95113ea8f42ca994f76 Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Tue, 26 Aug 2008 18:32:28 +0000
Subject: [CIFS] Reorder cifs config item for better clarity

Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/Kconfig | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/Kconfig b/fs/Kconfig
index 3fab3901e0ef..abccb5dab9a8 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1930,6 +1930,16 @@ config CIFS_WEAK_PW_HASH
 
 	  If unsure, say N.
 
+config CIFS_UPCALL
+	  bool "Kerberos/SPNEGO advanced session setup"
+	  depends on CIFS && KEYS
+	  help
+	    Enables an upcall mechanism for CIFS which accesses
+	    userspace helper utilities to provide SPNEGO packaged (RFC 4178)
+	    Kerberos tickets which are needed to mount to certain secure servers
+	    (for which more secure Kerberos authentication is required). If
+	    unsure, say N.
+
 config CIFS_XATTR
         bool "CIFS extended attributes"
         depends on CIFS
@@ -1982,16 +1992,6 @@ config CIFS_EXPERIMENTAL
 	    (which is disabled by default). See the file fs/cifs/README 
 	    for more details.  If unsure, say N.
 
-config CIFS_UPCALL
-	  bool "Kerberos/SPNEGO advanced session setup (EXPERIMENTAL)"
-	  depends on CIFS && KEYS
-	  help
-	    Enables an upcall mechanism for CIFS which accesses
-	    userspace helper utilities to provide SPNEGO packaged (RFC 4178)
-	    Kerberos tickets which are needed to mount to certain secure servers
-	    (for which more secure Kerberos authentication is required). If
-	    unsure, say N.
-
 config CIFS_DFS_UPCALL
 	  bool "DFS feature support (EXPERIMENTAL)"
 	  depends on CIFS_EXPERIMENTAL
-- 
cgit v1.2.3


From 48fd4f93a00eac844678629f2f00518e146ed30d Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Fri, 22 Aug 2008 10:00:36 +0200
Subject: block: submit_bh() inadvertently discards barrier flag on a sync
 write

Reported by Milan Broz <mbroz@redhat.com>, commit 18ce3751 inadvertently
made submit_bh() discard the barrier bit for a WRITE_SYNC request. Fix
that up.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 fs/buffer.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/buffer.c b/fs/buffer.c
index 38653e36e225..ac78d4c19b3b 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2926,14 +2926,17 @@ int submit_bh(int rw, struct buffer_head * bh)
 	BUG_ON(!buffer_mapped(bh));
 	BUG_ON(!bh->b_end_io);
 
-	if (buffer_ordered(bh) && (rw == WRITE))
-		rw = WRITE_BARRIER;
+	/*
+	 * Mask in barrier bit for a write (could be either a WRITE or a
+	 * WRITE_SYNC
+	 */
+	if (buffer_ordered(bh) && (rw & WRITE))
+		rw |= WRITE_BARRIER;
 
 	/*
-	 * Only clear out a write error when rewriting, should this
-	 * include WRITE_SYNC as well?
+	 * Only clear out a write error when rewriting
 	 */
-	if (test_set_buffer_req(bh) && (rw == WRITE || rw == WRITE_BARRIER))
+	if (test_set_buffer_req(bh) && (rw & WRITE))
 		clear_buffer_write_io_error(bh);
 
 	/*
-- 
cgit v1.2.3


From 76029ff37f31dad64641489c610d98955217bb68 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Mon, 25 Aug 2008 20:36:08 +0200
Subject: bio: fix bio_copy_kern() handling of bio->bv_len

The commit 68154e90c9d1492d570671ae181d9a8f8530da55 introduced
bio_copy_kern() to add bounce support to blk_rq_map_kern.

bio_copy_kern() uses bio->bv_len to copy data for READ commands after
the completion but it doesn't work with a request that partially
completed. SCSI always completes a PC request as a whole but seems
some don't.

This patch fixes bio_copy_kern to handle the above case. As
bio_copy_user does, bio_copy_kern uses struct bio_map_data to store
struct bio_vec.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Reported-by: Nix <nix@esperi.org.uk>
Tested-by: Nix <nix@esperi.org.uk>
Cc: stable@kernel.org
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 fs/bio.c | 38 ++++++++++++++++++++++++++++----------
 1 file changed, 28 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/bio.c b/fs/bio.c
index 8000e2fa16cb..8b1f5ee6f83c 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -469,20 +469,21 @@ static void bio_free_map_data(struct bio_map_data *bmd)
 	kfree(bmd);
 }
 
-static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count)
+static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count,
+					       gfp_t gfp_mask)
 {
-	struct bio_map_data *bmd = kmalloc(sizeof(*bmd), GFP_KERNEL);
+	struct bio_map_data *bmd = kmalloc(sizeof(*bmd), gfp_mask);
 
 	if (!bmd)
 		return NULL;
 
-	bmd->iovecs = kmalloc(sizeof(struct bio_vec) * nr_segs, GFP_KERNEL);
+	bmd->iovecs = kmalloc(sizeof(struct bio_vec) * nr_segs, gfp_mask);
 	if (!bmd->iovecs) {
 		kfree(bmd);
 		return NULL;
 	}
 
-	bmd->sgvecs = kmalloc(sizeof(struct sg_iovec) * iov_count, GFP_KERNEL);
+	bmd->sgvecs = kmalloc(sizeof(struct sg_iovec) * iov_count, gfp_mask);
 	if (bmd->sgvecs)
 		return bmd;
 
@@ -596,7 +597,7 @@ struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov,
 		len += iov[i].iov_len;
 	}
 
-	bmd = bio_alloc_map_data(nr_pages, iov_count);
+	bmd = bio_alloc_map_data(nr_pages, iov_count, GFP_KERNEL);
 	if (!bmd)
 		return ERR_PTR(-ENOMEM);
 
@@ -942,19 +943,22 @@ static void bio_copy_kern_endio(struct bio *bio, int err)
 {
 	struct bio_vec *bvec;
 	const int read = bio_data_dir(bio) == READ;
-	char *p = bio->bi_private;
+	struct bio_map_data *bmd = bio->bi_private;
 	int i;
+	char *p = bmd->sgvecs[0].iov_base;
 
 	__bio_for_each_segment(bvec, bio, i, 0) {
 		char *addr = page_address(bvec->bv_page);
+		int len = bmd->iovecs[i].bv_len;
 
 		if (read && !err)
-			memcpy(p, addr, bvec->bv_len);
+			memcpy(p, addr, len);
 
 		__free_page(bvec->bv_page);
-		p += bvec->bv_len;
+		p += len;
 	}
 
+	bio_free_map_data(bmd);
 	bio_put(bio);
 }
 
@@ -978,11 +982,21 @@ struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len,
 	const int nr_pages = end - start;
 	struct bio *bio;
 	struct bio_vec *bvec;
+	struct bio_map_data *bmd;
 	int i, ret;
+	struct sg_iovec iov;
+
+	iov.iov_base = data;
+	iov.iov_len = len;
+
+	bmd = bio_alloc_map_data(nr_pages, 1, gfp_mask);
+	if (!bmd)
+		return ERR_PTR(-ENOMEM);
 
+	ret = -ENOMEM;
 	bio = bio_alloc(gfp_mask, nr_pages);
 	if (!bio)
-		return ERR_PTR(-ENOMEM);
+		goto out_bmd;
 
 	while (len) {
 		struct page *page;
@@ -1016,14 +1030,18 @@ struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len,
 		}
 	}
 
-	bio->bi_private = data;
+	bio->bi_private = bmd;
 	bio->bi_end_io = bio_copy_kern_endio;
+
+	bio_set_map_data(bmd, bio, &iov, 1);
 	return bio;
 cleanup:
 	bio_for_each_segment(bvec, bio, i)
 		__free_page(bvec->bv_page);
 
 	bio_put(bio);
+out_bmd:
+	bio_free_map_data(bmd);
 
 	return ERR_PTR(ret);
 }
-- 
cgit v1.2.3


From aefcc28a3a63ac33a298777aa50ba43641c75241 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Mon, 25 Aug 2008 20:36:08 +0200
Subject: bio: fix __bio_copy_iov() handling of bio->bv_len

The commit c5dec1c3034f1ae3503efbf641ff3b0273b64797 introduced
__bio_copy_iov() to add bounce support to blk_rq_map_user_iov.

__bio_copy_iov() uses bio->bv_len to copy data for READ commands after
the completion but it doesn't work with a request that partially
completed. SCSI always completes a PC request as a whole but seems
some don't.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Cc: stable@kernel.org
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 fs/bio.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/bio.c b/fs/bio.c
index 8b1f5ee6f83c..3cba7ae34d75 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -492,8 +492,8 @@ static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count,
 	return NULL;
 }
 
-static int __bio_copy_iov(struct bio *bio, struct sg_iovec *iov, int iov_count,
-			  int uncopy)
+static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs,
+			  struct sg_iovec *iov, int iov_count, int uncopy)
 {
 	int ret = 0, i;
 	struct bio_vec *bvec;
@@ -503,7 +503,7 @@ static int __bio_copy_iov(struct bio *bio, struct sg_iovec *iov, int iov_count,
 
 	__bio_for_each_segment(bvec, bio, i, 0) {
 		char *bv_addr = page_address(bvec->bv_page);
-		unsigned int bv_len = bvec->bv_len;
+		unsigned int bv_len = iovecs[i].bv_len;
 
 		while (bv_len && iov_idx < iov_count) {
 			unsigned int bytes;
@@ -555,7 +555,7 @@ int bio_uncopy_user(struct bio *bio)
 	struct bio_map_data *bmd = bio->bi_private;
 	int ret;
 
-	ret = __bio_copy_iov(bio, bmd->sgvecs, bmd->nr_sgvecs, 1);
+	ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs, bmd->nr_sgvecs, 1);
 
 	bio_free_map_data(bmd);
 	bio_put(bio);
@@ -634,7 +634,7 @@ struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov,
 	 * success
 	 */
 	if (!write_to_vm) {
-		ret = __bio_copy_iov(bio, iov, iov_count, 0);
+		ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0);
 		if (ret)
 			goto cleanup;
 	}
-- 
cgit v1.2.3


From 87ed1d65fb536a0cd4e84874c0b038f953e448aa Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Wed, 27 Aug 2008 17:53:30 +0000
Subject: [CIFS] Add destroy routine for dns_resolver

Otherwise, we're leaking the payload memory.

CC: Stable Kernel <stable@vger.kernel.org>
Acked-by: David Howells <dhowells@redhat.com>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/CHANGES       | 3 ++-
 fs/cifs/dns_resolve.c | 7 +++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 526041a52d35..f9e4ad97a79e 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -8,7 +8,8 @@ architectures.  Fix problems with preserving timestamps on copying open
 files (e.g. "cp -a") to Windows servers.  For mkdir and create honor setgid bit
 on parent directory when server supports Unix Extensions but not POSIX
 create. Update cifs.upcall version to handle new Kerberos sec flags
-(this requires update of cifs.upcall program from Samba).
+(this requires update of cifs.upcall program from Samba).  Fix memory leak
+on dns_upcall (resolving DFS referralls).
 
 Version 1.53
 ------------
diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c
index f730ef35499e..a2e0673e1b08 100644
--- a/fs/cifs/dns_resolve.c
+++ b/fs/cifs/dns_resolve.c
@@ -47,11 +47,18 @@ static int dns_resolver_instantiate(struct key *key, const void *data,
 	return rc;
 }
 
+static void
+dns_resolver_destroy(struct key *key)
+{
+	kfree(key->payload.data);
+}
+
 struct key_type key_type_dns_resolver = {
 	.name        = "dns_resolver",
 	.def_datalen = sizeof(struct in_addr),
 	.describe    = user_describe,
 	.instantiate = dns_resolver_instantiate,
+	.destroy     = dns_resolver_destroy,
 	.match       = user_match,
 };
 
-- 
cgit v1.2.3


From bcc55c6664a90146149ba0fd93052adc94287b9f Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Wed, 27 Aug 2008 21:30:22 +0000
Subject: [CIFS] Fix plaintext authentication

The last eight bytes of the password field were not cleared when doing lanman plaintext password authentication. This patch fixes that.

I tested it with Samba by setting password
encryption to no in the server's smb.conf.  Other servers also can be
configured to force plaintext authentication.    Note that plaintexti
authentication requires setting /proc/fs/cifs/SecurityFlags to 0x30030
on the client (enabling both LANMAN and also plaintext password support).
Also note that LANMAN support (and thus plaintext password support) requires
CONFIG_CIFS_WEAK_PW_HASH to be enabled in menuconfig.

CC: Jeff Layton <jlayton@redhat.com>
CC: Stable Kernel <stable@vger.kernel.org>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/cifsencrypt.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 83fd40dc1ef0..bd5f13d38450 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -294,6 +294,7 @@ void calc_lanman_hash(struct cifsSesInfo *ses, char *lnm_session_key)
 
 	if ((ses->server->secMode & SECMODE_PW_ENCRYPT) == 0)
 		if (extended_security & CIFSSEC_MAY_PLNTXT) {
+			memset(lnm_session_key, 0, CIFS_SESS_KEY_SIZE);
 			memcpy(lnm_session_key, password_with_pad,
 				CIFS_ENCPWD_SIZE);
 			return;
-- 
cgit v1.2.3


From 838726c4756813576078203eb7e1e219db0da870 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Thu, 28 Aug 2008 07:54:59 -0400
Subject: cifs: fix O_APPEND on directio mounts

The direct I/O write codepath for CIFS is done through
cifs_user_write(). That function does not currently call
generic_write_checks() so the file position isn't being properly set
when the file is opened with O_APPEND.  It's also not doing the other
"normal" checks that should be done for a write call.

The problem is currently that when you open a file with O_APPEND on a
mount with the directio mount option, the file position is set to the
beginning of the file. This makes any subsequent writes clobber the data
in the file starting at the beginning.

This seems to fix the problem in cursory testing. It is, however
important to note that NFS disallows the combination of
(O_DIRECT|O_APPEND). If my understanding is correct, the concern is
races with multiple clients appending to a file clobbering each others'
data. Since the write model for CIFS and NFS is pretty similar in this
regard, CIFS is probably subject to the same sort of races. What's
unclear to me is why this is a particular problem with O_DIRECT and not
with buffered writes...

Regardless, disallowing O_APPEND on an entire mount is probably not
reasonable, so we'll probably just have to deal with it and reevaluate
this flag combination when we get proper support for O_DIRECT. In the
meantime this patch at least fixes the existing problem.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Cc: Stable Tree <stable@kernel.org>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/file.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'fs')

diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index ff14d14903a0..cbefe1f1f9fe 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -833,6 +833,10 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data,
 		return -EBADF;
 	open_file = (struct cifsFileInfo *) file->private_data;
 
+	rc = generic_write_checks(file, poffset, &write_size, 0);
+	if (rc)
+		return rc;
+
 	xid = GetXid();
 
 	if (*poffset > file->f_path.dentry->d_inode->i_size)
-- 
cgit v1.2.3


From 2e655021b8d50b5d90ce442f3de6bf3667729910 Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Thu, 28 Aug 2008 15:30:06 +0000
Subject: [CIFS] update cifs change log

Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/CHANGES |  5 ++++-
 fs/cifs/README  | 14 ++++++++++++--
 2 files changed, 16 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index f9e4ad97a79e..06e521a945c3 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -9,7 +9,10 @@ files (e.g. "cp -a") to Windows servers.  For mkdir and create honor setgid bit
 on parent directory when server supports Unix Extensions but not POSIX
 create. Update cifs.upcall version to handle new Kerberos sec flags
 (this requires update of cifs.upcall program from Samba).  Fix memory leak
-on dns_upcall (resolving DFS referralls).
+on dns_upcall (resolving DFS referralls).  Fix plain text password
+authentication (requires setting SecurityFlags to 0x30030 to enable
+lanman and plain text though).  Fix writes to be at correct offset when
+file is open with O_APPEND and file is on a directio (forcediretio) mount.
 
 Version 1.53
 ------------
diff --git a/fs/cifs/README b/fs/cifs/README
index 68b5c1169d9d..bd2343d4c6a6 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -542,10 +542,20 @@ SecurityFlags		Flags which control security negotiation and
 			hashing mechanisms (as "must use") on the other hand 
 			does not make much sense. Default flags are 
 				0x07007 
-			(NTLM, NTLMv2 and packet signing allowed).  Maximum 
+			(NTLM, NTLMv2 and packet signing allowed).  The maximum 
 			allowable flags if you want to allow mounts to servers
 			using weaker password hashes is 0x37037 (lanman,
-			plaintext, ntlm, ntlmv2, signing allowed):
+			plaintext, ntlm, ntlmv2, signing allowed).  Some
+			SecurityFlags require the corresponding menuconfig
+			options to be enabled (lanman and plaintext require
+			CONFIG_CIFS_WEAK_PW_HASH for example).  Enabling
+			plaintext authentication currently requires also
+			enabling lanman authentication in the security flags
+			because the cifs module only supports sending
+			laintext passwords using the older lanman dialect
+			form of the session setup SMB.  (e.g. for authentication
+			using plain text passwords, set the SecurityFlags
+			to 0x30030):
  
 			may use packet signing 				0x00001
 			must use packet signing				0x01001
-- 
cgit v1.2.3


From c76da9da1fffa6de263486df54950eb328d58f71 Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Thu, 28 Aug 2008 15:32:22 +0000
Subject: [CIFS] Turn off Unicode during session establishment for plaintext
 authentication

LANMAN session setup did not support Unicode (after session setup, unicode can
still be used though).

Fixes samba bug# 5319

CC: Jeff Layton <jlayton@redhat.com>
CC: Stable Kernel <stable@vger.kernel.org>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/sess.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index b537fad3bf50..252fdc0567f1 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -409,6 +409,8 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time,
 #ifdef CONFIG_CIFS_WEAK_PW_HASH
 		char lnm_session_key[CIFS_SESS_KEY_SIZE];
 
+		pSMB->req.hdr.Flags2 &= ~SMBFLG2_UNICODE;
+
 		/* no capabilities flags in old lanman negotiation */
 
 		pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_SESS_KEY_SIZE);
-- 
cgit v1.2.3


From c228c24bf1138d4757dbe20615df655815446da3 Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@netapp.com>
Date: Thu, 21 Aug 2008 08:42:16 -0400
Subject: nfsd: fix compound state allocation error handling

Move the cstate_alloc call so that if it fails, the response is setup to
encode the NFS error. The out label now means that the
nfsd4_compound_state has not been allocated.

Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfs4proc.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 2e51adac65de..e5b51ffafc6c 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -867,11 +867,6 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
 	int		slack_bytes;
 	__be32		status;
 
-	status = nfserr_resource;
-	cstate = cstate_alloc();
-	if (cstate == NULL)
-		goto out;
-
 	resp->xbuf = &rqstp->rq_res;
 	resp->p = rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len;
 	resp->tagp = resp->p;
@@ -890,6 +885,11 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
 	if (args->minorversion > NFSD_SUPPORTED_MINOR_VERSION)
 		goto out;
 
+	status = nfserr_resource;
+	cstate = cstate_alloc();
+	if (cstate == NULL)
+		goto out;
+
 	status = nfs_ok;
 	while (!status && resp->opcnt < args->opcnt) {
 		op = &args->ops[resp->opcnt++];
@@ -957,9 +957,9 @@ encode_op:
 		nfsd4_increment_op_stats(op->opnum);
 	}
 
+	cstate_free(cstate);
 out:
 	nfsd4_release_compoundargs(args);
-	cstate_free(cstate);
 	dprintk("nfsv4 compound returned %d\n", ntohl(status));
 	return status;
 }
-- 
cgit v1.2.3


From 91b80969ba466ba4b915a4a1d03add8c297add3f Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Fri, 29 Aug 2008 19:18:45 -0400
Subject: nfsd: fix buffer overrun decoding NFSv4 acl

The array we kmalloc() here is not large enough.

Thanks to Johann Dahm and David Richter for bug report and testing.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Cc: David Richter <richterd@citi.umich.edu>
Tested-by: Johann Dahm <jdahm@umich.edu>
---
 fs/nfsd/nfs4acl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index b6ed38380ab8..54b8b4140c8f 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -443,7 +443,7 @@ init_state(struct posix_acl_state *state, int cnt)
 	 * enough space for either:
 	 */
 	alloc = sizeof(struct posix_ace_state_array)
-		+ cnt*sizeof(struct posix_ace_state);
+		+ cnt*sizeof(struct posix_user_ace_state);
 	state->users = kzalloc(alloc, GFP_KERNEL);
 	if (!state->users)
 		return -ENOMEM;
-- 
cgit v1.2.3


From 169ccbd44eb20f5bb7e4352451eba25397e29749 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Tue, 2 Sep 2008 14:35:37 -0700
Subject: NTFS: update homepage

Update the location of the NTFS homepage in several files.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Cc: Jeff Garzik <jeff@garzik.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/filesystems/ntfs.txt | 4 ++--
 MAINTAINERS                        | 2 +-
 fs/ntfs/usnjrnl.h                  | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/Documentation/filesystems/ntfs.txt b/Documentation/filesystems/ntfs.txt
index e79ee2db183a..ac2a261c5f7d 100644
--- a/Documentation/filesystems/ntfs.txt
+++ b/Documentation/filesystems/ntfs.txt
@@ -40,7 +40,7 @@ Web site
 ========
 
 There is plenty of additional information on the linux-ntfs web site
-at http://linux-ntfs.sourceforge.net/
+at http://www.linux-ntfs.org/
 
 The web site has a lot of additional information, such as a comprehensive
 FAQ, documentation on the NTFS on-disk format, information on the Linux-NTFS
@@ -272,7 +272,7 @@ And you would know that /dev/hda2 has a size of 37768814 - 4209030 + 1 =
 For Win2k and later dynamic disks, you can for example use the ldminfo utility
 which is part of the Linux LDM tools (the latest version at the time of
 writing is linux-ldm-0.0.8.tar.bz2).  You can download it from:
-	http://linux-ntfs.sourceforge.net/downloads.html
+	http://www.linux-ntfs.org/
 Simply extract the downloaded archive (tar xvjf linux-ldm-0.0.8.tar.bz2), go
 into it (cd linux-ldm-0.0.8) and change to the test directory (cd test).  You
 will find the precompiled (i386) ldminfo utility there.  NOTE: You will not be
diff --git a/MAINTAINERS b/MAINTAINERS
index c4ca99cf80df..be83f3424f3b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3051,7 +3051,7 @@ P:	Anton Altaparmakov
 M:	aia21@cantab.net
 L:	linux-ntfs-dev@lists.sourceforge.net
 L:	linux-kernel@vger.kernel.org
-W:	http://linux-ntfs.sf.net/
+W:	http://www.linux-ntfs.org/
 T:	git kernel.org:/pub/scm/linux/kernel/git/aia21/ntfs-2.6.git
 S:	Maintained
 
diff --git a/fs/ntfs/usnjrnl.h b/fs/ntfs/usnjrnl.h
index 3a8af75351e8..4087fbdac327 100644
--- a/fs/ntfs/usnjrnl.h
+++ b/fs/ntfs/usnjrnl.h
@@ -113,7 +113,7 @@ typedef struct {
  * Reason flags (32-bit).  Cumulative flags describing the change(s) to the
  * file since it was last opened.  I think the names speak for themselves but
  * if you disagree check out the descriptions in the Linux NTFS project NTFS
- * documentation: http://linux-ntfs.sourceforge.net/ntfs/files/usnjrnl.html
+ * documentation: http://www.linux-ntfs.org/
  */
 enum {
 	USN_REASON_DATA_OVERWRITE	= const_cpu_to_le32(0x00000001),
@@ -145,7 +145,7 @@ typedef le32 USN_REASON_FLAGS;
  * Source info flags (32-bit).  Information about the source of the change(s)
  * to the file.  For detailed descriptions of what these mean, see the Linux
  * NTFS project NTFS documentation:
- *	http://linux-ntfs.sourceforge.net/ntfs/files/usnjrnl.html
+ *	http://www.linux-ntfs.org/
  */
 enum {
 	USN_SOURCE_DATA_MANAGEMENT	  = const_cpu_to_le32(0x00000001),
-- 
cgit v1.2.3


From 4b8561521dbaa3d766b198496b220e984e3bf756 Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Tue, 2 Sep 2008 14:35:53 -0700
Subject: mm: show quicklist usage in /proc/meminfo

Quicklists can consume several GB of memory.  We should provide a means of
monitoring this.

After this patch is applied, /proc/meminfo will output the following:

% cat /proc/meminfo

MemTotal:      7715392 kB
MemFree:       5401600 kB
Buffers:         80384 kB
Cached:         300800 kB
SwapCached:          0 kB
Active:         235584 kB
Inactive:       262656 kB
SwapTotal:     2031488 kB
SwapFree:      2031488 kB
Dirty:            3520 kB
Writeback:           0 kB
AnonPages:      117696 kB
Mapped:          38528 kB
Slab:          1589952 kB
SReclaimable:    23104 kB
SUnreclaim:    1566848 kB
PageTables:      14656 kB
NFS_Unstable:        0 kB
Bounce:              0 kB
WritebackTmp:        0 kB
CommitLimit:   5889152 kB
Committed_AS:   393152 kB
VmallocTotal: 17592177655808 kB
VmallocUsed:     29056 kB
VmallocChunk: 17592177626432 kB
Quicklists:     130944 kB
HugePages_Total:     0
HugePages_Free:      0
HugePages_Rsvd:      0
HugePages_Surp:      0
Hugepagesize:    262144 kB

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Keiichiro Tokunaga <tokunaga.keiich@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/proc_misc.c       | 7 +++++--
 include/linux/quicklist.h | 7 +++++++
 2 files changed, 12 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index ded969862960..00f10a2dcf12 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -24,6 +24,7 @@
 #include <linux/tty.h>
 #include <linux/string.h>
 #include <linux/mman.h>
+#include <linux/quicklist.h>
 #include <linux/proc_fs.h>
 #include <linux/ioport.h>
 #include <linux/mm.h>
@@ -189,7 +190,8 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
 		"Committed_AS: %8lu kB\n"
 		"VmallocTotal: %8lu kB\n"
 		"VmallocUsed:  %8lu kB\n"
-		"VmallocChunk: %8lu kB\n",
+		"VmallocChunk: %8lu kB\n"
+		"Quicklists:   %8lu kB\n",
 		K(i.totalram),
 		K(i.freeram),
 		K(i.bufferram),
@@ -221,7 +223,8 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
 		K(committed),
 		(unsigned long)VMALLOC_TOTAL >> 10,
 		vmi.used >> 10,
-		vmi.largest_chunk >> 10
+		vmi.largest_chunk >> 10,
+		K(quicklist_total_size())
 		);
 
 		len += hugetlb_report_meminfo(page + len);
diff --git a/include/linux/quicklist.h b/include/linux/quicklist.h
index 39b66713a0bb..bd466439c588 100644
--- a/include/linux/quicklist.h
+++ b/include/linux/quicklist.h
@@ -80,6 +80,13 @@ void quicklist_trim(int nr, void (*dtor)(void *),
 
 unsigned long quicklist_total_size(void);
 
+#else
+
+static inline unsigned long quicklist_total_size(void)
+{
+	return 0;
+}
+
 #endif
 
 #endif /* LINUX_QUICKLIST_H */
-- 
cgit v1.2.3


From 49048622eae698e5c4ae61f7e71200f265ccc529 Mon Sep 17 00:00:00 2001
From: Balbir Singh <balbir@linux.vnet.ibm.com>
Date: Fri, 5 Sep 2008 18:12:23 +0200
Subject: sched: fix process time monotonicity

Spencer reported a problem where utime and stime were going negative despite
the fixes in commit b27f03d4bdc145a09fb7b0c0e004b29f1ee555fa. The suspected
reason for the problem is that signal_struct maintains it's own utime and
stime (of exited tasks), these are not updated using the new task_utime()
routine, hence sig->utime can go backwards and cause the same problem
to occur (sig->utime, adds tsk->utime and not task_utime()). This patch
fixes the problem

TODO: using max(task->prev_utime, derived utime) works for now, but a more
generic solution is to implement cputime_max() and use the cputime_gt()
function for comparison.

Reported-by: spencer@bluehost.com
Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 fs/proc/array.c       | 59 ---------------------------------------------------
 include/linux/sched.h |  4 ++++
 kernel/exit.c         |  6 +++---
 kernel/sched.c        | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 66 insertions(+), 62 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/array.c b/fs/proc/array.c
index 0d6eb33597c6..71c9be59c9c2 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -337,65 +337,6 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
 	return 0;
 }
 
-/*
- * Use precise platform statistics if available:
- */
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
-static cputime_t task_utime(struct task_struct *p)
-{
-	return p->utime;
-}
-
-static cputime_t task_stime(struct task_struct *p)
-{
-	return p->stime;
-}
-#else
-static cputime_t task_utime(struct task_struct *p)
-{
-	clock_t utime = cputime_to_clock_t(p->utime),
-		total = utime + cputime_to_clock_t(p->stime);
-	u64 temp;
-
-	/*
-	 * Use CFS's precise accounting:
-	 */
-	temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime);
-
-	if (total) {
-		temp *= utime;
-		do_div(temp, total);
-	}
-	utime = (clock_t)temp;
-
-	p->prev_utime = max(p->prev_utime, clock_t_to_cputime(utime));
-	return p->prev_utime;
-}
-
-static cputime_t task_stime(struct task_struct *p)
-{
-	clock_t stime;
-
-	/*
-	 * Use CFS's precise accounting. (we subtract utime from
-	 * the total, to make sure the total observed by userspace
-	 * grows monotonically - apps rely on that):
-	 */
-	stime = nsec_to_clock_t(p->se.sum_exec_runtime) -
-			cputime_to_clock_t(task_utime(p));
-
-	if (stime >= 0)
-		p->prev_stime = max(p->prev_stime, clock_t_to_cputime(stime));
-
-	return p->prev_stime;
-}
-#endif
-
-static cputime_t task_gtime(struct task_struct *p)
-{
-	return p->gtime;
-}
-
 static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 			struct pid *pid, struct task_struct *task, int whole)
 {
diff --git a/include/linux/sched.h b/include/linux/sched.h
index cfb0d87b99fc..3d9120c5ad15 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1475,6 +1475,10 @@ static inline void put_task_struct(struct task_struct *t)
 		__put_task_struct(t);
 }
 
+extern cputime_t task_utime(struct task_struct *p);
+extern cputime_t task_stime(struct task_struct *p);
+extern cputime_t task_gtime(struct task_struct *p);
+
 /*
  * Per process flags
  */
diff --git a/kernel/exit.c b/kernel/exit.c
index 25ed2ad986df..16395644a98f 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -112,9 +112,9 @@ static void __exit_signal(struct task_struct *tsk)
 		 * We won't ever get here for the group leader, since it
 		 * will have been the last reference on the signal_struct.
 		 */
-		sig->utime = cputime_add(sig->utime, tsk->utime);
-		sig->stime = cputime_add(sig->stime, tsk->stime);
-		sig->gtime = cputime_add(sig->gtime, tsk->gtime);
+		sig->utime = cputime_add(sig->utime, task_utime(tsk));
+		sig->stime = cputime_add(sig->stime, task_stime(tsk));
+		sig->gtime = cputime_add(sig->gtime, task_gtime(tsk));
 		sig->min_flt += tsk->min_flt;
 		sig->maj_flt += tsk->maj_flt;
 		sig->nvcsw += tsk->nvcsw;
diff --git a/kernel/sched.c b/kernel/sched.c
index 9a1ddb84e26d..1a5f73c1fcdc 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4178,6 +4178,65 @@ void account_steal_time(struct task_struct *p, cputime_t steal)
 		cpustat->steal = cputime64_add(cpustat->steal, tmp);
 }
 
+/*
+ * Use precise platform statistics if available:
+ */
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+cputime_t task_utime(struct task_struct *p)
+{
+	return p->utime;
+}
+
+cputime_t task_stime(struct task_struct *p)
+{
+	return p->stime;
+}
+#else
+cputime_t task_utime(struct task_struct *p)
+{
+	clock_t utime = cputime_to_clock_t(p->utime),
+		total = utime + cputime_to_clock_t(p->stime);
+	u64 temp;
+
+	/*
+	 * Use CFS's precise accounting:
+	 */
+	temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime);
+
+	if (total) {
+		temp *= utime;
+		do_div(temp, total);
+	}
+	utime = (clock_t)temp;
+
+	p->prev_utime = max(p->prev_utime, clock_t_to_cputime(utime));
+	return p->prev_utime;
+}
+
+cputime_t task_stime(struct task_struct *p)
+{
+	clock_t stime;
+
+	/*
+	 * Use CFS's precise accounting. (we subtract utime from
+	 * the total, to make sure the total observed by userspace
+	 * grows monotonically - apps rely on that):
+	 */
+	stime = nsec_to_clock_t(p->se.sum_exec_runtime) -
+			cputime_to_clock_t(task_utime(p));
+
+	if (stime >= 0)
+		p->prev_stime = max(p->prev_stime, clock_t_to_cputime(stime));
+
+	return p->prev_stime;
+}
+#endif
+
+inline cputime_t task_gtime(struct task_struct *p)
+{
+	return p->gtime;
+}
+
 /*
  * This function gets called by the timer code, with HZ frequency.
  * We call it with interrupts disabled.
-- 
cgit v1.2.3


From af904deaf6da3f3285eb0a06a3dc6a1af0251030 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 8 Sep 2008 11:58:13 -0400
Subject: NFS: Restore missing hunk in NFS mount option parser

Automounter maps can contain mount options valid for other NFS
implementations but not for Linux.  The Linux automounter uses the
mount command's "-s" command line option ("s" for "sloppy") so that
mount requests containing such options are not rejected.

Commit f45663ce5fb30f76a3414ab3ac69f4dd320e760a attempted to address a
known regression with text-based NFS mount option parsing.  Unrecognized
mount options would cause mount requests to fail, even if the "-s"
option was used on the mount command line.

Unfortunately, this commit was not complete as submitted.  It adds a
new mount option, "sloppy".  But it is missing a hunk, so it now allows
NFS mounts with unrecognized mount options, even if the "sloppy" option
is not present.  This could be a problem if a required critical mount
option such as "sync" is misspelled, for example, and is considered a
regression from 2.6.26.

This patch restores the missing hunk.  Now, the default behavior of
text-based NFS mount options is as before: any unrecognized mount option
will cause the mount to fail.

Please include this in 2.6.27-rc.

Thanks to Neil Brown for reporting this.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Acked-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/nfs/super.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'fs')

diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 9abcd2b329f7..e9b20173fef3 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1279,6 +1279,12 @@ static int nfs_parse_mount_options(char *raw,
 		}
 	}
 
+	if (errors > 0) {
+		dfprintk(MOUNT, "NFS: parsing encountered %d error%s\n",
+				errors, (errors == 1 ? "" : "s"));
+		if (!sloppy)
+			return 0;
+	}
 	return 1;
 
 out_nomem:
-- 
cgit v1.2.3