From bf068ee266f9dbaa6dacb8433a366bb399e7ae5b Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 19 Aug 2008 22:16:43 -0400 Subject: ext4: Handle unwritten extent properly with delayed allocation When using fallocate the buffer_heads are marked unwritten and unmapped. We need to map them in the writepages after a get_block. Otherwise we split the uninit extents, but never write the content to disk. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 59fbbe899acc..a1c7d7623213 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1741,6 +1741,13 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, if (buffer_delay(bh)) { bh->b_blocknr = pblock; clear_buffer_delay(bh); + bh->b_bdev = inode->i_sb->s_bdev; + } else if (buffer_unwritten(bh)) { + bh->b_blocknr = pblock; + clear_buffer_unwritten(bh); + set_buffer_mapped(bh); + set_buffer_new(bh); + bh->b_bdev = inode->i_sb->s_bdev; } else if (buffer_mapped(bh)) BUG_ON(bh->b_blocknr != pblock); @@ -1814,7 +1821,7 @@ static void mpage_da_map_blocks(struct mpage_da_data *mpd) * If blocks are delayed marked, we need to * put actual blocknr and drop delayed bit */ - if (buffer_delay(lbh)) + if (buffer_delay(lbh) || buffer_unwritten(lbh)) mpage_put_bnr_to_bhs(mpd, next, &new); /* go for the remaining blocks */ @@ -1823,7 +1830,8 @@ static void mpage_da_map_blocks(struct mpage_da_data *mpd) } } -#define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | (1 << BH_Delay)) +#define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \ + (1 << BH_Delay) | (1 << BH_Unwritten)) /* * mpage_add_bh_to_extent - try to add one more block to extent of blocks -- cgit v1.2.3 From b4df2030858bde986cb6ff2e4b45945f84649e32 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 13 Aug 2008 21:44:34 -0400 Subject: ext4: Fix potential truncate BUG due to i_prealloc_list being non-empty We need to call ext4_discard_reservation() earlier in ext4_truncate(), to avoid a BUG() in ext4_mb_return_to_preallocation(), which is called (ultimately) by ext4_free_blocks(). So we must ditch the blocks on i_prealloc_list before we start freeing the data blocks. Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index a1c7d7623213..2d54c822c4c3 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3494,6 +3494,9 @@ void ext4_truncate(struct inode *inode) * modify the block allocation tree. */ down_write(&ei->i_data_sem); + + ext4_discard_reservation(inode); + /* * The orphan list entry will now protect us from any crash which * occurs before the truncate completes, so it is now safe to propagate @@ -3563,8 +3566,6 @@ do_indirects: ; } - ext4_discard_reservation(inode); - up_write(&ei->i_data_sem); inode->i_mtime = inode->i_ctime = ext4_current_time(inode); ext4_mark_inode_dirty(handle, inode); -- cgit v1.2.3 From cd21322616c3af265d39bf15321d436e667a5dd1 Mon Sep 17 00:00:00 2001 From: Mingming Cao Date: Tue, 19 Aug 2008 22:16:59 -0400 Subject: ext4: Fix delalloc release block reservation for truncate Ext4 will release the reserved blocks for delayed allocations when inode is truncated/unlinked. If there is no reserved block at all, we shouldn't need to do so. But current code still tries to release the reserved blocks regardless whether the counters's value is 0. Continue to do that causes the later calculation to go wrong and a kernel BUG_ON() caught that. This doesn't happen for extent-based files, as the calculation for 0 reserved blocks was right for extent based file. This patch fixed the kernel BUG() due to above reason. It adds checks for 0 to avoid unnecessary release and fix calculation for non-extent files. Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'fs') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 2d54c822c4c3..5e17d5f22a7e 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1005,6 +1005,9 @@ static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks) */ static int ext4_calc_metadata_amount(struct inode *inode, int blocks) { + if (!blocks) + return 0; + if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) return ext4_ext_calc_metadata_amount(inode, blocks); @@ -1559,7 +1562,25 @@ static void ext4_da_release_space(struct inode *inode, int to_free) struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); int total, mdb, mdb_free, release; + if (!to_free) + return; /* Nothing to release, exit */ + spin_lock(&EXT4_I(inode)->i_block_reservation_lock); + + if (!EXT4_I(inode)->i_reserved_data_blocks) { + /* + * if there is no reserved blocks, but we try to free some + * then the counter is messed up somewhere. + * but since this function is called from invalidate + * page, it's harmless to return without any action + */ + printk(KERN_INFO "ext4 delalloc try to release %d reserved " + "blocks for inode %lu, but there is no reserved " + "data blocks\n", to_free, inode->i_ino); + spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); + return; + } + /* recalculate the number of metablocks still need to be reserved */ total = EXT4_I(inode)->i_reserved_data_blocks - to_free; mdb = ext4_calc_metadata_amount(inode, total); -- cgit v1.2.3 From d015641734cde55d2fce48a6db3983c8a029fe05 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 19 Aug 2008 21:57:43 -0400 Subject: ext4: Fix ext4_dx_readdir hash collision handling This fixes a bug where readdir() would return a directory entry twice if there was a hash collision in an hash tree indexed directory. Signed-off-by: Eugene Dashevsky Signed-off-by: Mike Snitzer Signed-off-by: "Theodore Ts'o" --- fs/ext4/dir.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index d3d23d73c08b..ec8e33b45219 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -411,7 +411,7 @@ static int call_filldir(struct file * filp, void * dirent, get_dtype(sb, fname->file_type)); if (error) { filp->f_pos = curr_pos; - info->extra_fname = fname->next; + info->extra_fname = fname; return error; } fname = fname->next; @@ -450,11 +450,21 @@ static int ext4_dx_readdir(struct file * filp, * If there are any leftover names on the hash collision * chain, return them first. */ - if (info->extra_fname && - call_filldir(filp, dirent, filldir, info->extra_fname)) - goto finished; + if (info->extra_fname) { + if (call_filldir(filp, dirent, filldir, info->extra_fname)) + goto finished; - if (!info->curr_node) + info->extra_fname = NULL; + info->curr_node = rb_next(info->curr_node); + if (!info->curr_node) { + if (info->next_hash == ~0) { + filp->f_pos = EXT4_HTREE_EOF; + goto finished; + } + info->curr_hash = info->next_hash; + info->curr_minor_hash = 0; + } + } else if (!info->curr_node) info->curr_node = rb_first(&info->root); while (1) { -- cgit v1.2.3 From 88aa3cff4e9a38b953de9fbc54c96e619a2bb9f9 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 16 Aug 2008 07:57:35 -0400 Subject: ext4: Use ext4_discard_reservations instead of mballoc-specific call In ext4_ext_truncate(), we should use the more generic ext4_discard_reservations() call so we do the right thing when the filesystem is mounted with the nomballoc option. Signed-off-by: "Theodore Ts'o" Reviewed-by: Mingming Cao --- fs/ext4/extents.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 612c3d2c3824..7212947a8ca3 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2819,7 +2819,7 @@ void ext4_ext_truncate(struct inode *inode) down_write(&EXT4_I(inode)->i_data_sem); ext4_ext_invalidate_cache(inode); - ext4_mb_discard_inode_preallocations(inode); + ext4_discard_reservation(inode); /* * TODO: optimization is possible here. -- cgit v1.2.3 From 37609fd5ae62db75026d9f53096a1fbc35e040d9 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 19 Aug 2008 22:13:41 -0400 Subject: ext4: don't try to resize if there are no reserved gdt blocks left When trying to resize an ext4 fs and you run out of reserved gdt blocks, you get an error that doesn't actually tell you what went wrong, it just says that the gdb it picked is not correct, which is the case since you don't have any reserved gdt blocks left. This patch adds a check to make sure you have reserved gdt blocks to use, and if not prints out a more relevant error. Signed-off-by: Josef Bacik Cc: Cc: Andreas Dilger Signed-off-by: Andrew Morton Signed-off-by: "Theodore Ts'o" --- fs/ext4/resize.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 0a9265164265..b3d35604ea18 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -773,7 +773,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) if (reserved_gdb || gdb_off == 0) { if (!EXT4_HAS_COMPAT_FEATURE(sb, - EXT4_FEATURE_COMPAT_RESIZE_INODE)){ + EXT4_FEATURE_COMPAT_RESIZE_INODE) + || !le16_to_cpu(es->s_reserved_gdt_blocks)) { ext4_warning(sb, __func__, "No reserved GDT blocks, can't resize"); return -EPERM; -- cgit v1.2.3 From c001077f4003fa75793bb62979baa6241dd8eb19 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Tue, 19 Aug 2008 22:19:50 -0400 Subject: ext4: Fix bug where we return ENOSPC even though we have plenty of inodes The find_group_flex() function starts with best_flex as the parent_fbg_group, which happens to have 0 inodes free. Some of the flex groups searched have free blocks and free inodes, but the flex_freeb_ratio is < 10, so they're skipped. Then when a group is compared to the current "best" flex group, it does not have more free blocks than "best", so it is skipped as well. This continues until no flex group with free inodes is found which has a proper ratio or which has more free blocks than the "best" group, and we're left with a "best" group that has 0 inodes free, and we return -ENOSPC. We fix this by changing the logic so that if the current "best" flex group has no inodes free, and the current one does have room, it is promoted to the next "best." Signed-off-by: Eric Sandeen Signed-off-by: "Theodore Ts'o" --- fs/ext4/ialloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 655e760212b8..f344834bbf58 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -351,7 +351,7 @@ find_close_to_parent: goto found_flexbg; } - if (best_flex < 0 || + if (flex_group[best_flex].free_inodes == 0 || (flex_group[i].free_blocks > flex_group[best_flex].free_blocks && flex_group[i].free_inodes)) -- cgit v1.2.3 From a02908f19c819aeec5e3dcf238adaa6deddd70b0 Mon Sep 17 00:00:00 2001 From: Mingming Cao Date: Tue, 19 Aug 2008 22:16:07 -0400 Subject: ext4: journal credits calulation cleanup and fix for non-extent writepage When considering how many journal credits are needed for modifying a chunk of data, we need to account for the super block, inode block, quota blocks and xattr block, indirect/index blocks, also, group bitmap and group descriptor blocks for new allocation (including data and indirect/index blocks). There are many places in ext4 do the calculation on their own and often missed one or two meta blocks, and often they assume single block allocation, and did not considering the multile chunk of allocation case. This patch is trying to cleanup current journal credit code, provides some common helper funtion to calculate the journal credits, to be used for writepage, writepages, DIO, fallocate, migration, defrag, and for both nonextent and extent files. This patch modified the writepage/write_begin credit caculation for nonextent files, to use the new helper function. It also fixed the problem that writepage on nonextent files did not consider the case blocksize Reviewed-by: Aneesh Kumar K.V Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 3 ++ fs/ext4/ext4_jbd2.h | 8 ++++ fs/ext4/inode.c | 131 ++++++++++++++++++++++++++++++++++++++-------------- 3 files changed, 108 insertions(+), 34 deletions(-) (limited to 'fs') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 6c7924d9e358..38e661b0ea88 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1072,6 +1072,7 @@ extern void ext4_set_inode_flags(struct inode *); extern void ext4_get_inode_flags(struct ext4_inode_info *); extern void ext4_set_aops(struct inode *inode); extern int ext4_writepage_trans_blocks(struct inode *); +extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int idxblocks); extern int ext4_block_truncate_page(handle_t *handle, struct address_space *mapping, loff_t from); extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page); @@ -1227,6 +1228,8 @@ extern const struct inode_operations ext4_fast_symlink_inode_operations; /* extents.c */ extern int ext4_ext_tree_init(handle_t *handle, struct inode *); extern int ext4_ext_writepage_trans_blocks(struct inode *, int); +extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, + int chunk); extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, ext4_lblk_t iblock, unsigned long max_blocks, struct buffer_head *bh_result, diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index eb8bc3afe6e9..b455c685a98b 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -51,6 +51,14 @@ EXT4_XATTR_TRANS_BLOCKS - 2 + \ 2*EXT4_QUOTA_TRANS_BLOCKS(sb)) +/* + * Define the number of metadata blocks we need to account to modify data. + * + * This include super block, inode block, quota blocks and xattr blocks + */ +#define EXT4_META_TRANS_BLOCKS(sb) (EXT4_XATTR_TRANS_BLOCKS + \ + 2*EXT4_QUOTA_TRANS_BLOCKS(sb)) + /* Delete operations potentially hit one directory's namespace plus an * entire inode, plus arbitrary amounts of bitmap/indirection data. Be * generous. We can grow the delete transaction later if necessary. */ diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 5e17d5f22a7e..a27129065144 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4354,56 +4354,119 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, return 0; } +static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks, + int chunk) +{ + int indirects; + + /* if nrblocks are contiguous */ + if (chunk) { + /* + * With N contiguous data blocks, it need at most + * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) indirect blocks + * 2 dindirect blocks + * 1 tindirect block + */ + indirects = nrblocks / EXT4_ADDR_PER_BLOCK(inode->i_sb); + return indirects + 3; + } + /* + * if nrblocks are not contiguous, worse case, each block touch + * a indirect block, and each indirect block touch a double indirect + * block, plus a triple indirect block + */ + indirects = nrblocks * 2 + 1; + return indirects; +} + +static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) +{ + if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) + return ext4_indirect_trans_blocks(inode, nrblocks, 0); + return ext4_ext_index_trans_blocks(inode, nrblocks, 0); +} /* - * How many blocks doth make a writepage()? + * Account for index blocks, block groups bitmaps and block group + * descriptor blocks if modify datablocks and index blocks + * worse case, the indexs blocks spread over different block groups * - * With N blocks per page, it may be: - * N data blocks - * 2 indirect block - * 2 dindirect - * 1 tindirect - * N+5 bitmap blocks (from the above) - * N+5 group descriptor summary blocks - * 1 inode block - * 1 superblock. - * 2 * EXT4_SINGLEDATA_TRANS_BLOCKS for the quote files + * If datablocks are discontiguous, they are possible to spread over + * different block groups too. If they are contiugous, with flexbg, + * they could still across block group boundary. * - * 3 * (N + 5) + 2 + 2 * EXT4_SINGLEDATA_TRANS_BLOCKS + * Also account for superblock, inode, quota and xattr blocks + */ +int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) +{ + int groups, gdpblocks; + int idxblocks; + int ret = 0; + + /* + * How many index blocks need to touch to modify nrblocks? + * The "Chunk" flag indicating whether the nrblocks is + * physically contiguous on disk + * + * For Direct IO and fallocate, they calls get_block to allocate + * one single extent at a time, so they could set the "Chunk" flag + */ + idxblocks = ext4_index_trans_blocks(inode, nrblocks, chunk); + + ret = idxblocks; + + /* + * Now let's see how many group bitmaps and group descriptors need + * to account + */ + groups = idxblocks; + if (chunk) + groups += 1; + else + groups += nrblocks; + + gdpblocks = groups; + if (groups > EXT4_SB(inode->i_sb)->s_groups_count) + groups = EXT4_SB(inode->i_sb)->s_groups_count; + if (groups > EXT4_SB(inode->i_sb)->s_gdb_count) + gdpblocks = EXT4_SB(inode->i_sb)->s_gdb_count; + + /* bitmaps and block group descriptor blocks */ + ret += groups + gdpblocks; + + /* Blocks for super block, inode, quota and xattr blocks */ + ret += EXT4_META_TRANS_BLOCKS(inode->i_sb); + + return ret; +} + +/* + * Calulate the total number of credits to reserve to fit + * the modification of a single pages into a single transaction * - * With ordered or writeback data it's the same, less the N data blocks. + * This could be called via ext4_write_begin() or later + * ext4_da_writepages() in delalyed allocation case. * - * If the inode's direct blocks can hold an integral number of pages then a - * page cannot straddle two indirect blocks, and we can only touch one indirect - * and dindirect block, and the "5" above becomes "3". + * In both case it's possible that we could allocating multiple + * chunks of blocks. We need to consider the worse case, when + * one new block per extent. * - * This still overestimates under most circumstances. If we were to pass the - * start and end offsets in here as well we could do block_to_path() on each - * block and work out the exact number of indirects which are touched. Pah. + * For Direct IO and fallocate, the journal credits reservation + * is based on one single extent allocation, so they could use + * EXT4_DATA_TRANS_BLOCKS to get the needed credit to log a single + * chunk of allocation needs. */ - int ext4_writepage_trans_blocks(struct inode *inode) { int bpp = ext4_journal_blocks_per_page(inode); - int indirects = (EXT4_NDIR_BLOCKS % bpp) ? 5 : 3; int ret; - if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) - return ext4_ext_writepage_trans_blocks(inode, bpp); + ret = ext4_meta_trans_blocks(inode, bpp, 0); + /* Account for data blocks for journalled mode */ if (ext4_should_journal_data(inode)) - ret = 3 * (bpp + indirects) + 2; - else - ret = 2 * (bpp + indirects) + 2; - -#ifdef CONFIG_QUOTA - /* We know that structure was already allocated during DQUOT_INIT so - * we will be updating only the data blocks + inodes */ - ret += 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); -#endif - + ret += bpp; return ret; } - /* * The caller must have previously called ext4_reserve_inode_write(). * Give this, we know that the caller already has write access to iloc->bh. -- cgit v1.2.3 From ee12b630687d510f6f4b6d4acdc4e267fd4adeda Mon Sep 17 00:00:00 2001 From: Mingming Cao Date: Tue, 19 Aug 2008 22:16:05 -0400 Subject: ext4: journal credits reservation fixes for extent file writepage This patch modified the writepage/write_begin credit calculation for extent files, to use the credits caculation helper function. The current calculation of how many index/leaf blocks should be accounted is too conservetive, it always considered the worse case, where the tree level is 5, and in the case of multiple chunk allocations, it always assumed no blocks were dirtied in common across the allocations. This path uses the accurate depth of the inode with some extras to calculate the index blocks, and also less conservative in the case of multiple allocation accounting. Signed-off-by: Mingming Cao Reviewed-by: Aneesh Kumar K.V Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4_extents.h | 4 +- fs/ext4/extents.c | 104 +++++++++++++++++++++---------------------------- fs/ext4/migrate.c | 3 +- 3 files changed, 49 insertions(+), 62 deletions(-) (limited to 'fs') diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h index 6c166c0a54b7..d33dc56d6986 100644 --- a/fs/ext4/ext4_extents.h +++ b/fs/ext4/ext4_extents.h @@ -216,7 +216,9 @@ extern int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks); extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *); extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t); extern int ext4_extent_tree_init(handle_t *, struct inode *); -extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *); +extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode, + int num, + struct ext4_ext_path *path); extern int ext4_ext_try_to_merge(struct inode *inode, struct ext4_ext_path *path, struct ext4_extent *); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 7212947a8ca3..5c5dd3a1d657 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -1747,54 +1747,61 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, } /* - * ext4_ext_calc_credits_for_insert: - * This routine returns max. credits that the extent tree can consume. - * It should be OK for low-performance paths like ->writepage() - * To allow many writing processes to fit into a single transaction, - * the caller should calculate credits under i_data_sem and - * pass the actual path. + * ext4_ext_calc_credits_for_single_extent: + * This routine returns max. credits that needed to insert an extent + * to the extent tree. + * When pass the actual path, the caller should calculate credits + * under i_data_sem. */ -int ext4_ext_calc_credits_for_insert(struct inode *inode, +int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int num, struct ext4_ext_path *path) { - int depth, needed; - if (path) { + int depth = ext_depth(inode); + int ret; + /* probably there is space in leaf? */ - depth = ext_depth(inode); if (le16_to_cpu(path[depth].p_hdr->eh_entries) - < le16_to_cpu(path[depth].p_hdr->eh_max)) - return 1; - } + < le16_to_cpu(path[depth].p_hdr->eh_max)) { - /* - * given 32-bit logical block (4294967296 blocks), max. tree - * can be 4 levels in depth -- 4 * 340^4 == 53453440000. - * Let's also add one more level for imbalance. - */ - depth = 5; - - /* allocation of new data block(s) */ - needed = 2; + /* + * There are some space in the leaf tree, no + * need to account for leaf block credit + * + * bitmaps and block group descriptor blocks + * and other metadat blocks still need to be + * accounted. + */ + /* 1 one bitmap, 1 block group descriptor */ + ret = 2 + EXT4_META_TRANS_BLOCKS(inode->i_sb); + } + } - /* - * tree can be full, so it would need to grow in depth: - * we need one credit to modify old root, credits for - * new root will be added in split accounting - */ - needed += 1; + return ext4_meta_trans_blocks(inode, num, 1); +} - /* - * Index split can happen, we would need: - * allocate intermediate indexes (bitmap + group) - * + change two blocks at each level, but root (already included) - */ - needed += (depth * 2) + (depth * 2); +/* + * How many index/leaf blocks need to change/allocate to modify nrblocks? + * + * if nrblocks are fit in a single extent (chunk flag is 1), then + * in the worse case, each tree level index/leaf need to be changed + * if the tree split due to insert a new extent, then the old tree + * index/leaf need to be updated too + * + * If the nrblocks are discontiguous, they could cause + * the whole tree split more than once, but this is really rare. + */ +int ext4_ext_index_trans_blocks(struct inode *inode, int num, int chunk) +{ + int index; + int depth = ext_depth(inode); - /* any allocation modifies superblock */ - needed += 1; + if (chunk) + index = depth * 2; + else + index = depth * 3; - return needed; + return index; } static int ext4_remove_blocks(handle_t *handle, struct inode *inode, @@ -1921,9 +1928,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, correct_index = 1; credits += (ext_depth(inode)) + 1; } -#ifdef CONFIG_QUOTA credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); -#endif err = ext4_ext_journal_restart(handle, credits); if (err) @@ -2858,27 +2863,6 @@ out_stop: ext4_journal_stop(handle); } -/* - * ext4_ext_writepage_trans_blocks: - * calculate max number of blocks we could modify - * in order to allocate new block for an inode - */ -int ext4_ext_writepage_trans_blocks(struct inode *inode, int num) -{ - int needed; - - needed = ext4_ext_calc_credits_for_insert(inode, NULL); - - /* caller wants to allocate num blocks, but note it includes sb */ - needed = needed * num - (num - 1); - -#ifdef CONFIG_QUOTA - needed += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); -#endif - - return needed; -} - static void ext4_falloc_update_inode(struct inode *inode, int mode, loff_t new_size, int update_ctime) { diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index b9e077ba07e9..46fc0b5b12ba 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -53,7 +53,8 @@ static int finish_range(handle_t *handle, struct inode *inode, * credit. But below we try to not accumalate too much * of them by restarting the journal. */ - needed = ext4_ext_calc_credits_for_insert(inode, path); + needed = ext4_ext_calc_credits_for_single_extent(inode, + lb->last_block - lb->first_block + 1, path); /* * Make sure the credit we accumalated is not really high -- cgit v1.2.3 From f3bd1f3fa8ca7ec70cfd87aa94dc5e1a260901f2 Mon Sep 17 00:00:00 2001 From: Mingming Cao Date: Tue, 19 Aug 2008 22:16:03 -0400 Subject: ext4: journal credits reservation fixes for DIO, fallocate DIO and fallocate credit calculation is different than writepage, as they do start a new journal right for each call to ext4_get_blocks_wrap(). This patch uses the helper function in DIO and fallocate case, passing a flag indicating that the modified data are contigous thus could account less indirect/index blocks. This patch also fixed the journal credit reservation for direct I/O (DIO). Previously the estimated credits for DIO only was calculated for non-extent files, which was not enough if the file is extent-based. Also fixed was fallocate double-counting credits for modifying the the superblock. Signed-off-by: Mingming Cao Reviewed-by: Aneesh Kumar K.V Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 1 + fs/ext4/extents.c | 11 +++++------ fs/ext4/inode.c | 45 ++++++++++++++++++++++++--------------------- 3 files changed, 30 insertions(+), 27 deletions(-) (limited to 'fs') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 38e661b0ea88..295003241d3d 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1073,6 +1073,7 @@ extern void ext4_get_inode_flags(struct ext4_inode_info *); extern void ext4_set_aops(struct inode *inode); extern int ext4_writepage_trans_blocks(struct inode *); extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int idxblocks); +extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); extern int ext4_block_truncate_page(handle_t *handle, struct address_space *mapping, loff_t from); extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 5c5dd3a1d657..5596b70efa20 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -1758,7 +1758,7 @@ int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int num, { if (path) { int depth = ext_depth(inode); - int ret; + int ret = 0; /* probably there is space in leaf? */ if (le16_to_cpu(path[depth].p_hdr->eh_entries) @@ -1777,7 +1777,7 @@ int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int num, } } - return ext4_meta_trans_blocks(inode, num, 1); + return ext4_chunk_trans_blocks(inode, num); } /* @@ -2810,7 +2810,7 @@ void ext4_ext_truncate(struct inode *inode) /* * probably first extent we're gonna free will be last in block */ - err = ext4_writepage_trans_blocks(inode) + 3; + err = ext4_writepage_trans_blocks(inode); handle = ext4_journal_start(inode, err); if (IS_ERR(handle)) return; @@ -2923,10 +2923,9 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) - block; /* - * credits to insert 1 extent into extent tree + buffers to be able to - * modify 1 super block, 1 block bitmap and 1 group descriptor. + * credits to insert 1 extent into extent tree */ - credits = EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + 3; + credits = ext4_chunk_trans_blocks(inode, max_blocks); mutex_lock(&inode->i_mutex); retry: while (ret >= 0 && ret < max_blocks) { diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index a27129065144..ffc95ba48859 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1044,18 +1044,6 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used) spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); } -/* Maximum number of blocks we map for direct IO at once. */ -#define DIO_MAX_BLOCKS 4096 -/* - * Number of credits we need for writing DIO_MAX_BLOCKS: - * We need sb + group descriptor + bitmap + inode -> 4 - * For B blocks with A block pointers per block we need: - * 1 (triple ind.) + (B/A/A + 2) (doubly ind.) + (B/A + 2) (indirect). - * If we plug in 4096 for B and 256 for A (for 1KB block size), we get 25. - */ -#define DIO_CREDITS 25 - - /* * The ext4_get_blocks_wrap() function try to look up the requested blocks, * and returns if the blocks are already mapped. @@ -1167,19 +1155,23 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, return retval; } +/* Maximum number of blocks we map for direct IO at once. */ +#define DIO_MAX_BLOCKS 4096 + static int ext4_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { handle_t *handle = ext4_journal_current_handle(); int ret = 0, started = 0; unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; + int dio_credits; if (create && !handle) { /* Direct IO write... */ if (max_blocks > DIO_MAX_BLOCKS) max_blocks = DIO_MAX_BLOCKS; - handle = ext4_journal_start(inode, DIO_CREDITS + - 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb)); + dio_credits = ext4_chunk_trans_blocks(inode, max_blocks); + handle = ext4_journal_start(inode, dio_credits); if (IS_ERR(handle)) { ret = PTR_ERR(handle); goto out; @@ -2243,7 +2235,7 @@ static int ext4_da_writepage(struct page *page, * for DIO, writepages, and truncate */ #define EXT4_MAX_WRITEBACK_PAGES DIO_MAX_BLOCKS -#define EXT4_MAX_WRITEBACK_CREDITS DIO_CREDITS +#define EXT4_MAX_WRITEBACK_CREDITS 25 static int ext4_da_writepages(struct address_space *mapping, struct writeback_control *wbc) @@ -4441,7 +4433,8 @@ int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) /* * Calulate the total number of credits to reserve to fit - * the modification of a single pages into a single transaction + * the modification of a single pages into a single transaction, + * which may include multiple chunks of block allocations. * * This could be called via ext4_write_begin() or later * ext4_da_writepages() in delalyed allocation case. @@ -4449,11 +4442,6 @@ int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) * In both case it's possible that we could allocating multiple * chunks of blocks. We need to consider the worse case, when * one new block per extent. - * - * For Direct IO and fallocate, the journal credits reservation - * is based on one single extent allocation, so they could use - * EXT4_DATA_TRANS_BLOCKS to get the needed credit to log a single - * chunk of allocation needs. */ int ext4_writepage_trans_blocks(struct inode *inode) { @@ -4467,6 +4455,21 @@ int ext4_writepage_trans_blocks(struct inode *inode) ret += bpp; return ret; } + +/* + * Calculate the journal credits for a chunk of data modification. + * + * This is called from DIO, fallocate or whoever calling + * ext4_get_blocks_wrap() to map/allocate a chunk of contigous disk blocks. + * + * journal buffers for data blocks are not included here, as DIO + * and fallocate do no need to journal data buffers. + */ +int ext4_chunk_trans_blocks(struct inode *inode, int nrblocks) +{ + return ext4_meta_trans_blocks(inode, nrblocks, 1); +} + /* * The caller must have previously called ext4_reserve_inode_write(). * Give this, we know that the caller already has write access to iloc->bh. -- cgit v1.2.3 From a1d6cc563bfdf1bf2829d3e6ce4d8b774251796b Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 19 Aug 2008 21:55:02 -0400 Subject: ext4: Rework the ext4_da_writepages() function With the below changes we reserve credit needed to insert only one extent resulting from a call to single get_block. This makes sure we don't take too much journal credits during writeout. We also don't limit the pages to write. That means we loop through the dirty pages building largest possible contiguous block request. Then we issue a single get_block request. We may get less block that we requested. If so we would end up not mapping some of the buffer_heads. That means those buffer_heads are still marked delay. Later in the writepage callback via __mpage_writepage we redirty those pages. We should also not limit/throttle wbc->nr_to_write in the filesystem writepages callback. That cause wrong behaviour in generic_sync_sb_inodes caused by wbc->nr_to_write being <= 0 Signed-off-by: Aneesh Kumar K.V Reviewed-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 201 +++++++++++++++++++++++++++++++------------------------- 1 file changed, 113 insertions(+), 88 deletions(-) (limited to 'fs') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index ffc95ba48859..8dd22eade42c 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -41,6 +41,8 @@ #include "acl.h" #include "ext4_extents.h" +#define MPAGE_DA_EXTENT_TAIL 0x01 + static inline int ext4_begin_ordered_truncate(struct inode *inode, loff_t new_size) { @@ -1626,11 +1628,13 @@ struct mpage_da_data { unsigned long first_page, next_page; /* extent of pages */ get_block_t *get_block; struct writeback_control *wbc; + int io_done; + long pages_written; }; /* * mpage_da_submit_io - walks through extent of pages and try to write - * them with __mpage_writepage() + * them with writepage() call back * * @mpd->inode: inode * @mpd->first_page: first page of the extent @@ -1645,18 +1649,11 @@ struct mpage_da_data { static int mpage_da_submit_io(struct mpage_da_data *mpd) { struct address_space *mapping = mpd->inode->i_mapping; - struct mpage_data mpd_pp = { - .bio = NULL, - .last_block_in_bio = 0, - .get_block = mpd->get_block, - .use_writepage = 1, - }; int ret = 0, err, nr_pages, i; unsigned long index, end; struct pagevec pvec; BUG_ON(mpd->next_page <= mpd->first_page); - pagevec_init(&pvec, 0); index = mpd->first_page; end = mpd->next_page - 1; @@ -1674,8 +1671,9 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd) break; index++; - err = __mpage_writepage(page, mpd->wbc, &mpd_pp); - + err = mapping->a_ops->writepage(page, mpd->wbc); + if (!err) + mpd->pages_written++; /* * In error case, we have to continue because * remaining pages are still locked @@ -1686,9 +1684,6 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd) } pagevec_release(&pvec); } - if (mpd_pp.bio) - mpage_bio_submit(WRITE, mpd_pp.bio); - return ret; } @@ -1711,7 +1706,7 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, int blocks = exbh->b_size >> inode->i_blkbits; sector_t pblock = exbh->b_blocknr, cur_logical; struct buffer_head *head, *bh; - unsigned long index, end; + pgoff_t index, end; struct pagevec pvec; int nr_pages, i; @@ -1796,13 +1791,11 @@ static inline void __unmap_underlying_blocks(struct inode *inode, * * The function skips space we know is already mapped to disk blocks. * - * The function ignores errors ->get_block() returns, thus real - * error handling is postponed to __mpage_writepage() */ static void mpage_da_map_blocks(struct mpage_da_data *mpd) { + int err = 0; struct buffer_head *lbh = &mpd->lbh; - int err = 0, remain = lbh->b_size; sector_t next = lbh->b_blocknr; struct buffer_head new; @@ -1812,35 +1805,32 @@ static void mpage_da_map_blocks(struct mpage_da_data *mpd) if (buffer_mapped(lbh) && !buffer_delay(lbh)) return; - while (remain) { - new.b_state = lbh->b_state; - new.b_blocknr = 0; - new.b_size = remain; - err = mpd->get_block(mpd->inode, next, &new, 1); - if (err) { - /* - * Rather than implement own error handling - * here, we just leave remaining blocks - * unallocated and try again with ->writepage() - */ - break; - } - BUG_ON(new.b_size == 0); + new.b_state = lbh->b_state; + new.b_blocknr = 0; + new.b_size = lbh->b_size; - if (buffer_new(&new)) - __unmap_underlying_blocks(mpd->inode, &new); + /* + * If we didn't accumulate anything + * to write simply return + */ + if (!new.b_size) + return; + err = mpd->get_block(mpd->inode, next, &new, 1); + if (err) + return; + BUG_ON(new.b_size == 0); - /* - * If blocks are delayed marked, we need to - * put actual blocknr and drop delayed bit - */ - if (buffer_delay(lbh) || buffer_unwritten(lbh)) - mpage_put_bnr_to_bhs(mpd, next, &new); + if (buffer_new(&new)) + __unmap_underlying_blocks(mpd->inode, &new); - /* go for the remaining blocks */ - next += new.b_size >> mpd->inode->i_blkbits; - remain -= new.b_size; - } + /* + * If blocks are delayed marked, we need to + * put actual blocknr and drop delayed bit + */ + if (buffer_delay(lbh) || buffer_unwritten(lbh)) + mpage_put_bnr_to_bhs(mpd, next, &new); + + return; } #define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \ @@ -1886,13 +1876,9 @@ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, * need to flush current extent and start new one */ mpage_da_map_blocks(mpd); - - /* - * Now start a new extent - */ - lbh->b_size = bh->b_size; - lbh->b_state = bh->b_state & BH_FLAGS; - lbh->b_blocknr = logical; + mpage_da_submit_io(mpd); + mpd->io_done = 1; + return; } /* @@ -1912,17 +1898,35 @@ static int __mpage_da_writepage(struct page *page, struct buffer_head *bh, *head, fake; sector_t logical; + if (mpd->io_done) { + /* + * Rest of the page in the page_vec + * redirty then and skip then. We will + * try to to write them again after + * starting a new transaction + */ + redirty_page_for_writepage(wbc, page); + unlock_page(page); + return MPAGE_DA_EXTENT_TAIL; + } /* * Can we merge this page to current extent? */ if (mpd->next_page != page->index) { /* * Nope, we can't. So, we map non-allocated blocks - * and start IO on them using __mpage_writepage() + * and start IO on them using writepage() */ if (mpd->next_page != mpd->first_page) { mpage_da_map_blocks(mpd); mpage_da_submit_io(mpd); + /* + * skip rest of the page in the page_vec + */ + mpd->io_done = 1; + redirty_page_for_writepage(wbc, page); + unlock_page(page); + return MPAGE_DA_EXTENT_TAIL; } /* @@ -1953,6 +1957,8 @@ static int __mpage_da_writepage(struct page *page, set_buffer_dirty(bh); set_buffer_uptodate(bh); mpage_add_bh_to_extent(mpd, logical, bh); + if (mpd->io_done) + return MPAGE_DA_EXTENT_TAIL; } else { /* * Page with regular buffer heads, just add all dirty ones @@ -1961,8 +1967,12 @@ static int __mpage_da_writepage(struct page *page, bh = head; do { BUG_ON(buffer_locked(bh)); - if (buffer_dirty(bh)) + if (buffer_dirty(bh) && + (!buffer_mapped(bh) || buffer_delay(bh))) { mpage_add_bh_to_extent(mpd, logical, bh); + if (mpd->io_done) + return MPAGE_DA_EXTENT_TAIL; + } logical++; } while ((bh = bh->b_this_page) != head); } @@ -1981,22 +1991,13 @@ static int __mpage_da_writepage(struct page *page, * * This is a library function, which implements the writepages() * address_space_operation. - * - * In order to avoid duplication of logic that deals with partial pages, - * multiple bio per page, etc, we find non-allocated blocks, allocate - * them with minimal calls to ->get_block() and re-use __mpage_writepage() - * - * It's important that we call __mpage_writepage() only once for each - * involved page, otherwise we'd have to implement more complicated logic - * to deal with pages w/o PG_lock or w/ PG_writeback and so on. - * - * See comments to mpage_writepages() */ static int mpage_da_writepages(struct address_space *mapping, struct writeback_control *wbc, get_block_t get_block) { struct mpage_da_data mpd; + long to_write; int ret; if (!get_block) @@ -2010,17 +2011,22 @@ static int mpage_da_writepages(struct address_space *mapping, mpd.first_page = 0; mpd.next_page = 0; mpd.get_block = get_block; + mpd.io_done = 0; + mpd.pages_written = 0; + + to_write = wbc->nr_to_write; ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, &mpd); /* * Handle last extent of pages */ - if (mpd.next_page != mpd.first_page) { + if (!mpd.io_done && mpd.next_page != mpd.first_page) { mpage_da_map_blocks(&mpd); mpage_da_submit_io(&mpd); } + wbc->nr_to_write = to_write - mpd.pages_written; return ret; } @@ -2238,7 +2244,7 @@ static int ext4_da_writepage(struct page *page, #define EXT4_MAX_WRITEBACK_CREDITS 25 static int ext4_da_writepages(struct address_space *mapping, - struct writeback_control *wbc) + struct writeback_control *wbc) { struct inode *inode = mapping->host; handle_t *handle = NULL; @@ -2246,42 +2252,53 @@ static int ext4_da_writepages(struct address_space *mapping, int ret = 0; long to_write; loff_t range_start = 0; + long pages_skipped = 0; /* * No pages to write? This is mainly a kludge to avoid starting * a transaction for special inodes like journal inode on last iput() * because that could violate lock ordering on umount */ - if (!mapping->nrpages) + if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) return 0; - /* - * Estimate the worse case needed credits to write out - * EXT4_MAX_BUF_BLOCKS pages - */ - needed_blocks = EXT4_MAX_WRITEBACK_CREDITS; - - to_write = wbc->nr_to_write; - if (!wbc->range_cyclic) { + if (!wbc->range_cyclic) /* * If range_cyclic is not set force range_cont * and save the old writeback_index */ wbc->range_cont = 1; - range_start = wbc->range_start; - } - while (!ret && to_write) { + range_start = wbc->range_start; + pages_skipped = wbc->pages_skipped; + +restart_loop: + to_write = wbc->nr_to_write; + while (!ret && to_write > 0) { + + /* + * we insert one extent at a time. So we need + * credit needed for single extent allocation. + * journalled mode is currently not supported + * by delalloc + */ + BUG_ON(ext4_should_journal_data(inode)); + needed_blocks = EXT4_DATA_TRANS_BLOCKS(inode->i_sb); + /* start a new transaction*/ handle = ext4_journal_start(inode, needed_blocks); if (IS_ERR(handle)) { ret = PTR_ERR(handle); + printk(KERN_EMERG "%s: jbd2_start: " + "%ld pages, ino %lu; err %d\n", __func__, + wbc->nr_to_write, inode->i_ino, ret); + dump_stack(); goto out_writepages; } if (ext4_should_order_data(inode)) { /* * With ordered mode we need to add - * the inode to the journal handle + * the inode to the journal handl * when we do block allocation. */ ret = ext4_jbd2_file_inode(handle, inode); @@ -2289,20 +2306,20 @@ static int ext4_da_writepages(struct address_space *mapping, ext4_journal_stop(handle); goto out_writepages; } - } - /* - * set the max dirty pages could be write at a time - * to fit into the reserved transaction credits - */ - if (wbc->nr_to_write > EXT4_MAX_WRITEBACK_PAGES) - wbc->nr_to_write = EXT4_MAX_WRITEBACK_PAGES; to_write -= wbc->nr_to_write; ret = mpage_da_writepages(mapping, wbc, - ext4_da_get_block_write); + ext4_da_get_block_write); ext4_journal_stop(handle); - if (wbc->nr_to_write) { + if (ret == MPAGE_DA_EXTENT_TAIL) { + /* + * got one extent now try with + * rest of the pages + */ + to_write += wbc->nr_to_write; + ret = 0; + } else if (wbc->nr_to_write) { /* * There is no more writeout needed * or we requested for a noblocking writeout @@ -2314,10 +2331,18 @@ static int ext4_da_writepages(struct address_space *mapping, wbc->nr_to_write = to_write; } + if (wbc->range_cont && (pages_skipped != wbc->pages_skipped)) { + /* We skipped pages in this loop */ + wbc->range_start = range_start; + wbc->nr_to_write = to_write + + wbc->pages_skipped - pages_skipped; + wbc->pages_skipped = pages_skipped; + goto restart_loop; + } + out_writepages: wbc->nr_to_write = to_write; - if (range_start) - wbc->range_start = range_start; + wbc->range_start = range_start; return ret; } -- cgit v1.2.3 From 525f4ed8dcb72c71b306a78ecbf06f41d08fe441 Mon Sep 17 00:00:00 2001 From: Mingming Cao Date: Tue, 19 Aug 2008 22:15:58 -0400 Subject: ext4: journal credit fix for the delayed allocation's writepages() function Previous delalloc writepages implementation started a new transaction outside of a loop which called get_block() to do the block allocation. Since we didn't know exactly how many blocks would need to be allocated, the estimated journal credits required was very conservative and caused many issues. With the reworked delayed allocation, a new transaction is created for each get_block(), thus we don't need to guess how many credits for the multiple chunk of allocation. We start every transaction with enough credits for inserting a single exent. When estimate the credits for indirect blocks to allocate a chunk of blocks, we need to know the number of data blocks to allocate. We use the total number of reserved delalloc datablocks; if that is too big, for non-extent files, we need to limit the number of blocks to EXT4_MAX_TRANS_BLOCKS. Code cleanup from Aneesh. Signed-off-by: Mingming Cao Reviewed-off-by: Aneesh Kumar K.V Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 8 +++--- fs/ext4/inode.c | 74 ++++++++++++++++++++++++++++++++++++++++--------------- 2 files changed, 58 insertions(+), 24 deletions(-) (limited to 'fs') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 5596b70efa20..b24d3c53f20c 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -1753,7 +1753,7 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, * When pass the actual path, the caller should calculate credits * under i_data_sem. */ -int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int num, +int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks, struct ext4_ext_path *path) { if (path) { @@ -1772,12 +1772,12 @@ int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int num, * and other metadat blocks still need to be * accounted. */ - /* 1 one bitmap, 1 block group descriptor */ + /* 1 bitmap, 1 block group descriptor */ ret = 2 + EXT4_META_TRANS_BLOCKS(inode->i_sb); } } - return ext4_chunk_trans_blocks(inode, num); + return ext4_chunk_trans_blocks(inode, nrblocks); } /* @@ -1791,7 +1791,7 @@ int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int num, * If the nrblocks are discontiguous, they could cause * the whole tree split more than once, but this is really rare. */ -int ext4_ext_index_trans_blocks(struct inode *inode, int num, int chunk) +int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) { int index; int depth = ext_depth(inode); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 8dd22eade42c..d1906d9a22de 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1848,29 +1848,53 @@ static void mpage_da_map_blocks(struct mpage_da_data *mpd) static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, sector_t logical, struct buffer_head *bh) { - struct buffer_head *lbh = &mpd->lbh; sector_t next; + size_t b_size = bh->b_size; + struct buffer_head *lbh = &mpd->lbh; + int nrblocks = lbh->b_size >> mpd->inode->i_blkbits; - next = lbh->b_blocknr + (lbh->b_size >> mpd->inode->i_blkbits); - + /* check if thereserved journal credits might overflow */ + if (!(EXT4_I(mpd->inode)->i_flags & EXT4_EXTENTS_FL)) { + if (nrblocks >= EXT4_MAX_TRANS_DATA) { + /* + * With non-extent format we are limited by the journal + * credit available. Total credit needed to insert + * nrblocks contiguous blocks is dependent on the + * nrblocks. So limit nrblocks. + */ + goto flush_it; + } else if ((nrblocks + (b_size >> mpd->inode->i_blkbits)) > + EXT4_MAX_TRANS_DATA) { + /* + * Adding the new buffer_head would make it cross the + * allowed limit for which we have journal credit + * reserved. So limit the new bh->b_size + */ + b_size = (EXT4_MAX_TRANS_DATA - nrblocks) << + mpd->inode->i_blkbits; + /* we will do mpage_da_submit_io in the next loop */ + } + } /* * First block in the extent */ if (lbh->b_size == 0) { lbh->b_blocknr = logical; - lbh->b_size = bh->b_size; + lbh->b_size = b_size; lbh->b_state = bh->b_state & BH_FLAGS; return; } + next = lbh->b_blocknr + nrblocks; /* * Can we merge the block to our big extent? */ if (logical == next && (bh->b_state & BH_FLAGS) == lbh->b_state) { - lbh->b_size += bh->b_size; + lbh->b_size += b_size; return; } +flush_it: /* * We couldn't merge the block to our extent, so we * need to flush current extent and start new one @@ -2231,17 +2255,29 @@ static int ext4_da_writepage(struct page *page, } /* - * For now just follow the DIO way to estimate the max credits - * needed to write out EXT4_MAX_WRITEBACK_PAGES. - * todo: need to calculate the max credits need for - * extent based files, currently the DIO credits is based on - * indirect-blocks mapping way. - * - * Probably should have a generic way to calculate credits - * for DIO, writepages, and truncate + * This is called via ext4_da_writepages() to + * calulate the total number of credits to reserve to fit + * a single extent allocation into a single transaction, + * ext4_da_writpeages() will loop calling this before + * the block allocation. */ -#define EXT4_MAX_WRITEBACK_PAGES DIO_MAX_BLOCKS -#define EXT4_MAX_WRITEBACK_CREDITS 25 + +static int ext4_da_writepages_trans_blocks(struct inode *inode) +{ + int max_blocks = EXT4_I(inode)->i_reserved_data_blocks; + + /* + * With non-extent format the journal credit needed to + * insert nrblocks contiguous block is dependent on + * number of contiguous block. So we will limit + * number of contiguous block to a sane value + */ + if (!(inode->i_flags & EXT4_EXTENTS_FL) && + (max_blocks > EXT4_MAX_TRANS_DATA)) + max_blocks = EXT4_MAX_TRANS_DATA; + + return ext4_chunk_trans_blocks(inode, max_blocks); +} static int ext4_da_writepages(struct address_space *mapping, struct writeback_control *wbc) @@ -2283,7 +2319,7 @@ restart_loop: * by delalloc */ BUG_ON(ext4_should_journal_data(inode)); - needed_blocks = EXT4_DATA_TRANS_BLOCKS(inode->i_sb); + needed_blocks = ext4_da_writepages_trans_blocks(inode); /* start a new transaction*/ handle = ext4_journal_start(inode, needed_blocks); @@ -4461,11 +4497,9 @@ int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) * the modification of a single pages into a single transaction, * which may include multiple chunks of block allocations. * - * This could be called via ext4_write_begin() or later - * ext4_da_writepages() in delalyed allocation case. + * This could be called via ext4_write_begin() * - * In both case it's possible that we could allocating multiple - * chunks of blocks. We need to consider the worse case, when + * We need to consider the worse case, when * one new block per extent. */ int ext4_writepage_trans_blocks(struct inode *inode) -- cgit v1.2.3 From 16eb72956496594d023a7d7cd14a86404ad195ad Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 19 Aug 2008 21:16:54 -0400 Subject: ext4: make sure ext4_has_free_blocks returns 0 for ENOSPC Fix ext4_has_free_blocks() to return 0 when we don't have enough space. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/balloc.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 1ae5004e93fc..e9fa960ba6da 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -1626,6 +1626,9 @@ ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi, free_blocks = percpu_counter_sum_and_set(&sbi->s_freeblocks_counter); #endif + if (free_blocks <= root_blocks) + /* we don't have free space */ + return 0; if (free_blocks - root_blocks < nblocks) return free_blocks - root_blocks; return nblocks; -- cgit v1.2.3 From 91246c009094142f95ecc7573b7caed2bcef52c7 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 19 Aug 2008 21:14:52 -0400 Subject: ext4: Initialize writeback_index to 0 when allocating a new inode The write_cache_pages() function uses the mapping->writeback_index as the starting index to write out when range_cyclic is set. Properly initialize writeback_index so that we start the writeout at index 0. This was found when debugging the small file fragmentation on ext4. Signed-off-by: Aneesh Kumar K.V Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index d5d77958b861..566344b926b7 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -568,6 +568,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) #endif ei->i_block_alloc_info = NULL; ei->vfs_inode.i_version = 1; + ei->vfs_inode.i_data.writeback_index = 0; memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); INIT_LIST_HEAD(&ei->i_prealloc_list); spin_lock_init(&ei->i_prealloc_lock); -- cgit v1.2.3 From 5e745b041f2ccad63077118b40468521306f3962 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 18 Aug 2008 18:00:57 -0400 Subject: ext4: Fix small file fragmentation For small file block allocations, mballoc uses per cpu prealloc space. Use goal block when searching for the right prealloc space. Also make sure ext4_da_writepages tries to write all the pages for small files in single attempt Signed-off-by: Aneesh Kumar K.V Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 21 +++++++++++++++------ fs/ext4/mballoc.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 61 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index d1906d9a22de..7e91913e325b 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2282,13 +2282,12 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode) static int ext4_da_writepages(struct address_space *mapping, struct writeback_control *wbc) { - struct inode *inode = mapping->host; handle_t *handle = NULL; - int needed_blocks; - int ret = 0; - long to_write; loff_t range_start = 0; - long pages_skipped = 0; + struct inode *inode = mapping->host; + int needed_blocks, ret = 0, nr_to_writebump = 0; + long to_write, pages_skipped = 0; + struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); /* * No pages to write? This is mainly a kludge to avoid starting @@ -2297,6 +2296,16 @@ static int ext4_da_writepages(struct address_space *mapping, */ if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) return 0; + /* + * Make sure nr_to_write is >= sbi->s_mb_stream_request + * This make sure small files blocks are allocated in + * single attempt. This ensure that small files + * get less fragmented. + */ + if (wbc->nr_to_write < sbi->s_mb_stream_request) { + nr_to_writebump = sbi->s_mb_stream_request - wbc->nr_to_write; + wbc->nr_to_write = sbi->s_mb_stream_request; + } if (!wbc->range_cyclic) /* @@ -2377,7 +2386,7 @@ restart_loop: } out_writepages: - wbc->nr_to_write = to_write; + wbc->nr_to_write = to_write - nr_to_writebump; wbc->range_start = range_start; return ret; } diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 865e9ddb44d4..e0e3a5eb1ddb 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3281,6 +3281,35 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac, mb_debug("use %u/%u from group pa %p\n", pa->pa_lstart-len, len, pa); } +/* + * Return the prealloc space that have minimal distance + * from the goal block. @cpa is the prealloc + * space that is having currently known minimal distance + * from the goal block. + */ +static struct ext4_prealloc_space * +ext4_mb_check_group_pa(ext4_fsblk_t goal_block, + struct ext4_prealloc_space *pa, + struct ext4_prealloc_space *cpa) +{ + ext4_fsblk_t cur_distance, new_distance; + + if (cpa == NULL) { + atomic_inc(&pa->pa_count); + return pa; + } + cur_distance = abs(goal_block - cpa->pa_pstart); + new_distance = abs(goal_block - pa->pa_pstart); + + if (cur_distance < new_distance) + return cpa; + + /* drop the previous reference */ + atomic_dec(&cpa->pa_count); + atomic_inc(&pa->pa_count); + return pa; +} + /* * search goal blocks in preallocated space */ @@ -3290,7 +3319,8 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac) int order, i; struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); struct ext4_locality_group *lg; - struct ext4_prealloc_space *pa; + struct ext4_prealloc_space *pa, *cpa = NULL; + ext4_fsblk_t goal_block; /* only data can be preallocated */ if (!(ac->ac_flags & EXT4_MB_HINT_DATA)) @@ -3333,6 +3363,13 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac) /* The max size of hash table is PREALLOC_TB_SIZE */ order = PREALLOC_TB_SIZE - 1; + goal_block = ac->ac_g_ex.fe_group * EXT4_BLOCKS_PER_GROUP(ac->ac_sb) + + ac->ac_g_ex.fe_start + + le32_to_cpu(EXT4_SB(ac->ac_sb)->s_es->s_first_data_block); + /* + * search for the prealloc space that is having + * minimal distance from the goal block. + */ for (i = order; i < PREALLOC_TB_SIZE; i++) { rcu_read_lock(); list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i], @@ -3340,17 +3377,19 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac) spin_lock(&pa->pa_lock); if (pa->pa_deleted == 0 && pa->pa_free >= ac->ac_o_ex.fe_len) { - atomic_inc(&pa->pa_count); - ext4_mb_use_group_pa(ac, pa); - spin_unlock(&pa->pa_lock); - ac->ac_criteria = 20; - rcu_read_unlock(); - return 1; + + cpa = ext4_mb_check_group_pa(goal_block, + pa, cpa); } spin_unlock(&pa->pa_lock); } rcu_read_unlock(); } + if (cpa) { + ext4_mb_use_group_pa(ac, cpa); + ac->ac_criteria = 20; + return 1; + } return 0; } -- cgit v1.2.3 From aab3a8c7a3a6a001dd439ed00d4db17a1059803e Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Tue, 19 Aug 2008 14:23:37 +0000 Subject: [CIFS] reindent misindented statement MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ilpo Järvinen Signed-off-by: Steve French --- fs/cifs/inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 848286861c31..9c548f110102 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -546,7 +546,8 @@ int cifs_get_inode_info(struct inode **pinode, if ((inode->i_mode & S_IWUGO) == 0 && (attr & ATTR_READONLY) == 0) inode->i_mode |= (S_IWUGO & default_mode); - inode->i_mode &= ~S_IFMT; + + inode->i_mode &= ~S_IFMT; } /* clear write bits if ATTR_READONLY is set */ if (attr & ATTR_READONLY) -- cgit v1.2.3 From cb7691b648bddbfaf6dd8d8068273dbb18d2484c Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 18 Aug 2008 15:41:05 -0400 Subject: cifs: add local server pointer to cifs_setup_session cifs_setup_session references pSesInfo->server several times. That pointer shouldn't change during the life of the function so grab it once and store it in a local var. This makes the code look a little cleaner too. Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/connect.c | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 0711db65afe8..4c13bcdb92a5 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -3598,19 +3598,21 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo, char ntlm_session_key[CIFS_SESS_KEY_SIZE]; bool ntlmv2_flag = false; int first_time = 0; + struct TCP_Server_Info *server = pSesInfo->server; /* what if server changes its buffer size after dropping the session? */ - if (pSesInfo->server->maxBuf == 0) /* no need to send on reconnect */ { + if (server->maxBuf == 0) /* no need to send on reconnect */ { rc = CIFSSMBNegotiate(xid, pSesInfo); - if (rc == -EAGAIN) /* retry only once on 1st time connection */ { + if (rc == -EAGAIN) { + /* retry only once on 1st time connection */ rc = CIFSSMBNegotiate(xid, pSesInfo); if (rc == -EAGAIN) rc = -EHOSTDOWN; } if (rc == 0) { spin_lock(&GlobalMid_Lock); - if (pSesInfo->server->tcpStatus != CifsExiting) - pSesInfo->server->tcpStatus = CifsGood; + if (server->tcpStatus != CifsExiting) + server->tcpStatus = CifsGood; else rc = -EHOSTDOWN; spin_unlock(&GlobalMid_Lock); @@ -3623,23 +3625,22 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo, goto ss_err_exit; pSesInfo->flags = 0; - pSesInfo->capabilities = pSesInfo->server->capabilities; + pSesInfo->capabilities = server->capabilities; if (linuxExtEnabled == 0) pSesInfo->capabilities &= (~CAP_UNIX); /* pSesInfo->sequence_number = 0;*/ cFYI(1, ("Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d", - pSesInfo->server->secMode, - pSesInfo->server->capabilities, - pSesInfo->server->timeAdj)); + server->secMode, server->capabilities, server->timeAdj)); + if (experimEnabled < 2) rc = CIFS_SessSetup(xid, pSesInfo, first_time, nls_info); else if (extended_security && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY) - && (pSesInfo->server->secType == NTLMSSP)) { + && (server->secType == NTLMSSP)) { rc = -EOPNOTSUPP; } else if (extended_security && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY) - && (pSesInfo->server->secType == RawNTLMSSP)) { + && (server->secType == RawNTLMSSP)) { cFYI(1, ("NTLMSSP sesssetup")); rc = CIFSNTLMSSPNegotiateSessSetup(xid, pSesInfo, &ntlmv2_flag, nls_info); @@ -3668,12 +3669,12 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo, } else { SMBNTencrypt(pSesInfo->password, - pSesInfo->server->cryptKey, + server->cryptKey, ntlm_session_key); if (first_time) cifs_calculate_mac_key( - &pSesInfo->server->mac_signing_key, + &server->mac_signing_key, ntlm_session_key, pSesInfo->password); } @@ -3686,13 +3687,13 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo, nls_info); } } else { /* old style NTLM 0.12 session setup */ - SMBNTencrypt(pSesInfo->password, pSesInfo->server->cryptKey, + SMBNTencrypt(pSesInfo->password, server->cryptKey, ntlm_session_key); if (first_time) - cifs_calculate_mac_key( - &pSesInfo->server->mac_signing_key, - ntlm_session_key, pSesInfo->password); + cifs_calculate_mac_key(&server->mac_signing_key, + ntlm_session_key, + pSesInfo->password); rc = CIFSSessSetup(xid, pSesInfo, ntlm_session_key, nls_info); } -- cgit v1.2.3 From c16fefa56334e8d0197492607e473fdbb813073f Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 19 Aug 2008 19:35:33 +0000 Subject: [CIFS] distinguish between Kerberos and MSKerberos in upcall Properly handle MSKRB5 by passing sec=mskrb5 to the upcall so that the spengo blob can be generated appropriately. Also, make decode_negTokenInit prefer whichever mechanism is first in the list. Needed for some NetApp servers, and possibly some older versions of Windows which treat the two KRB5 mechanisms differently. Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/asn1.c | 11 ++++++++--- fs/cifs/cifs_spnego.c | 4 +++- fs/cifs/cifsglob.h | 3 ++- fs/cifs/sess.c | 2 +- 4 files changed, 14 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c index 5fabd2caf93c..1b09f1670061 100644 --- a/fs/cifs/asn1.c +++ b/fs/cifs/asn1.c @@ -476,6 +476,7 @@ decode_negTokenInit(unsigned char *security_blob, int length, unsigned int cls, con, tag, oidlen, rc; bool use_ntlmssp = false; bool use_kerberos = false; + bool use_mskerberos = false; *secType = NTLM; /* BB eventually make Kerberos or NLTMSSP the default*/ @@ -574,10 +575,12 @@ decode_negTokenInit(unsigned char *security_blob, int length, *(oid + 1), *(oid + 2), *(oid + 3))); if (compare_oid(oid, oidlen, MSKRB5_OID, - MSKRB5_OID_LEN)) - use_kerberos = true; + MSKRB5_OID_LEN) && + !use_kerberos) + use_mskerberos = true; else if (compare_oid(oid, oidlen, KRB5_OID, - KRB5_OID_LEN)) + KRB5_OID_LEN) && + !use_mskerberos) use_kerberos = true; else if (compare_oid(oid, oidlen, NTLMSSP_OID, NTLMSSP_OID_LEN)) @@ -630,6 +633,8 @@ decode_negTokenInit(unsigned char *security_blob, int length, if (use_kerberos) *secType = Kerberos; + else if (use_mskerberos) + *secType = MSKerberos; else if (use_ntlmssp) *secType = NTLMSSP; diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c index 2434ab0e8791..117ef4bba68e 100644 --- a/fs/cifs/cifs_spnego.c +++ b/fs/cifs/cifs_spnego.c @@ -114,9 +114,11 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo) dp = description + strlen(description); - /* for now, only sec=krb5 is valid */ + /* for now, only sec=krb5 and sec=mskrb5 are valid */ if (server->secType == Kerberos) sprintf(dp, ";sec=krb5"); + else if (server->secType == MSKerberos) + sprintf(dp, ";sec=mskrb5"); else goto out; diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 7e1cf262effe..8dfd6f24d488 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -80,7 +80,8 @@ enum securityEnum { NTLMv2, /* Legacy NTLM auth with NTLMv2 hash */ RawNTLMSSP, /* NTLMSSP without SPNEGO */ NTLMSSP, /* NTLMSSP via SPNEGO */ - Kerberos /* Kerberos via SPNEGO */ + Kerberos, /* Kerberos via SPNEGO */ + MSKerberos, /* MS Kerberos via SPNEGO */ }; enum protocolEnum { diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index ed150efbe27c..3188e4d9cddb 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -505,7 +505,7 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time, unicode_ssetup_strings(&bcc_ptr, ses, nls_cp); } else ascii_ssetup_strings(&bcc_ptr, ses, nls_cp); - } else if (type == Kerberos) { + } else if (type == Kerberos || type == MSKerberos) { #ifdef CONFIG_CIFS_UPCALL struct cifs_spnego_msg *msg; spnego_key = cifs_get_spnego_key(ses); -- cgit v1.2.3 From 3d2af3465e91335bd1dbf36b19e92079d901409f Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 19 Aug 2008 20:51:09 +0000 Subject: [CIFS] Kerberos support not considered experimental anymore Acked-by: Jeff Layton Signed-off-by: Steve French --- fs/Kconfig | 1 - fs/cifs/README | 30 ++++++++++++++++++++++++++---- 2 files changed, 26 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/Kconfig b/fs/Kconfig index d3873583360b..f0427105a619 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -1984,7 +1984,6 @@ config CIFS_EXPERIMENTAL config CIFS_UPCALL bool "Kerberos/SPNEGO advanced session setup (EXPERIMENTAL)" - depends on CIFS_EXPERIMENTAL depends on KEYS help Enables an upcall mechanism for CIFS which accesses diff --git a/fs/cifs/README b/fs/cifs/README index 2bd6fe556f88..68b5c1169d9d 100644 --- a/fs/cifs/README +++ b/fs/cifs/README @@ -642,8 +642,30 @@ The statistics for the number of total SMBs and oplock breaks are different in that they represent all for that share, not just those for which the server returned success. -Also note that "cat /proc/fs/cifs/DebugData" will display information about +Also note that "cat /proc/fs/cifs/DebugData" will display information about the active sessions and the shares that are mounted. -Enabling Kerberos (extended security) works when CONFIG_CIFS_EXPERIMENTAL is -on but requires a user space helper (from the Samba project). NTLM and NTLMv2 and -LANMAN support do not require this helper. + +Enabling Kerberos (extended security) works but requires version 1.2 or later +of the helper program cifs.upcall to be present and to be configured in the +/etc/request-key.conf file. The cifs.upcall helper program is from the Samba +project(http://www.samba.org). NTLM and NTLMv2 and LANMAN support do not +require this helper. Note that NTLMv2 security (which does not require the +cifs.upcall helper program), instead of using Kerberos, is sufficient for +some use cases. + +Enabling DFS support (used to access shares transparently in an MS-DFS +global name space) requires that CONFIG_CIFS_EXPERIMENTAL be enabled. In +addition, DFS support for target shares which are specified as UNC +names which begin with host names (rather than IP addresses) requires +a user space helper (such as cifs.upcall) to be present in order to +translate host names to ip address, and the user space helper must also +be configured in the file /etc/request-key.conf + +To use cifs Kerberos and DFS support, the Linux keyutils package should be +installed and something like the following lines should be added to the +/etc/request-key.conf file: + +create cifs.spnego * * /usr/local/sbin/cifs.upcall %k +create dns_resolver * * /usr/local/sbin/cifs.upcall %k + + -- cgit v1.2.3 From 18496e80f729be5f536d0315751b3bbb95ca913e Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Thu, 7 Aug 2008 00:11:12 +0300 Subject: [PATCH] ocfs2/cluster/tcp.c: make some functions static Commit 0f475b2abed6cbccee1da20a0bef2895eb2a0edd (ocfs2/net: Silence build warnings) made sense as far as it fixed compile warnings, but it was not required that it made the functions global. Signed-off-by: Adrian Bunk Signed-off-by: Mark Fasheh --- fs/ocfs2/cluster/tcp.c | 44 ++++++++++++++++++++++++++++++++++------- fs/ocfs2/cluster/tcp_internal.h | 32 ------------------------------ 2 files changed, 37 insertions(+), 39 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index a27d61581bd6..2bcf706d9dd3 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -143,8 +143,8 @@ static void o2net_sc_postpone_idle(struct o2net_sock_container *sc); static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc); #ifdef CONFIG_DEBUG_FS -void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, - u32 msgkey, struct task_struct *task, u8 node) +static void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, + u32 msgkey, struct task_struct *task, u8 node) { INIT_LIST_HEAD(&nst->st_net_debug_item); nst->st_task = task; @@ -153,31 +153,61 @@ void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, nst->st_node = node; } -void o2net_set_nst_sock_time(struct o2net_send_tracking *nst) +static void o2net_set_nst_sock_time(struct o2net_send_tracking *nst) { do_gettimeofday(&nst->st_sock_time); } -void o2net_set_nst_send_time(struct o2net_send_tracking *nst) +static void o2net_set_nst_send_time(struct o2net_send_tracking *nst) { do_gettimeofday(&nst->st_send_time); } -void o2net_set_nst_status_time(struct o2net_send_tracking *nst) +static void o2net_set_nst_status_time(struct o2net_send_tracking *nst) { do_gettimeofday(&nst->st_status_time); } -void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, +static void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, struct o2net_sock_container *sc) { nst->st_sc = sc; } -void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id) +static void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id) { nst->st_id = msg_id; } + +#else /* CONFIG_DEBUG_FS */ + +static inline void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, + u32 msgkey, struct task_struct *task, u8 node) +{ +} + +static inline void o2net_set_nst_sock_time(struct o2net_send_tracking *nst) +{ +} + +static inline void o2net_set_nst_send_time(struct o2net_send_tracking *nst) +{ +} + +static inline void o2net_set_nst_status_time(struct o2net_send_tracking *nst) +{ +} + +static inline void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, + struct o2net_sock_container *sc) +{ +} + +static inline void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, + u32 msg_id) +{ +} + #endif /* CONFIG_DEBUG_FS */ static inline int o2net_reconnect_delay(void) diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h index 18307ff81b77..8d58cfe410b1 100644 --- a/fs/ocfs2/cluster/tcp_internal.h +++ b/fs/ocfs2/cluster/tcp_internal.h @@ -224,42 +224,10 @@ struct o2net_send_tracking { struct timeval st_send_time; struct timeval st_status_time; }; - -void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, - u32 msgkey, struct task_struct *task, u8 node); -void o2net_set_nst_sock_time(struct o2net_send_tracking *nst); -void o2net_set_nst_send_time(struct o2net_send_tracking *nst); -void o2net_set_nst_status_time(struct o2net_send_tracking *nst); -void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, - struct o2net_sock_container *sc); -void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id); - #else struct o2net_send_tracking { u32 dummy; }; - -static inline void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, - u32 msgkey, struct task_struct *task, u8 node) -{ -} -static inline void o2net_set_nst_sock_time(struct o2net_send_tracking *nst) -{ -} -static inline void o2net_set_nst_send_time(struct o2net_send_tracking *nst) -{ -} -static inline void o2net_set_nst_status_time(struct o2net_send_tracking *nst) -{ -} -static inline void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, - struct o2net_sock_container *sc) -{ -} -static inline void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, - u32 msg_id) -{ -} #endif /* CONFIG_DEBUG_FS */ #endif /* O2CLUSTER_TCP_INTERNAL_H */ -- cgit v1.2.3 From a57a874b04e27cb530a0e18c244387452e73ccce Mon Sep 17 00:00:00 2001 From: Alexander Beregalov Date: Wed, 6 Aug 2008 00:50:41 +0400 Subject: [PATCH] ocfs2/cluster/netdebug.c: fix warning ocfs2/cluster/netdebug.c: fix warning fs/ocfs2/cluster/netdebug.c:154: warning: format '%lu' expects type 'long unsigned int', but argument 17 has type 'suseconds_t' Signed-off-by: Alexander Beregalov Signed-off-by: Mark Fasheh --- fs/ocfs2/cluster/netdebug.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c index d8bfa0eb41b2..52276c02f710 100644 --- a/fs/ocfs2/cluster/netdebug.c +++ b/fs/ocfs2/cluster/netdebug.c @@ -138,20 +138,20 @@ static int nst_seq_show(struct seq_file *seq, void *v) " message id: %d\n" " message type: %u\n" " message key: 0x%08x\n" - " sock acquiry: %lu.%lu\n" - " send start: %lu.%lu\n" - " wait start: %lu.%lu\n", + " sock acquiry: %lu.%ld\n" + " send start: %lu.%ld\n" + " wait start: %lu.%ld\n", nst, (unsigned long)nst->st_task->pid, (unsigned long)nst->st_task->tgid, nst->st_task->comm, nst->st_node, nst->st_sc, nst->st_id, nst->st_msg_type, nst->st_msg_key, nst->st_sock_time.tv_sec, - (unsigned long)nst->st_sock_time.tv_usec, + (long)nst->st_sock_time.tv_usec, nst->st_send_time.tv_sec, - (unsigned long)nst->st_send_time.tv_usec, + (long)nst->st_send_time.tv_usec, nst->st_status_time.tv_sec, - nst->st_status_time.tv_usec); + (long)nst->st_status_time.tv_usec); } spin_unlock(&o2net_debug_lock); @@ -276,7 +276,7 @@ static void *sc_seq_next(struct seq_file *seq, void *v, loff_t *pos) return sc; /* unused, just needs to be null when done */ } -#define TV_SEC_USEC(TV) TV.tv_sec, (unsigned long)TV.tv_usec +#define TV_SEC_USEC(TV) TV.tv_sec, (long)TV.tv_usec static int sc_seq_show(struct seq_file *seq, void *v) { @@ -309,12 +309,12 @@ static int sc_seq_show(struct seq_file *seq, void *v) " remote node: %s\n" " page off: %zu\n" " handshake ok: %u\n" - " timer: %lu.%lu\n" - " data ready: %lu.%lu\n" - " advance start: %lu.%lu\n" - " advance stop: %lu.%lu\n" - " func start: %lu.%lu\n" - " func stop: %lu.%lu\n" + " timer: %lu.%ld\n" + " data ready: %lu.%ld\n" + " advance start: %lu.%ld\n" + " advance stop: %lu.%ld\n" + " func start: %lu.%ld\n" + " func stop: %lu.%ld\n" " func key: %u\n" " func type: %u\n", sc, -- cgit v1.2.3 From a1af7d15a18d1e375b0a6fee93789a0bbfe088b4 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Tue, 19 Aug 2008 17:20:28 -0700 Subject: ocfs2: Fix sleep-with-spinlock recovery regression This fixes a bug introduced with 539d8264093560b917ee3afe4c7f74e5da09d6a5: [PATCH 2/2] ocfs2: Fix race between mount and recovery ocfs2_mark_dead_nodes() was reading journal inodes while holding the spinlock protecting our in-memory recovery state. The fix is very simple - the disk state is protected by a cluster lock that's already held, so we just move the spinlock down past the read. Reviewed-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/journal.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 7a37240f7a31..c47bc2a809c2 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -1418,13 +1418,13 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb) { unsigned int node_num; int status, i; + u32 gen; struct buffer_head *bh = NULL; struct ocfs2_dinode *di; /* This is called with the super block cluster lock, so we * know that the slot map can't change underneath us. */ - spin_lock(&osb->osb_lock); for (i = 0; i < osb->max_slots; i++) { /* Read journal inode to get the recovery generation */ status = ocfs2_read_journal_inode(osb, i, &bh, NULL); @@ -1433,23 +1433,31 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb) goto bail; } di = (struct ocfs2_dinode *)bh->b_data; - osb->slot_recovery_generations[i] = - ocfs2_get_recovery_generation(di); + gen = ocfs2_get_recovery_generation(di); brelse(bh); bh = NULL; + spin_lock(&osb->osb_lock); + osb->slot_recovery_generations[i] = gen; + mlog(0, "Slot %u recovery generation is %u\n", i, osb->slot_recovery_generations[i]); - if (i == osb->slot_num) + if (i == osb->slot_num) { + spin_unlock(&osb->osb_lock); continue; + } status = ocfs2_slot_to_node_num_locked(osb, i, &node_num); - if (status == -ENOENT) + if (status == -ENOENT) { + spin_unlock(&osb->osb_lock); continue; + } - if (__ocfs2_recovery_map_test(osb, node_num)) + if (__ocfs2_recovery_map_test(osb, node_num)) { + spin_unlock(&osb->osb_lock); continue; + } spin_unlock(&osb->osb_lock); /* Ok, we have a slot occupied by another node which @@ -1465,10 +1473,7 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb) mlog_errno(status); goto bail; } - - spin_lock(&osb->osb_lock); } - spin_unlock(&osb->osb_lock); status = 0; bail: -- cgit v1.2.3 From 83cab5338fa8c74f979223698c8d4cc88f2ab68e Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Thu, 21 Aug 2008 14:14:27 +0800 Subject: ocfs2: Jump to correct label in ocfs2_expand_inline_dir() When we fail to insert extent in ocfs2_expand_inline_dir(), we should go to out_commit, not out. Signed-off-by: Tao Ma Signed-off-by: Mark Fasheh --- fs/ocfs2/dir.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 8a1875848080..8e9c4a47d819 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -1310,7 +1310,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, NULL); if (ret) { mlog_errno(ret); - goto out; + goto out_commit; } ret = ocfs2_journal_dirty(handle, di_bh); @@ -1336,7 +1336,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, len, 0, NULL); if (ret) { mlog_errno(ret); - goto out; + goto out_commit; } } -- cgit v1.2.3 From 9780eb6cfaf7d2d5ccc061eaf94e7aec6a17791e Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Tue, 5 Aug 2008 11:32:46 -0700 Subject: ocfs2: correctly set i_blocks after inline dir gets expanded We were setting i_blocks based on allocation before the extent insert, which is wrong as the value is a calculation based on ip_clusters which gets updated as a result of the insert. This patch moves the line in question to just after the call to ocfs2_insert_extent(). Without this fix, inline directories were temporarily having an i_blocks value of zero immediately after expansion to extents. Reported-and-tested-by: Tristan Ye Signed-off-by: Mark Fasheh --- fs/ocfs2/dir.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 8e9c4a47d819..9cce563fd627 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -1300,7 +1300,6 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, di->i_size = cpu_to_le64(sb->s_blocksize); di->i_ctime = di->i_mtime = cpu_to_le64(dir->i_ctime.tv_sec); di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(dir->i_ctime.tv_nsec); - dir->i_blocks = ocfs2_inode_sector_count(dir); /* * This should never fail as our extent list is empty and all @@ -1313,6 +1312,12 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, goto out_commit; } + /* + * Set i_blocks after the extent insert for the most up to + * date ip_clusters value. + */ + dir->i_blocks = ocfs2_inode_sector_count(dir); + ret = ocfs2_journal_dirty(handle, di_bh); if (ret) { mlog_errno(ret); -- cgit v1.2.3 From de6bf18e9ce0df807dab08cff08751cac383429d Mon Sep 17 00:00:00 2001 From: Louis Rilling Date: Fri, 15 Aug 2008 12:37:23 -0700 Subject: [PATCH] configfs: Consolidate locking around configfs_detach_prep() in configfs_rmdir() It appears that configfs_rmdir() can protect configfs_detach_prep() retries with less calls to {spin,mutex}_{lock,unlock}, and a cleaner code. This patch does not change any behavior, except that it removes two useless lock/unlock pairs having nothing inside to protect and providing a useless barrier. Signed-off-by: Louis Rilling Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/configfs/dir.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 7a8db78a91d2..8e93341f3e82 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -1311,16 +1311,18 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) * Ensure that no racing symlink() will make detach_prep() fail while * the new link is temporarily attached */ - mutex_lock(&configfs_symlink_mutex); - spin_lock(&configfs_dirent_lock); do { struct mutex *wait_mutex; + mutex_lock(&configfs_symlink_mutex); + spin_lock(&configfs_dirent_lock); ret = configfs_detach_prep(dentry, &wait_mutex); - if (ret) { + if (ret) configfs_detach_rollback(dentry); - spin_unlock(&configfs_dirent_lock); - mutex_unlock(&configfs_symlink_mutex); + spin_unlock(&configfs_dirent_lock); + mutex_unlock(&configfs_symlink_mutex); + + if (ret) { if (ret != -EAGAIN) { config_item_put(parent_item); return ret; @@ -1329,13 +1331,8 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) /* Wait until the racing operation terminates */ mutex_lock(wait_mutex); mutex_unlock(wait_mutex); - - mutex_lock(&configfs_symlink_mutex); - spin_lock(&configfs_dirent_lock); } } while (ret == -EAGAIN); - spin_unlock(&configfs_dirent_lock); - mutex_unlock(&configfs_symlink_mutex); /* Get a working ref for the duration of this function */ item = configfs_get_config_item(dentry); -- cgit v1.2.3 From 7a8fc9b248e77a4eab0613acf30a6811799786b3 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Sun, 17 Aug 2008 17:36:59 +0300 Subject: removed unused #include 's This patch lets the files using linux/version.h match the files that #include it. Signed-off-by: Adrian Bunk Signed-off-by: Linus Torvalds --- arch/arm/plat-omap/clock.c | 1 - arch/cris/arch-v32/kernel/fasttimer.c | 2 -- arch/mn10300/kernel/mn10300-serial.c | 1 - arch/powerpc/sysdev/bestcomm/gen_bd.c | 1 - arch/x86/mach-rdc321x/platform.c | 1 - drivers/atm/adummy.c | 1 - drivers/char/xilinx_hwicap/buffer_icap.h | 1 - drivers/char/xilinx_hwicap/fifo_icap.h | 1 - drivers/char/xilinx_hwicap/xilinx_hwicap.h | 1 - drivers/edac/edac_core.h | 1 - drivers/i2c/busses/i2c-at91.c | 1 - drivers/infiniband/hw/ehca/ehca_tools.h | 1 - drivers/infiniband/hw/ipath/ipath_fs.c | 1 - drivers/infiniband/hw/nes/nes.h | 1 - drivers/infiniband/ulp/iser/iser_verbs.c | 1 - drivers/input/keyboard/bf54x-keys.c | 1 - drivers/input/touchscreen/mainstone-wm97xx.c | 1 - drivers/mfd/asic3.c | 1 - drivers/misc/eeprom_93cx6.c | 1 - drivers/mtd/maps/amd76xrom.c | 1 - drivers/mtd/maps/ck804xrom.c | 1 - drivers/mtd/maps/esb2rom.c | 1 - drivers/mtd/nand/au1550nd.c | 1 - drivers/net/myri10ge/myri10ge.c | 1 - drivers/net/netxen/netxen_nic.h | 1 - drivers/net/netxen/netxen_nic_ethtool.c | 1 - drivers/net/netxen/netxen_nic_hdr.h | 2 -- drivers/net/tokenring/lanstreamer.c | 1 - drivers/net/tokenring/lanstreamer.h | 2 -- drivers/net/wireless/b43legacy/main.c | 1 - drivers/net/wireless/iwlwifi/iwl-3945-led.c | 1 - drivers/net/wireless/iwlwifi/iwl-led.c | 1 - drivers/net/wireless/iwlwifi/iwl-rfkill.c | 1 - drivers/rtc/rtc-max6902.c | 2 -- drivers/rtc/rtc-r9701.c | 1 - drivers/s390/net/ctcm_mpc.c | 1 - drivers/scsi/dpt/dpti_i2o.h | 1 - drivers/scsi/ips.c | 1 - drivers/scsi/ips.h | 1 - drivers/scsi/lpfc/lpfc_debugfs.c | 1 - drivers/scsi/nsp32.c | 1 - drivers/scsi/nsp32.h | 1 - drivers/scsi/pcmcia/nsp_cs.c | 1 - drivers/scsi/qla2xxx/qla_mid.c | 1 - drivers/usb/atm/ueagle-atm.c | 1 - drivers/usb/gadget/amd5536udc.c | 1 - drivers/usb/gadget/s3c2410_udc.c | 1 - drivers/usb/misc/iowarrior.c | 1 - drivers/usb/serial/garmin_gps.c | 2 -- drivers/video/arkfb.c | 1 - drivers/video/s3fb.c | 1 - drivers/video/vermilion/vermilion.h | 1 - drivers/video/vt8623fb.c | 1 - drivers/video/xilinxfb.c | 1 - fs/jffs2/jffs2_fs_i.h | 1 - fs/xfs/xfs_dmapi.h | 1 - include/asm-x86/xen/hypervisor.h | 1 - include/linux/fs_uart_pd.h | 1 - kernel/nsproxy.c | 1 - kernel/power/swap.c | 1 - kernel/user_namespace.c | 1 - kernel/utsname.c | 1 - kernel/utsname_sysctl.c | 1 - sound/mips/au1x00.c | 1 - sound/soc/at91/eti_b1_wm8731.c | 1 - sound/soc/codecs/wm8753.c | 1 - sound/soc/codecs/wm9712.c | 1 - 67 files changed, 72 deletions(-) (limited to 'fs') diff --git a/arch/arm/plat-omap/clock.c b/arch/arm/plat-omap/clock.c index 23a070599993..197974defbe4 100644 --- a/arch/arm/plat-omap/clock.c +++ b/arch/arm/plat-omap/clock.c @@ -10,7 +10,6 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ -#include #include #include #include diff --git a/arch/cris/arch-v32/kernel/fasttimer.c b/arch/cris/arch-v32/kernel/fasttimer.c index 2de9d5849ef0..111caa1a2efb 100644 --- a/arch/cris/arch-v32/kernel/fasttimer.c +++ b/arch/cris/arch-v32/kernel/fasttimer.c @@ -19,8 +19,6 @@ #include #include -#include - #include #include #include diff --git a/arch/mn10300/kernel/mn10300-serial.c b/arch/mn10300/kernel/mn10300-serial.c index 8b054e7a8ae8..aa07d0cd1905 100644 --- a/arch/mn10300/kernel/mn10300-serial.c +++ b/arch/mn10300/kernel/mn10300-serial.c @@ -17,7 +17,6 @@ static const char serial_revdate[] = "2007-11-06"; #define SUPPORT_SYSRQ #endif -#include #include #include #include diff --git a/arch/powerpc/sysdev/bestcomm/gen_bd.c b/arch/powerpc/sysdev/bestcomm/gen_bd.c index a3a134c35b0a..e0a53e3147b2 100644 --- a/arch/powerpc/sysdev/bestcomm/gen_bd.c +++ b/arch/powerpc/sysdev/bestcomm/gen_bd.c @@ -11,7 +11,6 @@ * */ -#include #include #include #include diff --git a/arch/x86/mach-rdc321x/platform.c b/arch/x86/mach-rdc321x/platform.c index a037041817c7..4f4e50c3ad3b 100644 --- a/arch/x86/mach-rdc321x/platform.c +++ b/arch/x86/mach-rdc321x/platform.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include diff --git a/drivers/atm/adummy.c b/drivers/atm/adummy.c index 2ebd07f2ef81..5effec6f5458 100644 --- a/drivers/atm/adummy.c +++ b/drivers/atm/adummy.c @@ -3,7 +3,6 @@ */ #include -#include #include #include #include diff --git a/drivers/char/xilinx_hwicap/buffer_icap.h b/drivers/char/xilinx_hwicap/buffer_icap.h index c5b1840906b2..8b0252bf06e2 100644 --- a/drivers/char/xilinx_hwicap/buffer_icap.h +++ b/drivers/char/xilinx_hwicap/buffer_icap.h @@ -38,7 +38,6 @@ #include #include -#include #include #include diff --git a/drivers/char/xilinx_hwicap/fifo_icap.h b/drivers/char/xilinx_hwicap/fifo_icap.h index ffabd3ba2bd8..62bda453c90b 100644 --- a/drivers/char/xilinx_hwicap/fifo_icap.h +++ b/drivers/char/xilinx_hwicap/fifo_icap.h @@ -38,7 +38,6 @@ #include #include -#include #include #include diff --git a/drivers/char/xilinx_hwicap/xilinx_hwicap.h b/drivers/char/xilinx_hwicap/xilinx_hwicap.h index 1f9c8b082dbe..24d0d9b938fb 100644 --- a/drivers/char/xilinx_hwicap/xilinx_hwicap.h +++ b/drivers/char/xilinx_hwicap/xilinx_hwicap.h @@ -38,7 +38,6 @@ #include #include -#include #include #include diff --git a/drivers/edac/edac_core.h b/drivers/edac/edac_core.h index b27b13c5eb5a..4b55ec607a88 100644 --- a/drivers/edac/edac_core.h +++ b/drivers/edac/edac_core.h @@ -34,7 +34,6 @@ #include #include #include -#include #define EDAC_MC_LABEL_LEN 31 #define EDAC_DEVICE_NAME_LEN 31 diff --git a/drivers/i2c/busses/i2c-at91.c b/drivers/i2c/busses/i2c-at91.c index c1adcdbf7979..9efb02137254 100644 --- a/drivers/i2c/busses/i2c-at91.c +++ b/drivers/i2c/busses/i2c-at91.c @@ -14,7 +14,6 @@ */ #include -#include #include #include #include diff --git a/drivers/infiniband/hw/ehca/ehca_tools.h b/drivers/infiniband/hw/ehca/ehca_tools.h index ec950bf8c479..21f7d06f14ad 100644 --- a/drivers/infiniband/hw/ehca/ehca_tools.h +++ b/drivers/infiniband/hw/ehca/ehca_tools.h @@ -54,7 +54,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c index 23faba9d21eb..8bb5170b4e41 100644 --- a/drivers/infiniband/hw/ipath/ipath_fs.c +++ b/drivers/infiniband/hw/ipath/ipath_fs.c @@ -31,7 +31,6 @@ * SOFTWARE. */ -#include #include #include #include diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h index 39bd897b40c6..8eb7ae96974d 100644 --- a/drivers/infiniband/hw/nes/nes.h +++ b/drivers/infiniband/hw/nes/nes.h @@ -43,7 +43,6 @@ #include #include #include -#include #include #include diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 63462ecca147..26ff6214a81f 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -33,7 +33,6 @@ #include #include #include -#include #include "iscsi_iser.h" diff --git a/drivers/input/keyboard/bf54x-keys.c b/drivers/input/keyboard/bf54x-keys.c index 54ed8e2e1c02..6f227d3dbda1 100644 --- a/drivers/input/keyboard/bf54x-keys.c +++ b/drivers/input/keyboard/bf54x-keys.c @@ -29,7 +29,6 @@ */ #include -#include #include #include diff --git a/drivers/input/touchscreen/mainstone-wm97xx.c b/drivers/input/touchscreen/mainstone-wm97xx.c index 283f93a0cee2..37a555f37306 100644 --- a/drivers/input/touchscreen/mainstone-wm97xx.c +++ b/drivers/input/touchscreen/mainstone-wm97xx.c @@ -25,7 +25,6 @@ #include #include -#include #include #include #include diff --git a/drivers/mfd/asic3.c b/drivers/mfd/asic3.c index c6408a62d95e..bc2a807f210d 100644 --- a/drivers/mfd/asic3.c +++ b/drivers/mfd/asic3.c @@ -16,7 +16,6 @@ * */ -#include #include #include #include diff --git a/drivers/misc/eeprom_93cx6.c b/drivers/misc/eeprom_93cx6.c index ea55654e5948..15b1780025c8 100644 --- a/drivers/misc/eeprom_93cx6.c +++ b/drivers/misc/eeprom_93cx6.c @@ -26,7 +26,6 @@ #include #include -#include #include #include diff --git a/drivers/mtd/maps/amd76xrom.c b/drivers/mtd/maps/amd76xrom.c index 948b86f35ef4..d1eec7d3243f 100644 --- a/drivers/mtd/maps/amd76xrom.c +++ b/drivers/mtd/maps/amd76xrom.c @@ -6,7 +6,6 @@ #include #include -#include #include #include #include diff --git a/drivers/mtd/maps/ck804xrom.c b/drivers/mtd/maps/ck804xrom.c index effaf7cdefab..1a6feb4474de 100644 --- a/drivers/mtd/maps/ck804xrom.c +++ b/drivers/mtd/maps/ck804xrom.c @@ -9,7 +9,6 @@ #include #include -#include #include #include #include diff --git a/drivers/mtd/maps/esb2rom.c b/drivers/mtd/maps/esb2rom.c index aa64a4752781..bbbcdd4c8d13 100644 --- a/drivers/mtd/maps/esb2rom.c +++ b/drivers/mtd/maps/esb2rom.c @@ -12,7 +12,6 @@ #include #include -#include #include #include #include diff --git a/drivers/mtd/nand/au1550nd.c b/drivers/mtd/nand/au1550nd.c index 761946ea45b1..92c334ff4508 100644 --- a/drivers/mtd/nand/au1550nd.c +++ b/drivers/mtd/nand/au1550nd.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c index 5d76cd09e246..54cd89cb0838 100644 --- a/drivers/net/myri10ge/myri10ge.c +++ b/drivers/net/myri10ge/myri10ge.c @@ -56,7 +56,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/net/netxen/netxen_nic.h b/drivers/net/netxen/netxen_nic.h index ab871df6b1db..244ab49c4337 100644 --- a/drivers/net/netxen/netxen_nic.h +++ b/drivers/net/netxen/netxen_nic.h @@ -45,7 +45,6 @@ #include #include #include -#include #include #include diff --git a/drivers/net/netxen/netxen_nic_ethtool.c b/drivers/net/netxen/netxen_nic_ethtool.c index 4ad3e0844b99..b974ca0fc530 100644 --- a/drivers/net/netxen/netxen_nic_ethtool.c +++ b/drivers/net/netxen/netxen_nic_ethtool.c @@ -38,7 +38,6 @@ #include #include #include -#include #include "netxen_nic.h" #include "netxen_nic_hw.h" diff --git a/drivers/net/netxen/netxen_nic_hdr.h b/drivers/net/netxen/netxen_nic_hdr.h index e8e8d73f6ed7..e80f9e3e5973 100644 --- a/drivers/net/netxen/netxen_nic_hdr.h +++ b/drivers/net/netxen/netxen_nic_hdr.h @@ -32,8 +32,6 @@ #include #include -#include - #include #include #include diff --git a/drivers/net/tokenring/lanstreamer.c b/drivers/net/tokenring/lanstreamer.c index 47d84cd28097..59d1673f9387 100644 --- a/drivers/net/tokenring/lanstreamer.c +++ b/drivers/net/tokenring/lanstreamer.c @@ -119,7 +119,6 @@ #include #include #include -#include #include #include diff --git a/drivers/net/tokenring/lanstreamer.h b/drivers/net/tokenring/lanstreamer.h index e7bb3494afc7..13ccee6449c1 100644 --- a/drivers/net/tokenring/lanstreamer.h +++ b/drivers/net/tokenring/lanstreamer.h @@ -60,8 +60,6 @@ * */ -#include - /* MAX_INTR - the maximum number of times we can loop * inside the interrupt function before returning * control to the OS (maximum value is 256) diff --git a/drivers/net/wireless/b43legacy/main.c b/drivers/net/wireless/b43legacy/main.c index 2541c81932f0..1cb77db5c292 100644 --- a/drivers/net/wireless/b43legacy/main.c +++ b/drivers/net/wireless/b43legacy/main.c @@ -34,7 +34,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/net/wireless/iwlwifi/iwl-3945-led.c b/drivers/net/wireless/iwlwifi/iwl-3945-led.c index d3336966b6b5..705c65bed9fd 100644 --- a/drivers/net/wireless/iwlwifi/iwl-3945-led.c +++ b/drivers/net/wireless/iwlwifi/iwl-3945-led.c @@ -27,7 +27,6 @@ #include #include -#include #include #include #include diff --git a/drivers/net/wireless/iwlwifi/iwl-led.c b/drivers/net/wireless/iwlwifi/iwl-led.c index cb11c4a4d691..4eee1b163cd2 100644 --- a/drivers/net/wireless/iwlwifi/iwl-led.c +++ b/drivers/net/wireless/iwlwifi/iwl-led.c @@ -27,7 +27,6 @@ #include #include -#include #include #include #include diff --git a/drivers/net/wireless/iwlwifi/iwl-rfkill.c b/drivers/net/wireless/iwlwifi/iwl-rfkill.c index e5e5846e9f25..5d642298f04c 100644 --- a/drivers/net/wireless/iwlwifi/iwl-rfkill.c +++ b/drivers/net/wireless/iwlwifi/iwl-rfkill.c @@ -27,7 +27,6 @@ *****************************************************************************/ #include #include -#include #include #include diff --git a/drivers/rtc/rtc-max6902.c b/drivers/rtc/rtc-max6902.c index 12f0310ae89c..78b2551fb19d 100644 --- a/drivers/rtc/rtc-max6902.c +++ b/drivers/rtc/rtc-max6902.c @@ -20,8 +20,6 @@ */ #include -#include - #include #include #include diff --git a/drivers/rtc/rtc-r9701.c b/drivers/rtc/rtc-r9701.c index b35f9bfa2af4..395985b339c9 100644 --- a/drivers/rtc/rtc-r9701.c +++ b/drivers/rtc/rtc-r9701.c @@ -14,7 +14,6 @@ */ #include -#include #include #include #include diff --git a/drivers/s390/net/ctcm_mpc.c b/drivers/s390/net/ctcm_mpc.c index 49ae1cd25caa..2de1e2fccbf9 100644 --- a/drivers/s390/net/ctcm_mpc.c +++ b/drivers/s390/net/ctcm_mpc.c @@ -19,7 +19,6 @@ #undef DEBUGDATA #undef DEBUGCCW -#include #include #include #include diff --git a/drivers/scsi/dpt/dpti_i2o.h b/drivers/scsi/dpt/dpti_i2o.h index 19406cea6d6a..179ad77f6cc9 100644 --- a/drivers/scsi/dpt/dpti_i2o.h +++ b/drivers/scsi/dpt/dpti_i2o.h @@ -21,7 +21,6 @@ #include -#include #include #include diff --git a/drivers/scsi/ips.c b/drivers/scsi/ips.c index 7c615c70ec5c..bc9e6ddf41df 100644 --- a/drivers/scsi/ips.c +++ b/drivers/scsi/ips.c @@ -165,7 +165,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/scsi/ips.h b/drivers/scsi/ips.h index e0657b6f009c..4e49fbcfe8af 100644 --- a/drivers/scsi/ips.h +++ b/drivers/scsi/ips.h @@ -50,7 +50,6 @@ #ifndef _IPS_H_ #define _IPS_H_ -#include #include #include #include diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index 90272e65957a..094b47e94b29 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include diff --git a/drivers/scsi/nsp32.c b/drivers/scsi/nsp32.c index edf9fdb3cb3c..22052bb7becb 100644 --- a/drivers/scsi/nsp32.c +++ b/drivers/scsi/nsp32.c @@ -23,7 +23,6 @@ * 1.2: PowerPC (big endian) support. */ -#include #include #include #include diff --git a/drivers/scsi/nsp32.h b/drivers/scsi/nsp32.h index 6715ecb3bfca..9565acf1aa72 100644 --- a/drivers/scsi/nsp32.h +++ b/drivers/scsi/nsp32.h @@ -16,7 +16,6 @@ #ifndef _NSP32_H #define _NSP32_H -#include //#define NSP32_DEBUG 9 /* diff --git a/drivers/scsi/pcmcia/nsp_cs.c b/drivers/scsi/pcmcia/nsp_cs.c index a221b6ef9fa9..24e6cb8396e3 100644 --- a/drivers/scsi/pcmcia/nsp_cs.c +++ b/drivers/scsi/pcmcia/nsp_cs.c @@ -25,7 +25,6 @@ ***********************************************************************/ -#include #include #include #include diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c index 50baf6a1d67c..93560cd72784 100644 --- a/drivers/scsi/qla2xxx/qla_mid.c +++ b/drivers/scsi/qla2xxx/qla_mid.c @@ -6,7 +6,6 @@ */ #include "qla_def.h" -#include #include #include #include diff --git a/drivers/usb/atm/ueagle-atm.c b/drivers/usb/atm/ueagle-atm.c index cb01b5106efd..b6483dd98acc 100644 --- a/drivers/usb/atm/ueagle-atm.c +++ b/drivers/usb/atm/ueagle-atm.c @@ -64,7 +64,6 @@ #include #include #include -#include #include #include diff --git a/drivers/usb/gadget/amd5536udc.c b/drivers/usb/gadget/amd5536udc.c index 1500e1b3c302..abf8192f89e8 100644 --- a/drivers/usb/gadget/amd5536udc.c +++ b/drivers/usb/gadget/amd5536udc.c @@ -44,7 +44,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/usb/gadget/s3c2410_udc.c b/drivers/usb/gadget/s3c2410_udc.c index 538807384592..29d13ebe7500 100644 --- a/drivers/usb/gadget/s3c2410_udc.c +++ b/drivers/usb/gadget/s3c2410_udc.c @@ -35,7 +35,6 @@ #include #include #include -#include #include #include diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c index e6ca9979e3ae..a4ef77ef917d 100644 --- a/drivers/usb/misc/iowarrior.c +++ b/drivers/usb/misc/iowarrior.c @@ -19,7 +19,6 @@ #include #include #include -#include #include /* Version Information */ diff --git a/drivers/usb/serial/garmin_gps.c b/drivers/usb/serial/garmin_gps.c index 2e663f1afd5e..d95382088075 100644 --- a/drivers/usb/serial/garmin_gps.c +++ b/drivers/usb/serial/garmin_gps.c @@ -38,8 +38,6 @@ #include #include -#include - /* the mode to be set when the port ist opened */ static int initial_mode = 1; diff --git a/drivers/video/arkfb.c b/drivers/video/arkfb.c index 4bd569e479a7..314d18694b6a 100644 --- a/drivers/video/arkfb.c +++ b/drivers/video/arkfb.c @@ -11,7 +11,6 @@ * Code is based on s3fb */ -#include #include #include #include diff --git a/drivers/video/s3fb.c b/drivers/video/s3fb.c index 8361bd0e3df1..4dcec48a1d78 100644 --- a/drivers/video/s3fb.c +++ b/drivers/video/s3fb.c @@ -11,7 +11,6 @@ * which is based on the code of neofb. */ -#include #include #include #include diff --git a/drivers/video/vermilion/vermilion.h b/drivers/video/vermilion/vermilion.h index c4aba59d4809..7491abfcf1fc 100644 --- a/drivers/video/vermilion/vermilion.h +++ b/drivers/video/vermilion/vermilion.h @@ -30,7 +30,6 @@ #define _VERMILION_H_ #include -#include #include #include #include diff --git a/drivers/video/vt8623fb.c b/drivers/video/vt8623fb.c index 34aae7a2a62b..3df17dc8c3d7 100644 --- a/drivers/video/vt8623fb.c +++ b/drivers/video/vt8623fb.c @@ -12,7 +12,6 @@ * (http://davesdomain.org.uk/viafb/) */ -#include #include #include #include diff --git a/drivers/video/xilinxfb.c b/drivers/video/xilinxfb.c index 7b3a8423f485..5da3d2423cc0 100644 --- a/drivers/video/xilinxfb.c +++ b/drivers/video/xilinxfb.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/jffs2/jffs2_fs_i.h b/fs/jffs2/jffs2_fs_i.h index 31559f45fdde..4c41db91eaa4 100644 --- a/fs/jffs2/jffs2_fs_i.h +++ b/fs/jffs2/jffs2_fs_i.h @@ -12,7 +12,6 @@ #ifndef _JFFS2_FS_I #define _JFFS2_FS_I -#include #include #include #include diff --git a/fs/xfs/xfs_dmapi.h b/fs/xfs/xfs_dmapi.h index cdc2d3464a1a..2813cdd72375 100644 --- a/fs/xfs/xfs_dmapi.h +++ b/fs/xfs/xfs_dmapi.h @@ -18,7 +18,6 @@ #ifndef __XFS_DMAPI_H__ #define __XFS_DMAPI_H__ -#include /* Values used to define the on-disk version of dm_attrname_t. All * on-disk attribute names start with the 8-byte string "SGI_DMI_". * diff --git a/include/asm-x86/xen/hypervisor.h b/include/asm-x86/xen/hypervisor.h index 8e15dd28c91f..04ee0610014a 100644 --- a/include/asm-x86/xen/hypervisor.h +++ b/include/asm-x86/xen/hypervisor.h @@ -35,7 +35,6 @@ #include #include -#include #include #include diff --git a/include/linux/fs_uart_pd.h b/include/linux/fs_uart_pd.h index 809bb9ffc788..36b61ff39277 100644 --- a/include/linux/fs_uart_pd.h +++ b/include/linux/fs_uart_pd.h @@ -12,7 +12,6 @@ #ifndef FS_UART_PD_H #define FS_UART_PD_H -#include #include enum fs_uart_id { diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index 21575fc46d05..1d3ef29a2583 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -14,7 +14,6 @@ */ #include -#include #include #include #include diff --git a/kernel/power/swap.c b/kernel/power/swap.c index a0abf9a463f9..80ccac849e46 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index a9ab0596de44..532858fa5b88 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -6,7 +6,6 @@ */ #include -#include #include #include #include diff --git a/kernel/utsname.c b/kernel/utsname.c index 64d398f12444..815237a55af8 100644 --- a/kernel/utsname.c +++ b/kernel/utsname.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include diff --git a/kernel/utsname_sysctl.c b/kernel/utsname_sysctl.c index fe3a56c2256d..4ab9659d269e 100644 --- a/kernel/utsname_sysctl.c +++ b/kernel/utsname_sysctl.c @@ -12,7 +12,6 @@ #include #include #include -#include #include static void *get_uts(ctl_table *table, int write) diff --git a/sound/mips/au1x00.c b/sound/mips/au1x00.c index ee0741f9eb53..fbef38a9604a 100644 --- a/sound/mips/au1x00.c +++ b/sound/mips/au1x00.c @@ -38,7 +38,6 @@ #include #include #include -#include #include #include #include diff --git a/sound/soc/at91/eti_b1_wm8731.c b/sound/soc/at91/eti_b1_wm8731.c index b081e83766b7..b81d6b2cfa1d 100644 --- a/sound/soc/at91/eti_b1_wm8731.c +++ b/sound/soc/at91/eti_b1_wm8731.c @@ -22,7 +22,6 @@ #include #include -#include #include #include #include diff --git a/sound/soc/codecs/wm8753.c b/sound/soc/codecs/wm8753.c index 8604809f0c36..dc7b18fd2782 100644 --- a/sound/soc/codecs/wm8753.c +++ b/sound/soc/codecs/wm8753.c @@ -34,7 +34,6 @@ #include #include -#include #include #include #include diff --git a/sound/soc/codecs/wm9712.c b/sound/soc/codecs/wm9712.c index 1fb7f9a7aecd..2f1c91b1d556 100644 --- a/sound/soc/codecs/wm9712.c +++ b/sound/soc/codecs/wm9712.c @@ -13,7 +13,6 @@ #include #include -#include #include #include #include -- cgit v1.2.3 From cc996099174dc05b35b7a29301026987990e7f8c Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 2 Aug 2008 07:30:48 +0400 Subject: [PATCH] proc: inode number fixlet Ouch, if number taken from IDA is too big, the intent was to signal an error, not check for overflow and still do overflowing addition. One still needs 2^28 proc entries to notice this. Signed-off-by: Alexey Dobriyan Signed-off-by: Al Viro --- fs/proc/generic.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 4fb81e9c94e3..bca0f81eb687 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -330,6 +330,7 @@ retry: spin_lock(&proc_inum_lock); ida_remove(&proc_inum_ida, i); spin_unlock(&proc_inum_lock); + return 0; } return PROC_DYNAMIC_FIRST + i; } -- cgit v1.2.3 From 2d8a10cd1760e7ecc07a21e409485947c68a3291 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 11 Aug 2008 11:33:57 -0400 Subject: [PATCH] fix efs_lookup() it needs to use d_splice_alias(), not d_add() Signed-off-by: Al Viro --- fs/efs/namei.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/efs/namei.c b/fs/efs/namei.c index 3a404e7fad53..291abb11e20e 100644 --- a/fs/efs/namei.c +++ b/fs/efs/namei.c @@ -74,8 +74,7 @@ struct dentry *efs_lookup(struct inode *dir, struct dentry *dentry, struct namei } unlock_kernel(); - d_add(dentry, inode); - return NULL; + return d_splice_alias(inode, dentry); } static struct inode *efs_nfs_get_inode(struct super_block *sb, u64 ino, -- cgit v1.2.3 From e45b590b976465c258f3e2a6cc84573fc19e16d3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 7 Aug 2008 23:49:07 +0200 Subject: [PATCH] change d_add_ci argument ordering As pointed out during review d_add_ci argument order should match d_add, so switch the dentry and inode arguments. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/dcache.c | 2 +- fs/xfs/linux-2.6/xfs_iops.c | 2 +- include/linux/dcache.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/dcache.c b/fs/dcache.c index 101663d15e9f..80e93956aced 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1236,7 +1236,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) * If no entry exists with the exact case name, allocate new dentry with * the exact case, and return the spliced entry. */ -struct dentry *d_add_ci(struct inode *inode, struct dentry *dentry, +struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode, struct qstr *name) { int error; diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 91bcd979242c..095d271f3434 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -355,7 +355,7 @@ xfs_vn_ci_lookup( /* else case-insensitive match... */ dname.name = ci_name.name; dname.len = ci_name.len; - dentry = d_add_ci(VFS_I(ip), dentry, &dname); + dentry = d_add_ci(dentry, VFS_I(ip), &dname); kmem_free(ci_name.name); return dentry; } diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 07aa198f19ed..efba1de629ac 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -230,7 +230,7 @@ extern void d_delete(struct dentry *); extern struct dentry * d_alloc(struct dentry *, const struct qstr *); extern struct dentry * d_alloc_anon(struct inode *); extern struct dentry * d_splice_alias(struct inode *, struct dentry *); -extern struct dentry * d_add_ci(struct inode *, struct dentry *, struct qstr *); +extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *); extern void shrink_dcache_sb(struct super_block *); extern void shrink_dcache_parent(struct dentry *); extern void shrink_dcache_for_umount(struct super_block *); -- cgit v1.2.3 From 2690421743b03c9be05d8e44c3b827986d1329a7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 7 Aug 2008 23:50:21 +0200 Subject: [PATCH] ntfs: use d_add_ci d_add_ci was lifted 1:1 from ntfs. Change ntfs to use the common version. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/ntfs/namei.c | 89 ++------------------------------------------------------- 1 file changed, 2 insertions(+), 87 deletions(-) (limited to 'fs') diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c index e1781c8b1650..9e8a95be7a1e 100644 --- a/fs/ntfs/namei.c +++ b/fs/ntfs/namei.c @@ -174,7 +174,6 @@ static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent, // TODO: Consider moving this lot to a separate function! (AIA) handle_name: { - struct dentry *real_dent, *new_dent; MFT_RECORD *m; ntfs_attr_search_ctx *ctx; ntfs_inode *ni = NTFS_I(dent_inode); @@ -255,93 +254,9 @@ handle_name: } nls_name.hash = full_name_hash(nls_name.name, nls_name.len); - /* - * Note: No need for dent->d_lock lock as i_mutex is held on the - * parent inode. - */ - - /* Does a dentry matching the nls_name exist already? */ - real_dent = d_lookup(dent->d_parent, &nls_name); - /* If not, create it now. */ - if (!real_dent) { - real_dent = d_alloc(dent->d_parent, &nls_name); - kfree(nls_name.name); - if (!real_dent) { - err = -ENOMEM; - goto err_out; - } - new_dent = d_splice_alias(dent_inode, real_dent); - if (new_dent) - dput(real_dent); - else - new_dent = real_dent; - ntfs_debug("Done. (Created new dentry.)"); - return new_dent; - } + dent = d_add_ci(dent, dent_inode, &nls_name); kfree(nls_name.name); - /* Matching dentry exists, check if it is negative. */ - if (real_dent->d_inode) { - if (unlikely(real_dent->d_inode != dent_inode)) { - /* This can happen because bad inodes are unhashed. */ - BUG_ON(!is_bad_inode(dent_inode)); - BUG_ON(!is_bad_inode(real_dent->d_inode)); - } - /* - * Already have the inode and the dentry attached, decrement - * the reference count to balance the ntfs_iget() we did - * earlier on. We found the dentry using d_lookup() so it - * cannot be disconnected and thus we do not need to worry - * about any NFS/disconnectedness issues here. - */ - iput(dent_inode); - ntfs_debug("Done. (Already had inode and dentry.)"); - return real_dent; - } - /* - * Negative dentry: instantiate it unless the inode is a directory and - * has a 'disconnected' dentry (i.e. IS_ROOT and DCACHE_DISCONNECTED), - * in which case d_move() that in place of the found dentry. - */ - if (!S_ISDIR(dent_inode->i_mode)) { - /* Not a directory; everything is easy. */ - d_instantiate(real_dent, dent_inode); - ntfs_debug("Done. (Already had negative file dentry.)"); - return real_dent; - } - spin_lock(&dcache_lock); - if (list_empty(&dent_inode->i_dentry)) { - /* - * Directory without a 'disconnected' dentry; we need to do - * d_instantiate() by hand because it takes dcache_lock which - * we already hold. - */ - list_add(&real_dent->d_alias, &dent_inode->i_dentry); - real_dent->d_inode = dent_inode; - spin_unlock(&dcache_lock); - security_d_instantiate(real_dent, dent_inode); - ntfs_debug("Done. (Already had negative directory dentry.)"); - return real_dent; - } - /* - * Directory with a 'disconnected' dentry; get a reference to the - * 'disconnected' dentry. - */ - new_dent = list_entry(dent_inode->i_dentry.next, struct dentry, - d_alias); - dget_locked(new_dent); - spin_unlock(&dcache_lock); - /* Do security vodoo. */ - security_d_instantiate(real_dent, dent_inode); - /* Move new_dent in place of real_dent. */ - d_move(new_dent, real_dent); - /* Balance the ntfs_iget() we did above. */ - iput(dent_inode); - /* Throw away real_dent. */ - dput(real_dent); - /* Use new_dent as the actual dentry. */ - ntfs_debug("Done. (Already had negative, disconnected directory " - "dentry.)"); - return new_dent; + return dent; eio_err_out: ntfs_error(vol->sb, "Illegal file name attribute. Run chkdsk."); -- cgit v1.2.3 From 8f3f655da7288504c1013621090ecc940173ae1c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 12 Aug 2008 00:28:24 -0400 Subject: [PATCH] fix regular readdir() and friends Handling of -EOVERFLOW. Signed-off-by: Al Viro --- fs/compat.c | 8 ++++++-- fs/readdir.c | 8 ++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/compat.c b/fs/compat.c index c9d1472e65c5..075d0509970d 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -792,8 +792,10 @@ static int compat_fillonedir(void *__buf, const char *name, int namlen, if (buf->result) return -EINVAL; d_ino = ino; - if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) + if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { + buf->result = -EOVERFLOW; return -EOVERFLOW; + } buf->result++; dirent = buf->dirent; if (!access_ok(VERIFY_WRITE, dirent, @@ -862,8 +864,10 @@ static int compat_filldir(void *__buf, const char *name, int namlen, if (reclen > buf->count) return -EINVAL; d_ino = ino; - if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) + if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { + buf->error = -EOVERFLOW; return -EOVERFLOW; + } dirent = buf->previous; if (dirent) { if (__put_user(offset, &dirent->d_off)) diff --git a/fs/readdir.c b/fs/readdir.c index 4e026e5407fb..93a7559bbfd8 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -80,8 +80,10 @@ static int fillonedir(void * __buf, const char * name, int namlen, loff_t offset if (buf->result) return -EINVAL; d_ino = ino; - if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) + if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { + buf->result = -EOVERFLOW; return -EOVERFLOW; + } buf->result++; dirent = buf->dirent; if (!access_ok(VERIFY_WRITE, dirent, @@ -155,8 +157,10 @@ static int filldir(void * __buf, const char * name, int namlen, loff_t offset, if (reclen > buf->count) return -EINVAL; d_ino = ino; - if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) + if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { + buf->error = -EOVERFLOW; return -EOVERFLOW; + } dirent = buf->previous; if (dirent) { if (__put_user(offset, &dirent->d_off)) -- cgit v1.2.3 From 59af1584bf33810639cb98d79856021253e2177c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 24 Aug 2008 07:24:41 -0400 Subject: [PATCH] fix ->llseek() for a bunch of directories Signed-off-by: Al Viro --- fs/9p/vfs_dir.c | 1 + fs/adfs/dir.c | 1 + fs/affs/dir.c | 1 + fs/autofs4/root.c | 2 ++ fs/befs/linuxvfs.c | 1 + fs/xfs/linux-2.6/xfs_file.c | 1 + 6 files changed, 7 insertions(+) (limited to 'fs') diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c index 88e3787c6ea9..e298fe194093 100644 --- a/fs/9p/vfs_dir.c +++ b/fs/9p/vfs_dir.c @@ -119,6 +119,7 @@ int v9fs_dir_release(struct inode *inode, struct file *filp) const struct file_operations v9fs_dir_operations = { .read = generic_read_dir, + .llseek = generic_file_llseek, .readdir = v9fs_dir_readdir, .open = v9fs_file_open, .release = v9fs_dir_release, diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c index fc1a8dc64d78..85a30e929800 100644 --- a/fs/adfs/dir.c +++ b/fs/adfs/dir.c @@ -197,6 +197,7 @@ out: const struct file_operations adfs_dir_operations = { .read = generic_read_dir, + .llseek = generic_file_llseek, .readdir = adfs_readdir, .fsync = file_fsync, }; diff --git a/fs/affs/dir.c b/fs/affs/dir.c index 6e3f282424b0..7b36904dbeac 100644 --- a/fs/affs/dir.c +++ b/fs/affs/dir.c @@ -19,6 +19,7 @@ static int affs_readdir(struct file *, void *, filldir_t); const struct file_operations affs_dir_operations = { .read = generic_read_dir, + .llseek = generic_file_llseek, .readdir = affs_readdir, .fsync = file_fsync, }; diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index bcfb2dc0a61b..2a41c2a7fc52 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -36,6 +36,7 @@ const struct file_operations autofs4_root_operations = { .release = dcache_dir_close, .read = generic_read_dir, .readdir = dcache_readdir, + .llseek = dcache_dir_lseek, .ioctl = autofs4_root_ioctl, }; @@ -44,6 +45,7 @@ const struct file_operations autofs4_dir_operations = { .release = dcache_dir_close, .read = generic_read_dir, .readdir = dcache_readdir, + .llseek = dcache_dir_lseek, }; const struct inode_operations autofs4_indirect_root_inode_operations = { diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index 02c6e62b72f8..740f53672a8a 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -66,6 +66,7 @@ static struct kmem_cache *befs_inode_cachep; static const struct file_operations befs_dir_operations = { .read = generic_read_dir, .readdir = befs_readdir, + .llseek = generic_file_llseek, }; static const struct inode_operations befs_dir_inode_operations = { diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index 5f60363b9343..5311c1acdd40 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -475,6 +475,7 @@ const struct file_operations xfs_invis_file_operations = { const struct file_operations xfs_dir_file_operations = { .read = generic_read_dir, .readdir = xfs_file_readdir, + .llseek = generic_file_llseek, .unlocked_ioctl = xfs_file_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = xfs_file_compat_ioctl, -- cgit v1.2.3 From 4cdfe84b51420c9ac95c7133da2d4c8a191094af Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 24 Aug 2008 07:45:33 -0400 Subject: [PATCH] deal with the first call of ->show() generating no output seq_read() has a subtle bug - we want the first loop there to go until at least one *non-empty* record had fit entirely into buffer. Signed-off-by: Al Viro --- fs/seq_file.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/seq_file.c b/fs/seq_file.c index 5d54205e486b..bd20f7f5a933 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -108,9 +108,9 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) goto Done; } /* we need at least one record in buffer */ + pos = m->index; + p = m->op->start(m, &pos); while (1) { - pos = m->index; - p = m->op->start(m, &pos); err = PTR_ERR(p); if (!p || IS_ERR(p)) break; @@ -119,6 +119,11 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) break; if (unlikely(err)) m->count = 0; + if (unlikely(!m->count)) { + p = m->op->next(m, p, &pos); + m->index = pos; + continue; + } if (m->count < m->size) goto Fill; m->op->stop(m, p); @@ -128,6 +133,8 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) goto Enomem; m->count = 0; m->version = 0; + pos = m->index; + p = m->op->start(m, &pos); } m->op->stop(m, p); m->count = 0; -- cgit v1.2.3 From d6817cdbd143f87f9d7c59a4c3194091190eeb84 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Fri, 22 Aug 2008 14:30:10 -0700 Subject: ocfs2: Increment the reference count of an already-active stack. The ocfs2_stack_driver_request() function failed to increment the refcount of an already-active stack. It only did the increment on the first reference. Whoops. Signed-off-by: Joel Becker Tested-by: Marcos Matsunaga Signed-off-by: Mark Fasheh --- fs/ocfs2/stackglue.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index 10e149ae5e3a..07f348b8d721 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -97,13 +97,14 @@ static int ocfs2_stack_driver_request(const char *stack_name, goto out; } - /* Ok, the stack is pinned */ - p->sp_count++; active_stack = p; - rc = 0; out: + /* If we found it, pin it */ + if (!rc) + active_stack->sp_count++; + spin_unlock(&ocfs2_stack_lock); return rc; } -- cgit v1.2.3 From 6ce5eecb9cd3ac97b952c50309b87c31488a45e9 Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 26 Aug 2008 00:37:14 +0000 Subject: [CIFS] check version in spnego upcall response Currently, we don't check the version in the SPNEGO upcall response even though one is provided. Jeff and Q have made the corresponding change to the Samba client (cifs.upcall). Acked-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/CHANGES | 6 +++++- fs/cifs/cifs_spnego.h | 2 +- fs/cifs/sess.c | 9 +++++++++ 3 files changed, 15 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index f5d0083e09fa..526041a52d35 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES @@ -4,7 +4,11 @@ Fix premature write failure on congested networks (we would give up on EAGAIN from the socket too quickly on large writes). Cifs_mkdir and cifs_create now respect the setgid bit on parent dir. Fix endian problems in acl (mode from/to cifs acl) on bigendian -architectures. +architectures. Fix problems with preserving timestamps on copying open +files (e.g. "cp -a") to Windows servers. For mkdir and create honor setgid bit +on parent directory when server supports Unix Extensions but not POSIX +create. Update cifs.upcall version to handle new Kerberos sec flags +(this requires update of cifs.upcall program from Samba). Version 1.53 ------------ diff --git a/fs/cifs/cifs_spnego.h b/fs/cifs/cifs_spnego.h index 05a34b17a1ab..e4041ec4d712 100644 --- a/fs/cifs/cifs_spnego.h +++ b/fs/cifs/cifs_spnego.h @@ -23,7 +23,7 @@ #ifndef _CIFS_SPNEGO_H #define _CIFS_SPNEGO_H -#define CIFS_SPNEGO_UPCALL_VERSION 1 +#define CIFS_SPNEGO_UPCALL_VERSION 2 /* * The version field should always be set to CIFS_SPNEGO_UPCALL_VERSION. diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 3188e4d9cddb..b537fad3bf50 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -516,6 +516,15 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time, } msg = spnego_key->payload.data; + /* check version field to make sure that cifs.upcall is + sending us a response in an expected form */ + if (msg->version != CIFS_SPNEGO_UPCALL_VERSION) { + cERROR(1, ("incorrect version of cifs.upcall (expected" + " %d but got %d)", + CIFS_SPNEGO_UPCALL_VERSION, msg->version)); + rc = -EKEYREJECTED; + goto ssetup_exit; + } /* bail out if key is too long */ if (msg->sesskey_len > sizeof(ses->server->mac_signing_key.data.krb5)) { -- cgit v1.2.3 From e9775843ecb039318dbc9ded6da9c762bff28a0b Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 26 Aug 2008 18:22:50 +0000 Subject: [CIFS] Correct keys dependency for cifs kerberos support Must also depend on CIFS ... Signed-off-by: Steve French --- fs/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/Kconfig b/fs/Kconfig index f0427105a619..3fab3901e0ef 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -1984,7 +1984,7 @@ config CIFS_EXPERIMENTAL config CIFS_UPCALL bool "Kerberos/SPNEGO advanced session setup (EXPERIMENTAL)" - depends on KEYS + depends on CIFS && KEYS help Enables an upcall mechanism for CIFS which accesses userspace helper utilities to provide SPNEGO packaged (RFC 4178) -- cgit v1.2.3 From 96c2a1137b9e00bcdbe3a95113ea8f42ca994f76 Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 26 Aug 2008 18:32:28 +0000 Subject: [CIFS] Reorder cifs config item for better clarity Signed-off-by: Steve French --- fs/Kconfig | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/Kconfig b/fs/Kconfig index 3fab3901e0ef..abccb5dab9a8 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -1930,6 +1930,16 @@ config CIFS_WEAK_PW_HASH If unsure, say N. +config CIFS_UPCALL + bool "Kerberos/SPNEGO advanced session setup" + depends on CIFS && KEYS + help + Enables an upcall mechanism for CIFS which accesses + userspace helper utilities to provide SPNEGO packaged (RFC 4178) + Kerberos tickets which are needed to mount to certain secure servers + (for which more secure Kerberos authentication is required). If + unsure, say N. + config CIFS_XATTR bool "CIFS extended attributes" depends on CIFS @@ -1982,16 +1992,6 @@ config CIFS_EXPERIMENTAL (which is disabled by default). See the file fs/cifs/README for more details. If unsure, say N. -config CIFS_UPCALL - bool "Kerberos/SPNEGO advanced session setup (EXPERIMENTAL)" - depends on CIFS && KEYS - help - Enables an upcall mechanism for CIFS which accesses - userspace helper utilities to provide SPNEGO packaged (RFC 4178) - Kerberos tickets which are needed to mount to certain secure servers - (for which more secure Kerberos authentication is required). If - unsure, say N. - config CIFS_DFS_UPCALL bool "DFS feature support (EXPERIMENTAL)" depends on CIFS_EXPERIMENTAL -- cgit v1.2.3 From 48fd4f93a00eac844678629f2f00518e146ed30d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 22 Aug 2008 10:00:36 +0200 Subject: block: submit_bh() inadvertently discards barrier flag on a sync write Reported by Milan Broz , commit 18ce3751 inadvertently made submit_bh() discard the barrier bit for a WRITE_SYNC request. Fix that up. Signed-off-by: Jens Axboe --- fs/buffer.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/buffer.c b/fs/buffer.c index 38653e36e225..ac78d4c19b3b 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2926,14 +2926,17 @@ int submit_bh(int rw, struct buffer_head * bh) BUG_ON(!buffer_mapped(bh)); BUG_ON(!bh->b_end_io); - if (buffer_ordered(bh) && (rw == WRITE)) - rw = WRITE_BARRIER; + /* + * Mask in barrier bit for a write (could be either a WRITE or a + * WRITE_SYNC + */ + if (buffer_ordered(bh) && (rw & WRITE)) + rw |= WRITE_BARRIER; /* - * Only clear out a write error when rewriting, should this - * include WRITE_SYNC as well? + * Only clear out a write error when rewriting */ - if (test_set_buffer_req(bh) && (rw == WRITE || rw == WRITE_BARRIER)) + if (test_set_buffer_req(bh) && (rw & WRITE)) clear_buffer_write_io_error(bh); /* -- cgit v1.2.3 From 76029ff37f31dad64641489c610d98955217bb68 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 25 Aug 2008 20:36:08 +0200 Subject: bio: fix bio_copy_kern() handling of bio->bv_len The commit 68154e90c9d1492d570671ae181d9a8f8530da55 introduced bio_copy_kern() to add bounce support to blk_rq_map_kern. bio_copy_kern() uses bio->bv_len to copy data for READ commands after the completion but it doesn't work with a request that partially completed. SCSI always completes a PC request as a whole but seems some don't. This patch fixes bio_copy_kern to handle the above case. As bio_copy_user does, bio_copy_kern uses struct bio_map_data to store struct bio_vec. Signed-off-by: FUJITA Tomonori Reported-by: Nix Tested-by: Nix Cc: stable@kernel.org Signed-off-by: Jens Axboe --- fs/bio.c | 38 ++++++++++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/bio.c b/fs/bio.c index 8000e2fa16cb..8b1f5ee6f83c 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -469,20 +469,21 @@ static void bio_free_map_data(struct bio_map_data *bmd) kfree(bmd); } -static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count) +static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count, + gfp_t gfp_mask) { - struct bio_map_data *bmd = kmalloc(sizeof(*bmd), GFP_KERNEL); + struct bio_map_data *bmd = kmalloc(sizeof(*bmd), gfp_mask); if (!bmd) return NULL; - bmd->iovecs = kmalloc(sizeof(struct bio_vec) * nr_segs, GFP_KERNEL); + bmd->iovecs = kmalloc(sizeof(struct bio_vec) * nr_segs, gfp_mask); if (!bmd->iovecs) { kfree(bmd); return NULL; } - bmd->sgvecs = kmalloc(sizeof(struct sg_iovec) * iov_count, GFP_KERNEL); + bmd->sgvecs = kmalloc(sizeof(struct sg_iovec) * iov_count, gfp_mask); if (bmd->sgvecs) return bmd; @@ -596,7 +597,7 @@ struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov, len += iov[i].iov_len; } - bmd = bio_alloc_map_data(nr_pages, iov_count); + bmd = bio_alloc_map_data(nr_pages, iov_count, GFP_KERNEL); if (!bmd) return ERR_PTR(-ENOMEM); @@ -942,19 +943,22 @@ static void bio_copy_kern_endio(struct bio *bio, int err) { struct bio_vec *bvec; const int read = bio_data_dir(bio) == READ; - char *p = bio->bi_private; + struct bio_map_data *bmd = bio->bi_private; int i; + char *p = bmd->sgvecs[0].iov_base; __bio_for_each_segment(bvec, bio, i, 0) { char *addr = page_address(bvec->bv_page); + int len = bmd->iovecs[i].bv_len; if (read && !err) - memcpy(p, addr, bvec->bv_len); + memcpy(p, addr, len); __free_page(bvec->bv_page); - p += bvec->bv_len; + p += len; } + bio_free_map_data(bmd); bio_put(bio); } @@ -978,11 +982,21 @@ struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, const int nr_pages = end - start; struct bio *bio; struct bio_vec *bvec; + struct bio_map_data *bmd; int i, ret; + struct sg_iovec iov; + + iov.iov_base = data; + iov.iov_len = len; + + bmd = bio_alloc_map_data(nr_pages, 1, gfp_mask); + if (!bmd) + return ERR_PTR(-ENOMEM); + ret = -ENOMEM; bio = bio_alloc(gfp_mask, nr_pages); if (!bio) - return ERR_PTR(-ENOMEM); + goto out_bmd; while (len) { struct page *page; @@ -1016,14 +1030,18 @@ struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, } } - bio->bi_private = data; + bio->bi_private = bmd; bio->bi_end_io = bio_copy_kern_endio; + + bio_set_map_data(bmd, bio, &iov, 1); return bio; cleanup: bio_for_each_segment(bvec, bio, i) __free_page(bvec->bv_page); bio_put(bio); +out_bmd: + bio_free_map_data(bmd); return ERR_PTR(ret); } -- cgit v1.2.3 From aefcc28a3a63ac33a298777aa50ba43641c75241 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 25 Aug 2008 20:36:08 +0200 Subject: bio: fix __bio_copy_iov() handling of bio->bv_len The commit c5dec1c3034f1ae3503efbf641ff3b0273b64797 introduced __bio_copy_iov() to add bounce support to blk_rq_map_user_iov. __bio_copy_iov() uses bio->bv_len to copy data for READ commands after the completion but it doesn't work with a request that partially completed. SCSI always completes a PC request as a whole but seems some don't. Signed-off-by: FUJITA Tomonori Cc: stable@kernel.org Signed-off-by: Jens Axboe --- fs/bio.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/bio.c b/fs/bio.c index 8b1f5ee6f83c..3cba7ae34d75 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -492,8 +492,8 @@ static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count, return NULL; } -static int __bio_copy_iov(struct bio *bio, struct sg_iovec *iov, int iov_count, - int uncopy) +static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs, + struct sg_iovec *iov, int iov_count, int uncopy) { int ret = 0, i; struct bio_vec *bvec; @@ -503,7 +503,7 @@ static int __bio_copy_iov(struct bio *bio, struct sg_iovec *iov, int iov_count, __bio_for_each_segment(bvec, bio, i, 0) { char *bv_addr = page_address(bvec->bv_page); - unsigned int bv_len = bvec->bv_len; + unsigned int bv_len = iovecs[i].bv_len; while (bv_len && iov_idx < iov_count) { unsigned int bytes; @@ -555,7 +555,7 @@ int bio_uncopy_user(struct bio *bio) struct bio_map_data *bmd = bio->bi_private; int ret; - ret = __bio_copy_iov(bio, bmd->sgvecs, bmd->nr_sgvecs, 1); + ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs, bmd->nr_sgvecs, 1); bio_free_map_data(bmd); bio_put(bio); @@ -634,7 +634,7 @@ struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov, * success */ if (!write_to_vm) { - ret = __bio_copy_iov(bio, iov, iov_count, 0); + ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0); if (ret) goto cleanup; } -- cgit v1.2.3 From 87ed1d65fb536a0cd4e84874c0b038f953e448aa Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 27 Aug 2008 17:53:30 +0000 Subject: [CIFS] Add destroy routine for dns_resolver Otherwise, we're leaking the payload memory. CC: Stable Kernel Acked-by: David Howells Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/CHANGES | 3 ++- fs/cifs/dns_resolve.c | 7 +++++++ 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index 526041a52d35..f9e4ad97a79e 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES @@ -8,7 +8,8 @@ architectures. Fix problems with preserving timestamps on copying open files (e.g. "cp -a") to Windows servers. For mkdir and create honor setgid bit on parent directory when server supports Unix Extensions but not POSIX create. Update cifs.upcall version to handle new Kerberos sec flags -(this requires update of cifs.upcall program from Samba). +(this requires update of cifs.upcall program from Samba). Fix memory leak +on dns_upcall (resolving DFS referralls). Version 1.53 ------------ diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c index f730ef35499e..a2e0673e1b08 100644 --- a/fs/cifs/dns_resolve.c +++ b/fs/cifs/dns_resolve.c @@ -47,11 +47,18 @@ static int dns_resolver_instantiate(struct key *key, const void *data, return rc; } +static void +dns_resolver_destroy(struct key *key) +{ + kfree(key->payload.data); +} + struct key_type key_type_dns_resolver = { .name = "dns_resolver", .def_datalen = sizeof(struct in_addr), .describe = user_describe, .instantiate = dns_resolver_instantiate, + .destroy = dns_resolver_destroy, .match = user_match, }; -- cgit v1.2.3 From bcc55c6664a90146149ba0fd93052adc94287b9f Mon Sep 17 00:00:00 2001 From: Steve French Date: Wed, 27 Aug 2008 21:30:22 +0000 Subject: [CIFS] Fix plaintext authentication The last eight bytes of the password field were not cleared when doing lanman plaintext password authentication. This patch fixes that. I tested it with Samba by setting password encryption to no in the server's smb.conf. Other servers also can be configured to force plaintext authentication. Note that plaintexti authentication requires setting /proc/fs/cifs/SecurityFlags to 0x30030 on the client (enabling both LANMAN and also plaintext password support). Also note that LANMAN support (and thus plaintext password support) requires CONFIG_CIFS_WEAK_PW_HASH to be enabled in menuconfig. CC: Jeff Layton CC: Stable Kernel Signed-off-by: Steve French --- fs/cifs/cifsencrypt.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 83fd40dc1ef0..bd5f13d38450 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -294,6 +294,7 @@ void calc_lanman_hash(struct cifsSesInfo *ses, char *lnm_session_key) if ((ses->server->secMode & SECMODE_PW_ENCRYPT) == 0) if (extended_security & CIFSSEC_MAY_PLNTXT) { + memset(lnm_session_key, 0, CIFS_SESS_KEY_SIZE); memcpy(lnm_session_key, password_with_pad, CIFS_ENCPWD_SIZE); return; -- cgit v1.2.3 From 838726c4756813576078203eb7e1e219db0da870 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 28 Aug 2008 07:54:59 -0400 Subject: cifs: fix O_APPEND on directio mounts The direct I/O write codepath for CIFS is done through cifs_user_write(). That function does not currently call generic_write_checks() so the file position isn't being properly set when the file is opened with O_APPEND. It's also not doing the other "normal" checks that should be done for a write call. The problem is currently that when you open a file with O_APPEND on a mount with the directio mount option, the file position is set to the beginning of the file. This makes any subsequent writes clobber the data in the file starting at the beginning. This seems to fix the problem in cursory testing. It is, however important to note that NFS disallows the combination of (O_DIRECT|O_APPEND). If my understanding is correct, the concern is races with multiple clients appending to a file clobbering each others' data. Since the write model for CIFS and NFS is pretty similar in this regard, CIFS is probably subject to the same sort of races. What's unclear to me is why this is a particular problem with O_DIRECT and not with buffered writes... Regardless, disallowing O_APPEND on an entire mount is probably not reasonable, so we'll probably just have to deal with it and reevaluate this flag combination when we get proper support for O_DIRECT. In the meantime this patch at least fixes the existing problem. Signed-off-by: Jeff Layton Cc: Stable Tree Signed-off-by: Steve French --- fs/cifs/file.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/cifs/file.c b/fs/cifs/file.c index ff14d14903a0..cbefe1f1f9fe 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -833,6 +833,10 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data, return -EBADF; open_file = (struct cifsFileInfo *) file->private_data; + rc = generic_write_checks(file, poffset, &write_size, 0); + if (rc) + return rc; + xid = GetXid(); if (*poffset > file->f_path.dentry->d_inode->i_size) -- cgit v1.2.3 From 2e655021b8d50b5d90ce442f3de6bf3667729910 Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 28 Aug 2008 15:30:06 +0000 Subject: [CIFS] update cifs change log Signed-off-by: Steve French --- fs/cifs/CHANGES | 5 ++++- fs/cifs/README | 14 ++++++++++++-- 2 files changed, 16 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index f9e4ad97a79e..06e521a945c3 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES @@ -9,7 +9,10 @@ files (e.g. "cp -a") to Windows servers. For mkdir and create honor setgid bit on parent directory when server supports Unix Extensions but not POSIX create. Update cifs.upcall version to handle new Kerberos sec flags (this requires update of cifs.upcall program from Samba). Fix memory leak -on dns_upcall (resolving DFS referralls). +on dns_upcall (resolving DFS referralls). Fix plain text password +authentication (requires setting SecurityFlags to 0x30030 to enable +lanman and plain text though). Fix writes to be at correct offset when +file is open with O_APPEND and file is on a directio (forcediretio) mount. Version 1.53 ------------ diff --git a/fs/cifs/README b/fs/cifs/README index 68b5c1169d9d..bd2343d4c6a6 100644 --- a/fs/cifs/README +++ b/fs/cifs/README @@ -542,10 +542,20 @@ SecurityFlags Flags which control security negotiation and hashing mechanisms (as "must use") on the other hand does not make much sense. Default flags are 0x07007 - (NTLM, NTLMv2 and packet signing allowed). Maximum + (NTLM, NTLMv2 and packet signing allowed). The maximum allowable flags if you want to allow mounts to servers using weaker password hashes is 0x37037 (lanman, - plaintext, ntlm, ntlmv2, signing allowed): + plaintext, ntlm, ntlmv2, signing allowed). Some + SecurityFlags require the corresponding menuconfig + options to be enabled (lanman and plaintext require + CONFIG_CIFS_WEAK_PW_HASH for example). Enabling + plaintext authentication currently requires also + enabling lanman authentication in the security flags + because the cifs module only supports sending + laintext passwords using the older lanman dialect + form of the session setup SMB. (e.g. for authentication + using plain text passwords, set the SecurityFlags + to 0x30030): may use packet signing 0x00001 must use packet signing 0x01001 -- cgit v1.2.3 From c76da9da1fffa6de263486df54950eb328d58f71 Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 28 Aug 2008 15:32:22 +0000 Subject: [CIFS] Turn off Unicode during session establishment for plaintext authentication LANMAN session setup did not support Unicode (after session setup, unicode can still be used though). Fixes samba bug# 5319 CC: Jeff Layton CC: Stable Kernel Signed-off-by: Steve French --- fs/cifs/sess.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index b537fad3bf50..252fdc0567f1 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -409,6 +409,8 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time, #ifdef CONFIG_CIFS_WEAK_PW_HASH char lnm_session_key[CIFS_SESS_KEY_SIZE]; + pSMB->req.hdr.Flags2 &= ~SMBFLG2_UNICODE; + /* no capabilities flags in old lanman negotiation */ pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_SESS_KEY_SIZE); -- cgit v1.2.3 From c228c24bf1138d4757dbe20615df655815446da3 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Thu, 21 Aug 2008 08:42:16 -0400 Subject: nfsd: fix compound state allocation error handling Move the cstate_alloc call so that if it fails, the response is setup to encode the NFS error. The out label now means that the nfsd4_compound_state has not been allocated. Signed-off-by: Andy Adamson Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 2e51adac65de..e5b51ffafc6c 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -867,11 +867,6 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, int slack_bytes; __be32 status; - status = nfserr_resource; - cstate = cstate_alloc(); - if (cstate == NULL) - goto out; - resp->xbuf = &rqstp->rq_res; resp->p = rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len; resp->tagp = resp->p; @@ -890,6 +885,11 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, if (args->minorversion > NFSD_SUPPORTED_MINOR_VERSION) goto out; + status = nfserr_resource; + cstate = cstate_alloc(); + if (cstate == NULL) + goto out; + status = nfs_ok; while (!status && resp->opcnt < args->opcnt) { op = &args->ops[resp->opcnt++]; @@ -957,9 +957,9 @@ encode_op: nfsd4_increment_op_stats(op->opnum); } + cstate_free(cstate); out: nfsd4_release_compoundargs(args); - cstate_free(cstate); dprintk("nfsv4 compound returned %d\n", ntohl(status)); return status; } -- cgit v1.2.3 From 91b80969ba466ba4b915a4a1d03add8c297add3f Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 29 Aug 2008 19:18:45 -0400 Subject: nfsd: fix buffer overrun decoding NFSv4 acl The array we kmalloc() here is not large enough. Thanks to Johann Dahm and David Richter for bug report and testing. Signed-off-by: J. Bruce Fields Cc: David Richter Tested-by: Johann Dahm --- fs/nfsd/nfs4acl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index b6ed38380ab8..54b8b4140c8f 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c @@ -443,7 +443,7 @@ init_state(struct posix_acl_state *state, int cnt) * enough space for either: */ alloc = sizeof(struct posix_ace_state_array) - + cnt*sizeof(struct posix_ace_state); + + cnt*sizeof(struct posix_user_ace_state); state->users = kzalloc(alloc, GFP_KERNEL); if (!state->users) return -ENOMEM; -- cgit v1.2.3 From 169ccbd44eb20f5bb7e4352451eba25397e29749 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 2 Sep 2008 14:35:37 -0700 Subject: NTFS: update homepage Update the location of the NTFS homepage in several files. Signed-off-by: Adrian Bunk Cc: Jeff Garzik Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/ntfs.txt | 4 ++-- MAINTAINERS | 2 +- fs/ntfs/usnjrnl.h | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/Documentation/filesystems/ntfs.txt b/Documentation/filesystems/ntfs.txt index e79ee2db183a..ac2a261c5f7d 100644 --- a/Documentation/filesystems/ntfs.txt +++ b/Documentation/filesystems/ntfs.txt @@ -40,7 +40,7 @@ Web site ======== There is plenty of additional information on the linux-ntfs web site -at http://linux-ntfs.sourceforge.net/ +at http://www.linux-ntfs.org/ The web site has a lot of additional information, such as a comprehensive FAQ, documentation on the NTFS on-disk format, information on the Linux-NTFS @@ -272,7 +272,7 @@ And you would know that /dev/hda2 has a size of 37768814 - 4209030 + 1 = For Win2k and later dynamic disks, you can for example use the ldminfo utility which is part of the Linux LDM tools (the latest version at the time of writing is linux-ldm-0.0.8.tar.bz2). You can download it from: - http://linux-ntfs.sourceforge.net/downloads.html + http://www.linux-ntfs.org/ Simply extract the downloaded archive (tar xvjf linux-ldm-0.0.8.tar.bz2), go into it (cd linux-ldm-0.0.8) and change to the test directory (cd test). You will find the precompiled (i386) ldminfo utility there. NOTE: You will not be diff --git a/MAINTAINERS b/MAINTAINERS index c4ca99cf80df..be83f3424f3b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3051,7 +3051,7 @@ P: Anton Altaparmakov M: aia21@cantab.net L: linux-ntfs-dev@lists.sourceforge.net L: linux-kernel@vger.kernel.org -W: http://linux-ntfs.sf.net/ +W: http://www.linux-ntfs.org/ T: git kernel.org:/pub/scm/linux/kernel/git/aia21/ntfs-2.6.git S: Maintained diff --git a/fs/ntfs/usnjrnl.h b/fs/ntfs/usnjrnl.h index 3a8af75351e8..4087fbdac327 100644 --- a/fs/ntfs/usnjrnl.h +++ b/fs/ntfs/usnjrnl.h @@ -113,7 +113,7 @@ typedef struct { * Reason flags (32-bit). Cumulative flags describing the change(s) to the * file since it was last opened. I think the names speak for themselves but * if you disagree check out the descriptions in the Linux NTFS project NTFS - * documentation: http://linux-ntfs.sourceforge.net/ntfs/files/usnjrnl.html + * documentation: http://www.linux-ntfs.org/ */ enum { USN_REASON_DATA_OVERWRITE = const_cpu_to_le32(0x00000001), @@ -145,7 +145,7 @@ typedef le32 USN_REASON_FLAGS; * Source info flags (32-bit). Information about the source of the change(s) * to the file. For detailed descriptions of what these mean, see the Linux * NTFS project NTFS documentation: - * http://linux-ntfs.sourceforge.net/ntfs/files/usnjrnl.html + * http://www.linux-ntfs.org/ */ enum { USN_SOURCE_DATA_MANAGEMENT = const_cpu_to_le32(0x00000001), -- cgit v1.2.3 From 4b8561521dbaa3d766b198496b220e984e3bf756 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Tue, 2 Sep 2008 14:35:53 -0700 Subject: mm: show quicklist usage in /proc/meminfo Quicklists can consume several GB of memory. We should provide a means of monitoring this. After this patch is applied, /proc/meminfo will output the following: % cat /proc/meminfo MemTotal: 7715392 kB MemFree: 5401600 kB Buffers: 80384 kB Cached: 300800 kB SwapCached: 0 kB Active: 235584 kB Inactive: 262656 kB SwapTotal: 2031488 kB SwapFree: 2031488 kB Dirty: 3520 kB Writeback: 0 kB AnonPages: 117696 kB Mapped: 38528 kB Slab: 1589952 kB SReclaimable: 23104 kB SUnreclaim: 1566848 kB PageTables: 14656 kB NFS_Unstable: 0 kB Bounce: 0 kB WritebackTmp: 0 kB CommitLimit: 5889152 kB Committed_AS: 393152 kB VmallocTotal: 17592177655808 kB VmallocUsed: 29056 kB VmallocChunk: 17592177626432 kB Quicklists: 130944 kB HugePages_Total: 0 HugePages_Free: 0 HugePages_Rsvd: 0 HugePages_Surp: 0 Hugepagesize: 262144 kB Signed-off-by: KOSAKI Motohiro Cc: Christoph Lameter Cc: Keiichiro Tokunaga Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/proc_misc.c | 7 +++++-- include/linux/quicklist.h | 7 +++++++ 2 files changed, 12 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index ded969862960..00f10a2dcf12 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -189,7 +190,8 @@ static int meminfo_read_proc(char *page, char **start, off_t off, "Committed_AS: %8lu kB\n" "VmallocTotal: %8lu kB\n" "VmallocUsed: %8lu kB\n" - "VmallocChunk: %8lu kB\n", + "VmallocChunk: %8lu kB\n" + "Quicklists: %8lu kB\n", K(i.totalram), K(i.freeram), K(i.bufferram), @@ -221,7 +223,8 @@ static int meminfo_read_proc(char *page, char **start, off_t off, K(committed), (unsigned long)VMALLOC_TOTAL >> 10, vmi.used >> 10, - vmi.largest_chunk >> 10 + vmi.largest_chunk >> 10, + K(quicklist_total_size()) ); len += hugetlb_report_meminfo(page + len); diff --git a/include/linux/quicklist.h b/include/linux/quicklist.h index 39b66713a0bb..bd466439c588 100644 --- a/include/linux/quicklist.h +++ b/include/linux/quicklist.h @@ -80,6 +80,13 @@ void quicklist_trim(int nr, void (*dtor)(void *), unsigned long quicklist_total_size(void); +#else + +static inline unsigned long quicklist_total_size(void) +{ + return 0; +} + #endif #endif /* LINUX_QUICKLIST_H */ -- cgit v1.2.3 From 49048622eae698e5c4ae61f7e71200f265ccc529 Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Fri, 5 Sep 2008 18:12:23 +0200 Subject: sched: fix process time monotonicity Spencer reported a problem where utime and stime were going negative despite the fixes in commit b27f03d4bdc145a09fb7b0c0e004b29f1ee555fa. The suspected reason for the problem is that signal_struct maintains it's own utime and stime (of exited tasks), these are not updated using the new task_utime() routine, hence sig->utime can go backwards and cause the same problem to occur (sig->utime, adds tsk->utime and not task_utime()). This patch fixes the problem TODO: using max(task->prev_utime, derived utime) works for now, but a more generic solution is to implement cputime_max() and use the cputime_gt() function for comparison. Reported-by: spencer@bluehost.com Signed-off-by: Balbir Singh Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- fs/proc/array.c | 59 --------------------------------------------------- include/linux/sched.h | 4 ++++ kernel/exit.c | 6 +++--- kernel/sched.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 66 insertions(+), 62 deletions(-) (limited to 'fs') diff --git a/fs/proc/array.c b/fs/proc/array.c index 0d6eb33597c6..71c9be59c9c2 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -337,65 +337,6 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, return 0; } -/* - * Use precise platform statistics if available: - */ -#ifdef CONFIG_VIRT_CPU_ACCOUNTING -static cputime_t task_utime(struct task_struct *p) -{ - return p->utime; -} - -static cputime_t task_stime(struct task_struct *p) -{ - return p->stime; -} -#else -static cputime_t task_utime(struct task_struct *p) -{ - clock_t utime = cputime_to_clock_t(p->utime), - total = utime + cputime_to_clock_t(p->stime); - u64 temp; - - /* - * Use CFS's precise accounting: - */ - temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime); - - if (total) { - temp *= utime; - do_div(temp, total); - } - utime = (clock_t)temp; - - p->prev_utime = max(p->prev_utime, clock_t_to_cputime(utime)); - return p->prev_utime; -} - -static cputime_t task_stime(struct task_struct *p) -{ - clock_t stime; - - /* - * Use CFS's precise accounting. (we subtract utime from - * the total, to make sure the total observed by userspace - * grows monotonically - apps rely on that): - */ - stime = nsec_to_clock_t(p->se.sum_exec_runtime) - - cputime_to_clock_t(task_utime(p)); - - if (stime >= 0) - p->prev_stime = max(p->prev_stime, clock_t_to_cputime(stime)); - - return p->prev_stime; -} -#endif - -static cputime_t task_gtime(struct task_struct *p) -{ - return p->gtime; -} - static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task, int whole) { diff --git a/include/linux/sched.h b/include/linux/sched.h index cfb0d87b99fc..3d9120c5ad15 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1475,6 +1475,10 @@ static inline void put_task_struct(struct task_struct *t) __put_task_struct(t); } +extern cputime_t task_utime(struct task_struct *p); +extern cputime_t task_stime(struct task_struct *p); +extern cputime_t task_gtime(struct task_struct *p); + /* * Per process flags */ diff --git a/kernel/exit.c b/kernel/exit.c index 25ed2ad986df..16395644a98f 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -112,9 +112,9 @@ static void __exit_signal(struct task_struct *tsk) * We won't ever get here for the group leader, since it * will have been the last reference on the signal_struct. */ - sig->utime = cputime_add(sig->utime, tsk->utime); - sig->stime = cputime_add(sig->stime, tsk->stime); - sig->gtime = cputime_add(sig->gtime, tsk->gtime); + sig->utime = cputime_add(sig->utime, task_utime(tsk)); + sig->stime = cputime_add(sig->stime, task_stime(tsk)); + sig->gtime = cputime_add(sig->gtime, task_gtime(tsk)); sig->min_flt += tsk->min_flt; sig->maj_flt += tsk->maj_flt; sig->nvcsw += tsk->nvcsw; diff --git a/kernel/sched.c b/kernel/sched.c index 9a1ddb84e26d..1a5f73c1fcdc 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4178,6 +4178,65 @@ void account_steal_time(struct task_struct *p, cputime_t steal) cpustat->steal = cputime64_add(cpustat->steal, tmp); } +/* + * Use precise platform statistics if available: + */ +#ifdef CONFIG_VIRT_CPU_ACCOUNTING +cputime_t task_utime(struct task_struct *p) +{ + return p->utime; +} + +cputime_t task_stime(struct task_struct *p) +{ + return p->stime; +} +#else +cputime_t task_utime(struct task_struct *p) +{ + clock_t utime = cputime_to_clock_t(p->utime), + total = utime + cputime_to_clock_t(p->stime); + u64 temp; + + /* + * Use CFS's precise accounting: + */ + temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime); + + if (total) { + temp *= utime; + do_div(temp, total); + } + utime = (clock_t)temp; + + p->prev_utime = max(p->prev_utime, clock_t_to_cputime(utime)); + return p->prev_utime; +} + +cputime_t task_stime(struct task_struct *p) +{ + clock_t stime; + + /* + * Use CFS's precise accounting. (we subtract utime from + * the total, to make sure the total observed by userspace + * grows monotonically - apps rely on that): + */ + stime = nsec_to_clock_t(p->se.sum_exec_runtime) - + cputime_to_clock_t(task_utime(p)); + + if (stime >= 0) + p->prev_stime = max(p->prev_stime, clock_t_to_cputime(stime)); + + return p->prev_stime; +} +#endif + +inline cputime_t task_gtime(struct task_struct *p) +{ + return p->gtime; +} + /* * This function gets called by the timer code, with HZ frequency. * We call it with interrupts disabled. -- cgit v1.2.3 From af904deaf6da3f3285eb0a06a3dc6a1af0251030 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 8 Sep 2008 11:58:13 -0400 Subject: NFS: Restore missing hunk in NFS mount option parser Automounter maps can contain mount options valid for other NFS implementations but not for Linux. The Linux automounter uses the mount command's "-s" command line option ("s" for "sloppy") so that mount requests containing such options are not rejected. Commit f45663ce5fb30f76a3414ab3ac69f4dd320e760a attempted to address a known regression with text-based NFS mount option parsing. Unrecognized mount options would cause mount requests to fail, even if the "-s" option was used on the mount command line. Unfortunately, this commit was not complete as submitted. It adds a new mount option, "sloppy". But it is missing a hunk, so it now allows NFS mounts with unrecognized mount options, even if the "sloppy" option is not present. This could be a problem if a required critical mount option such as "sync" is misspelled, for example, and is considered a regression from 2.6.26. This patch restores the missing hunk. Now, the default behavior of text-based NFS mount options is as before: any unrecognized mount option will cause the mount to fail. Please include this in 2.6.27-rc. Thanks to Neil Brown for reporting this. Signed-off-by: Chuck Lever Acked-by: J. Bruce Fields Signed-off-by: Linus Torvalds --- fs/nfs/super.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 9abcd2b329f7..e9b20173fef3 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1279,6 +1279,12 @@ static int nfs_parse_mount_options(char *raw, } } + if (errors > 0) { + dfprintk(MOUNT, "NFS: parsing encountered %d error%s\n", + errors, (errors == 1 ? "" : "s")); + if (!sloppy) + return 0; + } return 1; out_nomem: -- cgit v1.2.3