diff options
author | John Stultz <john.stultz@linaro.org> | 2011-04-05 12:21:10 -0700 |
---|---|---|
committer | John Stultz <john.stultz@linaro.org> | 2011-04-05 12:21:10 -0700 |
commit | 4ce7ea0bfbb301ffb79154b6cecd2ef030db4cdf (patch) | |
tree | a7ad87580793912a9da208e7a9ea21d6c7768f08 /fs | |
parent | 4450182f400f1a5f50b1680faec25af2315c2849 (diff) | |
parent | 7c4bc9c2662c6d9840afed0e29eb01314af9bb78 (diff) |
Merge branch 'upstream/linaro.38' into linaro-android.38
Conflicts:
arch/arm/kernel/signal.c
drivers/mmc/card/block.c
drivers/mtd/nand/Kconfig
include/linux/amba/mmci.h
kernel/printk.c
mm/shmem.c
net/socket.c
Diffstat (limited to 'fs')
-rw-r--r-- | fs/aio.c | 4 | ||||
-rw-r--r-- | fs/buffer.c | 6 | ||||
-rw-r--r-- | fs/compat.c | 2 | ||||
-rw-r--r-- | fs/dcache.c | 98 | ||||
-rw-r--r-- | fs/exec.c | 7 | ||||
-rw-r--r-- | fs/ext3/namei.c | 2 | ||||
-rw-r--r-- | fs/ext3/super.c | 7 | ||||
-rw-r--r-- | fs/ext4/super.c | 8 | ||||
-rw-r--r-- | fs/ioctl.c | 5 | ||||
-rw-r--r-- | fs/namespace.c | 16 | ||||
-rw-r--r-- | fs/nfs/write.c | 31 | ||||
-rw-r--r-- | fs/nfsd/nfs4proc.c | 4 | ||||
-rw-r--r-- | fs/nfsd/nfs4state.c | 155 | ||||
-rw-r--r-- | fs/nfsd/nfs4xdr.c | 5 | ||||
-rw-r--r-- | fs/open.c | 6 | ||||
-rw-r--r-- | fs/partitions/osf.c | 2 | ||||
-rw-r--r-- | fs/proc/array.c | 4 | ||||
-rw-r--r-- | fs/proc/task_mmu.c | 4 | ||||
-rw-r--r-- | fs/read_write.c | 28 | ||||
-rw-r--r-- | fs/select.c | 47 | ||||
-rw-r--r-- | fs/seq_file.c | 44 | ||||
-rw-r--r-- | fs/splice.c | 1 | ||||
-rw-r--r-- | fs/super.c | 2 | ||||
-rw-r--r-- | fs/sync.c | 4 |
24 files changed, 322 insertions, 170 deletions
@@ -520,7 +520,7 @@ static inline void really_put_req(struct kioctx *ctx, struct kiocb *req) ctx->reqs_active--; if (unlikely(!ctx->reqs_active && ctx->dead)) - wake_up(&ctx->wait); + wake_up_all(&ctx->wait); } static void aio_fput_routine(struct work_struct *data) @@ -1229,7 +1229,7 @@ static void io_destroy(struct kioctx *ioctx) * by other CPUs at this point. Right now, we rely on the * locking done by the above calls to ensure this consistency. */ - wake_up(&ioctx->wait); + wake_up_all(&ioctx->wait); put_ioctx(ioctx); /* once for the lookup */ } diff --git a/fs/buffer.c b/fs/buffer.c index 2219a76e2ca..5d0c2c6045c 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -41,11 +41,15 @@ #include <linux/bitops.h> #include <linux/mpage.h> #include <linux/bit_spinlock.h> +#include <trace/fs.h> static int fsync_buffers_list(spinlock_t *lock, struct list_head *list); #define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers) +DEFINE_TRACE(fs_buffer_wait_start); +DEFINE_TRACE(fs_buffer_wait_end); + inline void init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private) { @@ -90,7 +94,9 @@ EXPORT_SYMBOL(unlock_buffer); */ void __wait_on_buffer(struct buffer_head * bh) { + trace_fs_buffer_wait_start(bh); wait_on_bit(&bh->b_state, BH_Lock, sync_buffer, TASK_UNINTERRUPTIBLE); + trace_fs_buffer_wait_end(bh); } EXPORT_SYMBOL(__wait_on_buffer); diff --git a/fs/compat.c b/fs/compat.c index 691c3fd8ce1..933042d14e6 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -50,6 +50,7 @@ #include <linux/fs_struct.h> #include <linux/slab.h> #include <linux/pagemap.h> +#include <trace/fs.h> #include <asm/uaccess.h> #include <asm/mmu_context.h> @@ -1533,6 +1534,7 @@ int compat_do_execve(char * filename, if (retval < 0) goto out; + trace_fs_exec(filename); /* execve succeeded */ current->fs->in_exec = 0; current->in_execve = 0; diff --git a/fs/dcache.c b/fs/dcache.c index 611ffe928c0..1baddc1cec4 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -296,8 +296,12 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) __releases(parent->d_lock) __releases(dentry->d_inode->i_lock) { - dentry->d_parent = NULL; list_del(&dentry->d_u.d_child); + /* + * Inform try_to_ascend() that we are no longer attached to the + * dentry tree + */ + dentry->d_flags |= DCACHE_DISCONNECTED; if (parent) spin_unlock(&parent->d_lock); dentry_iput(dentry); @@ -1012,6 +1016,35 @@ void shrink_dcache_for_umount(struct super_block *sb) } /* + * This tries to ascend one level of parenthood, but + * we can race with renaming, so we need to re-check + * the parenthood after dropping the lock and check + * that the sequence number still matches. + */ +static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq) +{ + struct dentry *new = old->d_parent; + + rcu_read_lock(); + spin_unlock(&old->d_lock); + spin_lock(&new->d_lock); + + /* + * might go back up the wrong parent if we have had a rename + * or deletion + */ + if (new != old->d_parent || + (old->d_flags & DCACHE_DISCONNECTED) || + (!locked && read_seqretry(&rename_lock, seq))) { + spin_unlock(&new->d_lock); + new = NULL; + } + rcu_read_unlock(); + return new; +} + + +/* * Search for at least 1 mount point in the dentry's subdirs. * We descend to the next level whenever the d_subdirs * list is non-empty and continue searching. @@ -1066,24 +1099,10 @@ resume: * All done at this level ... ascend and resume the search. */ if (this_parent != parent) { - struct dentry *tmp; - struct dentry *child; - - tmp = this_parent->d_parent; - rcu_read_lock(); - spin_unlock(&this_parent->d_lock); - child = this_parent; - this_parent = tmp; - spin_lock(&this_parent->d_lock); - /* might go back up the wrong parent if we have had a rename - * or deletion */ - if (this_parent != child->d_parent || - (!locked && read_seqretry(&rename_lock, seq))) { - spin_unlock(&this_parent->d_lock); - rcu_read_unlock(); + struct dentry *child = this_parent; + this_parent = try_to_ascend(this_parent, locked, seq); + if (!this_parent) goto rename_retry; - } - rcu_read_unlock(); next = child->d_u.d_child.next; goto resume; } @@ -1181,24 +1200,10 @@ resume: * All done at this level ... ascend and resume the search. */ if (this_parent != parent) { - struct dentry *tmp; - struct dentry *child; - - tmp = this_parent->d_parent; - rcu_read_lock(); - spin_unlock(&this_parent->d_lock); - child = this_parent; - this_parent = tmp; - spin_lock(&this_parent->d_lock); - /* might go back up the wrong parent if we have had a rename - * or deletion */ - if (this_parent != child->d_parent || - (!locked && read_seqretry(&rename_lock, seq))) { - spin_unlock(&this_parent->d_lock); - rcu_read_unlock(); + struct dentry *child = this_parent; + this_parent = try_to_ascend(this_parent, locked, seq); + if (!this_parent) goto rename_retry; - } - rcu_read_unlock(); next = child->d_u.d_child.next; goto resume; } @@ -1607,10 +1612,13 @@ struct dentry *d_obtain_alias(struct inode *inode) __bit_spin_unlock(0, (unsigned long *)&tmp->d_sb->s_anon.first); spin_unlock(&tmp->d_lock); spin_unlock(&inode->i_lock); + security_d_instantiate(tmp, inode); return tmp; out_iput: + if (res && !IS_ERR(res)) + security_d_instantiate(res, inode); iput(inode); return res; } @@ -2942,28 +2950,14 @@ resume: spin_unlock(&dentry->d_lock); } if (this_parent != root) { - struct dentry *tmp; - struct dentry *child; - - tmp = this_parent->d_parent; + struct dentry *child = this_parent; if (!(this_parent->d_flags & DCACHE_GENOCIDE)) { this_parent->d_flags |= DCACHE_GENOCIDE; this_parent->d_count--; } - rcu_read_lock(); - spin_unlock(&this_parent->d_lock); - child = this_parent; - this_parent = tmp; - spin_lock(&this_parent->d_lock); - /* might go back up the wrong parent if we have had a rename - * or deletion */ - if (this_parent != child->d_parent || - (!locked && read_seqretry(&rename_lock, seq))) { - spin_unlock(&this_parent->d_lock); - rcu_read_unlock(); + this_parent = try_to_ascend(this_parent, locked, seq); + if (!this_parent) goto rename_retry; - } - rcu_read_unlock(); next = child->d_u.d_child.next; goto resume; } diff --git a/fs/exec.c b/fs/exec.c index 52a447d9b6a..9a92bbe142d 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -55,6 +55,7 @@ #include <linux/fs_struct.h> #include <linux/pipe_fs_i.h> #include <linux/oom.h> +#include <trace/fs.h> #include <asm/uaccess.h> #include <asm/mmu_context.h> @@ -77,6 +78,11 @@ static atomic_t call_count = ATOMIC_INIT(1); static LIST_HEAD(formats); static DEFINE_RWLOCK(binfmt_lock); +/* + * Also used in compat.c. + */ +DEFINE_TRACE(fs_exec); + int __register_binfmt(struct linux_binfmt * fmt, int insert) { if (!fmt) @@ -1447,6 +1453,7 @@ int do_execve(const char * filename, if (retval < 0) goto out; + trace_fs_exec(filename); /* execve succeeded */ current->fs->in_exec = 0; current->in_execve = 0; diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index b27ba71810e..75c968eaf90 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -1540,8 +1540,8 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry, goto cleanup; node2 = (struct dx_node *)(bh2->b_data); entries2 = node2->entries; + memset(&node2->fake, 0, sizeof(struct fake_dirent)); node2->fake.rec_len = ext3_rec_len_to_disk(sb->s_blocksize); - node2->fake.inode = 0; BUFFER_TRACE(frame->bh, "get_write_access"); err = ext3_journal_get_write_access(handle, frame->bh); if (err) diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 85c8cc8f247..0d62f29f213 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -1464,6 +1464,13 @@ static void ext3_orphan_cleanup (struct super_block * sb, return; } + /* Check if feature set allows readwrite operations */ + if (EXT3_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP)) { + ext3_msg(sb, KERN_INFO, "Skipping orphan cleanup due to " + "unknown ROCOMPAT features"); + return; + } + if (EXT3_SB(sb)->s_mount_state & EXT3_ERROR_FS) { if (es->s_last_orphan) jbd_debug(1, "Errors on filesystem, " diff --git a/fs/ext4/super.c b/fs/ext4/super.c index f6a318f836b..4381efee3db 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -75,6 +75,7 @@ static void ext4_write_super(struct super_block *sb); static int ext4_freeze(struct super_block *sb); static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data); +static int ext4_feature_set_ok(struct super_block *sb, int readonly); static void ext4_destroy_lazyinit_thread(void); static void ext4_unregister_li_request(struct super_block *sb); static void ext4_clear_request_list(void); @@ -2120,6 +2121,13 @@ static void ext4_orphan_cleanup(struct super_block *sb, return; } + /* Check if feature set would not allow a r/w mount */ + if (!ext4_feature_set_ok(sb, 0)) { + ext4_msg(sb, KERN_INFO, "Skipping orphan cleanup due to " + "unknown ROCOMPAT features"); + return; + } + if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { if (es->s_last_orphan) jbd_debug(1, "Errors on filesystem, " diff --git a/fs/ioctl.c b/fs/ioctl.c index 1eebeb72b20..a1fecf33b11 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -15,9 +15,12 @@ #include <linux/writeback.h> #include <linux/buffer_head.h> #include <linux/falloc.h> +#include <trace/fs.h> #include <asm/ioctls.h> +DEFINE_TRACE(fs_ioctl); + /* So that the fiemap access checks can't overflow on 32 bit machines. */ #define FIEMAP_MAX_EXTENTS (UINT_MAX / sizeof(struct fiemap_extent)) @@ -616,6 +619,8 @@ SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) if (!filp) goto out; + trace_fs_ioctl(fd, cmd, arg); + error = security_file_ioctl(filp, cmd, arg); if (error) goto out_fput; diff --git a/fs/namespace.c b/fs/namespace.c index d1edf26025d..445534be024 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2469,9 +2469,6 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, error = user_path_dir(new_root, &new); if (error) goto out0; - error = -EINVAL; - if (!check_mnt(new.mnt)) - goto out1; error = user_path_dir(put_old, &old); if (error) @@ -2491,7 +2488,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, IS_MNT_SHARED(new.mnt->mnt_parent) || IS_MNT_SHARED(root.mnt->mnt_parent)) goto out2; - if (!check_mnt(root.mnt)) + if (!check_mnt(root.mnt) || !check_mnt(new.mnt)) goto out2; error = -ENOENT; if (cant_mount(old.dentry)) @@ -2515,19 +2512,19 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, goto out2; /* not attached */ /* make sure we can reach put_old from new_root */ tmp = old.mnt; - br_write_lock(vfsmount_lock); if (tmp != new.mnt) { for (;;) { if (tmp->mnt_parent == tmp) - goto out3; /* already mounted on put_old */ + goto out2; /* already mounted on put_old */ if (tmp->mnt_parent == new.mnt) break; tmp = tmp->mnt_parent; } if (!is_subdir(tmp->mnt_mountpoint, new.dentry)) - goto out3; + goto out2; } else if (!is_subdir(old.dentry, new.dentry)) - goto out3; + goto out2; + br_write_lock(vfsmount_lock); detach_mnt(new.mnt, &parent_path); detach_mnt(root.mnt, &root_parent); /* mount old root on put_old */ @@ -2550,9 +2547,6 @@ out1: path_put(&new); out0: return error; -out3: - br_write_unlock(vfsmount_lock); - goto out2; } static void __init init_mount_tree(void) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 42b92d7a9cc..b5fcbf7da6f 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1214,13 +1214,17 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait) { + int ret; + if (!test_and_set_bit(NFS_INO_COMMIT, &nfsi->flags)) return 1; - if (may_wait && !out_of_line_wait_on_bit_lock(&nfsi->flags, - NFS_INO_COMMIT, nfs_wait_bit_killable, - TASK_KILLABLE)) - return 1; - return 0; + if (!may_wait) + return 0; + ret = out_of_line_wait_on_bit_lock(&nfsi->flags, + NFS_INO_COMMIT, + nfs_wait_bit_killable, + TASK_KILLABLE); + return (ret < 0) ? ret : 1; } static void nfs_commit_clear_lock(struct nfs_inode *nfsi) @@ -1396,9 +1400,10 @@ int nfs_commit_inode(struct inode *inode, int how) { LIST_HEAD(head); int may_wait = how & FLUSH_SYNC; - int res = 0; + int res; - if (!nfs_commit_set_lock(NFS_I(inode), may_wait)) + res = nfs_commit_set_lock(NFS_I(inode), may_wait); + if (res <= 0) goto out_mark_dirty; spin_lock(&inode->i_lock); res = nfs_scan_commit(inode, &head, 0, 0); @@ -1407,12 +1412,14 @@ int nfs_commit_inode(struct inode *inode, int how) int error = nfs_commit_list(inode, &head, how); if (error < 0) return error; - if (may_wait) - wait_on_bit(&NFS_I(inode)->flags, NFS_INO_COMMIT, - nfs_wait_bit_killable, - TASK_KILLABLE); - else + if (!may_wait) goto out_mark_dirty; + error = wait_on_bit(&NFS_I(inode)->flags, + NFS_INO_COMMIT, + nfs_wait_bit_killable, + TASK_KILLABLE); + if (error < 0) + return error; } else nfs_commit_clear_lock(NFS_I(inode)); return res; diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index db52546143d..5fcb1396a7e 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -984,8 +984,8 @@ typedef __be32(*nfsd4op_func)(struct svc_rqst *, struct nfsd4_compound_state *, void *); enum nfsd4_op_flags { ALLOWED_WITHOUT_FH = 1 << 0, /* No current filehandle required */ - ALLOWED_ON_ABSENT_FS = 2 << 0, /* ops processed on absent fs */ - ALLOWED_AS_FIRST_OP = 3 << 0, /* ops reqired first in compound */ + ALLOWED_ON_ABSENT_FS = 1 << 1, /* ops processed on absent fs */ + ALLOWED_AS_FIRST_OP = 1 << 2, /* ops reqired first in compound */ }; struct nfsd4_operation { diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 7b566ec14e1..f0e448a512c 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -316,64 +316,6 @@ static struct list_head unconf_id_hashtbl[CLIENT_HASH_SIZE]; static struct list_head client_lru; static struct list_head close_lru; -static void unhash_generic_stateid(struct nfs4_stateid *stp) -{ - list_del(&stp->st_hash); - list_del(&stp->st_perfile); - list_del(&stp->st_perstateowner); -} - -static void free_generic_stateid(struct nfs4_stateid *stp) -{ - put_nfs4_file(stp->st_file); - kmem_cache_free(stateid_slab, stp); -} - -static void release_lock_stateid(struct nfs4_stateid *stp) -{ - struct file *file; - - unhash_generic_stateid(stp); - file = find_any_file(stp->st_file); - if (file) - locks_remove_posix(file, (fl_owner_t)stp->st_stateowner); - free_generic_stateid(stp); -} - -static void unhash_lockowner(struct nfs4_stateowner *sop) -{ - struct nfs4_stateid *stp; - - list_del(&sop->so_idhash); - list_del(&sop->so_strhash); - list_del(&sop->so_perstateid); - while (!list_empty(&sop->so_stateids)) { - stp = list_first_entry(&sop->so_stateids, - struct nfs4_stateid, st_perstateowner); - release_lock_stateid(stp); - } -} - -static void release_lockowner(struct nfs4_stateowner *sop) -{ - unhash_lockowner(sop); - nfs4_put_stateowner(sop); -} - -static void -release_stateid_lockowners(struct nfs4_stateid *open_stp) -{ - struct nfs4_stateowner *lock_sop; - - while (!list_empty(&open_stp->st_lockowners)) { - lock_sop = list_entry(open_stp->st_lockowners.next, - struct nfs4_stateowner, so_perstateid); - /* list_del(&open_stp->st_lockowners); */ - BUG_ON(lock_sop->so_is_open_owner); - release_lockowner(lock_sop); - } -} - /* * We store the NONE, READ, WRITE, and BOTH bits separately in the * st_{access,deny}_bmap field of the stateid, in order to track not @@ -446,13 +388,71 @@ static int nfs4_access_bmap_to_omode(struct nfs4_stateid *stp) return nfs4_access_to_omode(access); } -static void release_open_stateid(struct nfs4_stateid *stp) +static void unhash_generic_stateid(struct nfs4_stateid *stp) +{ + list_del(&stp->st_hash); + list_del(&stp->st_perfile); + list_del(&stp->st_perstateowner); +} + +static void free_generic_stateid(struct nfs4_stateid *stp) { int oflag = nfs4_access_bmap_to_omode(stp); + nfs4_file_put_access(stp->st_file, oflag); + put_nfs4_file(stp->st_file); + kmem_cache_free(stateid_slab, stp); +} + +static void release_lock_stateid(struct nfs4_stateid *stp) +{ + struct file *file; + + unhash_generic_stateid(stp); + file = find_any_file(stp->st_file); + if (file) + locks_remove_posix(file, (fl_owner_t)stp->st_stateowner); + free_generic_stateid(stp); +} + +static void unhash_lockowner(struct nfs4_stateowner *sop) +{ + struct nfs4_stateid *stp; + + list_del(&sop->so_idhash); + list_del(&sop->so_strhash); + list_del(&sop->so_perstateid); + while (!list_empty(&sop->so_stateids)) { + stp = list_first_entry(&sop->so_stateids, + struct nfs4_stateid, st_perstateowner); + release_lock_stateid(stp); + } +} + +static void release_lockowner(struct nfs4_stateowner *sop) +{ + unhash_lockowner(sop); + nfs4_put_stateowner(sop); +} + +static void +release_stateid_lockowners(struct nfs4_stateid *open_stp) +{ + struct nfs4_stateowner *lock_sop; + + while (!list_empty(&open_stp->st_lockowners)) { + lock_sop = list_entry(open_stp->st_lockowners.next, + struct nfs4_stateowner, so_perstateid); + /* list_del(&open_stp->st_lockowners); */ + BUG_ON(lock_sop->so_is_open_owner); + release_lockowner(lock_sop); + } +} + +static void release_open_stateid(struct nfs4_stateid *stp) +{ unhash_generic_stateid(stp); release_stateid_lockowners(stp); - nfs4_file_put_access(stp->st_file, oflag); free_generic_stateid(stp); } @@ -3735,6 +3735,7 @@ alloc_init_lock_stateid(struct nfs4_stateowner *sop, struct nfs4_file *fp, struc stp->st_stateid.si_stateownerid = sop->so_id; stp->st_stateid.si_fileid = fp->fi_id; stp->st_stateid.si_generation = 0; + stp->st_access_bmap = 0; stp->st_deny_bmap = open_stp->st_deny_bmap; stp->st_openstp = open_stp; @@ -3749,6 +3750,17 @@ check_lock_length(u64 offset, u64 length) LOFF_OVERFLOW(offset, length))); } +static void get_lock_access(struct nfs4_stateid *lock_stp, u32 access) +{ + struct nfs4_file *fp = lock_stp->st_file; + int oflag = nfs4_access_to_omode(access); + + if (test_bit(access, &lock_stp->st_access_bmap)) + return; + nfs4_file_get_access(fp, oflag); + __set_bit(access, &lock_stp->st_access_bmap); +} + /* * LOCK operation */ @@ -3765,7 +3777,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct file_lock conflock; __be32 status = 0; unsigned int strhashval; - unsigned int cmd; int err; dprintk("NFSD: nfsd4_lock: start=%Ld length=%Ld\n", @@ -3847,22 +3858,18 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, switch (lock->lk_type) { case NFS4_READ_LT: case NFS4_READW_LT: - if (find_readable_file(lock_stp->st_file)) { - nfs4_get_vfs_file(rqstp, fp, &cstate->current_fh, NFS4_SHARE_ACCESS_READ); - filp = find_readable_file(lock_stp->st_file); - } + filp = find_readable_file(lock_stp->st_file); + if (filp) + get_lock_access(lock_stp, NFS4_SHARE_ACCESS_READ); file_lock.fl_type = F_RDLCK; - cmd = F_SETLK; - break; + break; case NFS4_WRITE_LT: case NFS4_WRITEW_LT: - if (find_writeable_file(lock_stp->st_file)) { - nfs4_get_vfs_file(rqstp, fp, &cstate->current_fh, NFS4_SHARE_ACCESS_WRITE); - filp = find_writeable_file(lock_stp->st_file); - } + filp = find_writeable_file(lock_stp->st_file); + if (filp) + get_lock_access(lock_stp, NFS4_SHARE_ACCESS_WRITE); file_lock.fl_type = F_WRLCK; - cmd = F_SETLK; - break; + break; default: status = nfserr_inval; goto out; @@ -3886,7 +3893,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, * Note: locks.c uses the BKL to protect the inode's lock list. */ - err = vfs_lock_file(filp, cmd, &file_lock, &conflock); + err = vfs_lock_file(filp, F_SETLK, &file_lock, &conflock); switch (-err) { case 0: /* success! */ update_stateid(&lock_stp->st_stateid); diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 615f0a9f060..c6766af00d9 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1142,7 +1142,7 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp, u32 dummy; char *machine_name; - int i, j; + int i; int nr_secflavs; READ_BUF(16); @@ -1215,8 +1215,6 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp, READ_BUF(4); READ32(dummy); READ_BUF(dummy * 4); - for (j = 0; j < dummy; ++j) - READ32(dummy); break; case RPC_AUTH_GSS: dprintk("RPC_AUTH_GSS callback secflavor " @@ -1232,7 +1230,6 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp, READ_BUF(4); READ32(dummy); READ_BUF(dummy); - p += XDR_QUADLEN(dummy); break; default: dprintk("Illegal callback secflavor\n"); diff --git a/fs/open.c b/fs/open.c index b47aab39c05..575c92f3b81 100644 --- a/fs/open.c +++ b/fs/open.c @@ -30,9 +30,13 @@ #include <linux/fs_struct.h> #include <linux/ima.h> #include <linux/dnotify.h> +#include <trace/fs.h> #include "internal.h" +DEFINE_TRACE(fs_open); +DEFINE_TRACE(fs_close); + int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, struct file *filp) { @@ -906,6 +910,7 @@ long do_sys_open(int dfd, const char __user *filename, int flags, int mode) fsnotify_open(f); fd_install(fd, f); } + trace_fs_open(fd, tmp); } putname(tmp); } @@ -995,6 +1000,7 @@ SYSCALL_DEFINE1(close, unsigned int, fd) filp = fdt->fd[fd]; if (!filp) goto out_unlock; + trace_fs_close(fd); rcu_assign_pointer(fdt->fd[fd], NULL); FD_CLR(fd, fdt->close_on_exec); __put_unused_fd(files, fd); diff --git a/fs/partitions/osf.c b/fs/partitions/osf.c index be03a0b08b4..764b86a0196 100644 --- a/fs/partitions/osf.c +++ b/fs/partitions/osf.c @@ -10,7 +10,7 @@ #include "check.h" #include "osf.h" -#define MAX_OSF_PARTITIONS 8 +#define MAX_OSF_PARTITIONS 18 int osf_partition(struct parsed_partitions *state) { diff --git a/fs/proc/array.c b/fs/proc/array.c index 7c99c1cf7e5..5e4f776b091 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -489,8 +489,8 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, vsize, mm ? get_mm_rss(mm) : 0, rsslim, - mm ? mm->start_code : 0, - mm ? mm->end_code : 0, + mm ? (permitted ? mm->start_code : 1) : 0, + mm ? (permitted ? mm->end_code : 1) : 0, (permitted && mm) ? mm->start_stack : 0, esp, eip, diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 60b914860f8..f269ee673c8 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -249,8 +249,8 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) const char *name = arch_vma_name(vma); if (!name) { if (mm) { - if (vma->vm_start <= mm->start_brk && - vma->vm_end >= mm->brk) { + if (vma->vm_start <= mm->brk && + vma->vm_end >= mm->start_brk) { name = "[heap]"; } else if (vma->vm_start <= mm->start_stack && vma->vm_end >= mm->start_stack) { diff --git a/fs/read_write.c b/fs/read_write.c index 5520f8ad550..6a3f7f9c9db 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -15,6 +15,7 @@ #include <linux/syscalls.h> #include <linux/pagemap.h> #include <linux/splice.h> +#include <trace/fs.h> #include "read_write.h" #include <asm/uaccess.h> @@ -30,6 +31,15 @@ const struct file_operations generic_ro_fops = { EXPORT_SYMBOL(generic_ro_fops); +DEFINE_TRACE(fs_lseek); +DEFINE_TRACE(fs_llseek); +DEFINE_TRACE(fs_read); +DEFINE_TRACE(fs_write); +DEFINE_TRACE(fs_pread64); +DEFINE_TRACE(fs_pwrite64); +DEFINE_TRACE(fs_readv); +DEFINE_TRACE(fs_writev); + static inline int unsigned_offsets(struct file *file) { return file->f_mode & FMODE_UNSIGNED_OFFSET; @@ -187,6 +197,9 @@ SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, origin) if (res != (loff_t)retval) retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */ } + + trace_fs_lseek(fd, offset, origin); + fput_light(file, fput_needed); bad: return retval; @@ -214,6 +227,8 @@ SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high, offset = vfs_llseek(file, ((loff_t) offset_high << 32) | offset_low, origin); + trace_fs_llseek(fd, offset, origin); + retval = (int)offset; if (offset >= 0) { retval = -EFAULT; @@ -409,6 +424,7 @@ SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) if (file) { loff_t pos = file_pos_read(file); ret = vfs_read(file, buf, count, &pos); + trace_fs_read(fd, buf, count, ret); file_pos_write(file, pos); fput_light(file, fput_needed); } @@ -427,6 +443,7 @@ SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, if (file) { loff_t pos = file_pos_read(file); ret = vfs_write(file, buf, count, &pos); + trace_fs_write(fd, buf, count, ret); file_pos_write(file, pos); fput_light(file, fput_needed); } @@ -447,8 +464,11 @@ SYSCALL_DEFINE(pread64)(unsigned int fd, char __user *buf, file = fget_light(fd, &fput_needed); if (file) { ret = -ESPIPE; - if (file->f_mode & FMODE_PREAD) + if (file->f_mode & FMODE_PREAD) { ret = vfs_read(file, buf, count, &pos); + trace_fs_pread64(fd, buf, count, pos, ret); + } + fput_light(file, fput_needed); } @@ -476,8 +496,10 @@ SYSCALL_DEFINE(pwrite64)(unsigned int fd, const char __user *buf, file = fget_light(fd, &fput_needed); if (file) { ret = -ESPIPE; - if (file->f_mode & FMODE_PWRITE) + if (file->f_mode & FMODE_PWRITE) { ret = vfs_write(file, buf, count, &pos); + trace_fs_pwrite64(fd, buf, count, pos, ret); + } fput_light(file, fput_needed); } @@ -736,6 +758,7 @@ SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, if (file) { loff_t pos = file_pos_read(file); ret = vfs_readv(file, vec, vlen, &pos); + trace_fs_readv(fd, vec, vlen, ret); file_pos_write(file, pos); fput_light(file, fput_needed); } @@ -757,6 +780,7 @@ SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec, if (file) { loff_t pos = file_pos_read(file); ret = vfs_writev(file, vec, vlen, &pos); + trace_fs_writev(fd, vec, vlen, ret); file_pos_write(file, pos); fput_light(file, fput_needed); } diff --git a/fs/select.c b/fs/select.c index e56560d2b08..64c2404f2cc 100644 --- a/fs/select.c +++ b/fs/select.c @@ -26,6 +26,7 @@ #include <linux/fs.h> #include <linux/rcupdate.h> #include <linux/hrtimer.h> +#include <trace/fs.h> #include <asm/uaccess.h> @@ -98,6 +99,9 @@ struct poll_table_page { #define POLL_TABLE_FULL(table) \ ((unsigned long)((table)->entry+1) > PAGE_SIZE + (unsigned long)(table)) +DEFINE_TRACE(fs_select); +DEFINE_TRACE(fs_poll); + /* * Ok, Peter made a complicated, but straightforward multiple_wait() function. * I have rewritten this, taking some shortcuts: This code may not be easy to @@ -112,6 +116,9 @@ struct poll_table_page { */ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p); +static void __pollwait_exclusive(struct file *filp, + wait_queue_head_t *wait_address, + poll_table *p); void poll_initwait(struct poll_wqueues *pwq) { @@ -152,6 +159,20 @@ void poll_freewait(struct poll_wqueues *pwq) } EXPORT_SYMBOL(poll_freewait); +/** + * poll_wait_set_exclusive - set poll wait queue to exclusive + * + * Sets up a poll wait queue to use exclusive wakeups. This is useful to + * wake up only one waiter at each wakeup. Used to work-around "thundering herd" + * problem. + */ +void poll_wait_set_exclusive(poll_table *p) +{ + if (p) + init_poll_funcptr(p, __pollwait_exclusive); +} +EXPORT_SYMBOL(poll_wait_set_exclusive); + static struct poll_table_entry *poll_get_entry(struct poll_wqueues *p) { struct poll_table_page *table = p->table; @@ -213,8 +234,10 @@ static int pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key) } /* Add a new entry */ -static void __pollwait(struct file *filp, wait_queue_head_t *wait_address, - poll_table *p) +static void __pollwait_common(struct file *filp, + wait_queue_head_t *wait_address, + poll_table *p, + int exclusive) { struct poll_wqueues *pwq = container_of(p, struct poll_wqueues, pt); struct poll_table_entry *entry = poll_get_entry(pwq); @@ -226,7 +249,23 @@ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address, entry->key = p->key; init_waitqueue_func_entry(&entry->wait, pollwake); entry->wait.private = pwq; - add_wait_queue(wait_address, &entry->wait); + if (!exclusive) + add_wait_queue(wait_address, &entry->wait); + else + add_wait_queue_exclusive(wait_address, &entry->wait); +} + +static void __pollwait(struct file *filp, wait_queue_head_t *wait_address, + poll_table *p) +{ + __pollwait_common(filp, wait_address, p, 0); +} + +static void __pollwait_exclusive(struct file *filp, + wait_queue_head_t *wait_address, + poll_table *p) +{ + __pollwait_common(filp, wait_address, p, 1); } int poll_schedule_timeout(struct poll_wqueues *pwq, int state, @@ -450,6 +489,7 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) file = fget_light(i, &fput_needed); if (file) { f_op = file->f_op; + trace_fs_select(i, end_time); mask = DEFAULT_POLLMASK; if (f_op && f_op->poll) { wait_key_set(wait, in, out, bit); @@ -739,6 +779,7 @@ static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait) file = fget_light(fd, &fput_needed); mask = POLLNVAL; if (file != NULL) { + trace_fs_poll(fd); mask = DEFAULT_POLLMASK; if (file->f_op && file->f_op->poll) { if (pwait) diff --git a/fs/seq_file.c b/fs/seq_file.c index 05d6b0e78c9..691c84baf4f 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -819,3 +819,47 @@ struct hlist_node *seq_hlist_next_rcu(void *v, return rcu_dereference(node->next); } EXPORT_SYMBOL(seq_hlist_next_rcu); + +struct list_head *seq_sorted_list_start(struct list_head *head, loff_t *ppos) +{ + struct list_head *lh; + + list_for_each(lh, head) + if ((unsigned long)lh >= *ppos) { + *ppos = (unsigned long)lh; + return lh; + } + return NULL; +} +EXPORT_SYMBOL(seq_sorted_list_start); + +struct list_head *seq_sorted_list_start_head(struct list_head *head, + loff_t *ppos) +{ + struct list_head *lh; + + if (!*ppos) { + *ppos = (unsigned long)head; + return head; + } + list_for_each(lh, head) + if ((unsigned long)lh >= *ppos) { + *ppos = (long)lh->prev; + return lh->prev; + } + return NULL; +} +EXPORT_SYMBOL(seq_sorted_list_start_head); + +struct list_head *seq_sorted_list_next(void *p, struct list_head *head, + loff_t *ppos) +{ + struct list_head *lh; + void *next; + + lh = ((struct list_head *)p)->next; + next = (lh == head) ? NULL : lh; + *ppos = next ? ((unsigned long)next) : (-1UL); + return next; +} +EXPORT_SYMBOL(seq_sorted_list_next); diff --git a/fs/splice.c b/fs/splice.c index 50a5d978da1..e76aac5c993 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -259,6 +259,7 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe, return ret; } +EXPORT_SYMBOL_GPL(splice_to_pipe); static void spd_release_page(struct splice_pipe_desc *spd, unsigned int i) { diff --git a/fs/super.c b/fs/super.c index 7e9dd4cc2c0..0d89e93f654 100644 --- a/fs/super.c +++ b/fs/super.c @@ -71,6 +71,7 @@ static struct super_block *alloc_super(struct file_system_type *type) #else INIT_LIST_HEAD(&s->s_files); #endif + s->s_bdi = &default_backing_dev_info; INIT_LIST_HEAD(&s->s_instances); INIT_HLIST_BL_HEAD(&s->s_anon); INIT_LIST_HEAD(&s->s_inodes); @@ -1003,6 +1004,7 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void } BUG_ON(!mnt->mnt_sb); WARN_ON(!mnt->mnt_sb->s_bdi); + WARN_ON(mnt->mnt_sb->s_bdi == &default_backing_dev_info); mnt->mnt_sb->s_flags |= MS_BORN; error = security_sb_kern_mount(mnt->mnt_sb, flags, secdata); diff --git a/fs/sync.c b/fs/sync.c index ba76b9623e7..412dc89163d 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -33,7 +33,7 @@ static int __sync_filesystem(struct super_block *sb, int wait) * This should be safe, as we require bdi backing to actually * write out data in the first place */ - if (!sb->s_bdi || sb->s_bdi == &noop_backing_dev_info) + if (sb->s_bdi == &noop_backing_dev_info) return 0; if (sb->s_qcop && sb->s_qcop->quota_sync) @@ -79,7 +79,7 @@ EXPORT_SYMBOL_GPL(sync_filesystem); static void sync_one_sb(struct super_block *sb, void *arg) { - if (!(sb->s_flags & MS_RDONLY) && sb->s_bdi) + if (!(sb->s_flags & MS_RDONLY)) __sync_filesystem(sb, *(int *)arg); } /* |