path: root/fs
author		John Stultz <john.stultz@linaro.org>	2011-04-05 12:21:10 -0700
committer	John Stultz <john.stultz@linaro.org>	2011-04-05 12:21:10 -0700
commit		4ce7ea0bfbb301ffb79154b6cecd2ef030db4cdf (patch)
tree		a7ad87580793912a9da208e7a9ea21d6c7768f08 /fs
parent		4450182f400f1a5f50b1680faec25af2315c2849 (diff)
parent		7c4bc9c2662c6d9840afed0e29eb01314af9bb78 (diff)
Merge branch 'upstream/linaro.38' into linaro-android.38
Conflicts:
	arch/arm/kernel/signal.c
	drivers/mmc/card/block.c
	drivers/mtd/nand/Kconfig
	include/linux/amba/mmci.h
	kernel/printk.c
	mm/shmem.c
	net/socket.c
Diffstat (limited to 'fs')
-rw-r--r--	fs/aio.c		4
-rw-r--r--	fs/buffer.c		6
-rw-r--r--	fs/compat.c		2
-rw-r--r--	fs/dcache.c		98
-rw-r--r--	fs/exec.c		7
-rw-r--r--	fs/ext3/namei.c		2
-rw-r--r--	fs/ext3/super.c		7
-rw-r--r--	fs/ext4/super.c		8
-rw-r--r--	fs/ioctl.c		5
-rw-r--r--	fs/namespace.c		16
-rw-r--r--	fs/nfs/write.c		31
-rw-r--r--	fs/nfsd/nfs4proc.c	4
-rw-r--r--	fs/nfsd/nfs4state.c	155
-rw-r--r--	fs/nfsd/nfs4xdr.c	5
-rw-r--r--	fs/open.c		6
-rw-r--r--	fs/partitions/osf.c	2
-rw-r--r--	fs/proc/array.c		4
-rw-r--r--	fs/proc/task_mmu.c	4
-rw-r--r--	fs/read_write.c		28
-rw-r--r--	fs/select.c		47
-rw-r--r--	fs/seq_file.c		44
-rw-r--r--	fs/splice.c		1
-rw-r--r--	fs/super.c		2
-rw-r--r--	fs/sync.c		4
24 files changed, 322 insertions, 170 deletions
diff --git a/fs/aio.c b/fs/aio.c
index 26869cde395..88f0ed51442 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -520,7 +520,7 @@ static inline void really_put_req(struct kioctx *ctx, struct kiocb *req)
ctx->reqs_active--;
if (unlikely(!ctx->reqs_active && ctx->dead))
- wake_up(&ctx->wait);
+ wake_up_all(&ctx->wait);
}
static void aio_fput_routine(struct work_struct *data)
@@ -1229,7 +1229,7 @@ static void io_destroy(struct kioctx *ioctx)
* by other CPUs at this point. Right now, we rely on the
* locking done by the above calls to ensure this consistency.
*/
- wake_up(&ioctx->wait);
+ wake_up_all(&ioctx->wait);
put_ioctx(ioctx); /* once for the lookup */
}
diff --git a/fs/buffer.c b/fs/buffer.c
index 2219a76e2ca..5d0c2c6045c 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -41,11 +41,15 @@
#include <linux/bitops.h>
#include <linux/mpage.h>
#include <linux/bit_spinlock.h>
+#include <trace/fs.h>
static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
+DEFINE_TRACE(fs_buffer_wait_start);
+DEFINE_TRACE(fs_buffer_wait_end);
+
inline void
init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private)
{
@@ -90,7 +94,9 @@ EXPORT_SYMBOL(unlock_buffer);
*/
void __wait_on_buffer(struct buffer_head * bh)
{
+ trace_fs_buffer_wait_start(bh);
wait_on_bit(&bh->b_state, BH_Lock, sync_buffer, TASK_UNINTERRUPTIBLE);
+ trace_fs_buffer_wait_end(bh);
}
EXPORT_SYMBOL(__wait_on_buffer);
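DEFINE_TRACE() only emits the tracepoint's storage; the prototype comes from a matching DECLARE_TRACE() in the included header. The declarations in trace/fs.h presumably look roughly like this (a sketch, not the actual header):

    #include <linux/tracepoint.h>

    DECLARE_TRACE(fs_buffer_wait_start,
        TP_PROTO(struct buffer_head *bh),
        TP_ARGS(bh));

    DECLARE_TRACE(fs_buffer_wait_end,
        TP_PROTO(struct buffer_head *bh),
        TP_ARGS(bh));

With that in place, trace_fs_buffer_wait_start(bh)/..._end(bh) are cheap no-ops unless a probe is attached, so the buffer wait path only pays when tracing is active.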
diff --git a/fs/compat.c b/fs/compat.c
index 691c3fd8ce1..933042d14e6 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -50,6 +50,7 @@
#include <linux/fs_struct.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
+#include <trace/fs.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
@@ -1533,6 +1534,7 @@ int compat_do_execve(char * filename,
if (retval < 0)
goto out;
+ trace_fs_exec(filename);
/* execve succeeded */
current->fs->in_exec = 0;
current->in_execve = 0;
diff --git a/fs/dcache.c b/fs/dcache.c
index 611ffe928c0..1baddc1cec4 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -296,8 +296,12 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
__releases(parent->d_lock)
__releases(dentry->d_inode->i_lock)
{
- dentry->d_parent = NULL;
list_del(&dentry->d_u.d_child);
+ /*
+ * Inform try_to_ascend() that we are no longer attached to the
+ * dentry tree
+ */
+ dentry->d_flags |= DCACHE_DISCONNECTED;
if (parent)
spin_unlock(&parent->d_lock);
dentry_iput(dentry);
@@ -1012,6 +1016,35 @@ void shrink_dcache_for_umount(struct super_block *sb)
}
/*
+ * This tries to ascend one level of parenthood, but
+ * we can race with renaming, so we need to re-check
+ * the parenthood after dropping the lock and check
+ * that the sequence number still matches.
+ */
+static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq)
+{
+ struct dentry *new = old->d_parent;
+
+ rcu_read_lock();
+ spin_unlock(&old->d_lock);
+ spin_lock(&new->d_lock);
+
+ /*
+ * might go back up the wrong parent if we have had a rename
+ * or deletion
+ */
+ if (new != old->d_parent ||
+ (old->d_flags & DCACHE_DISCONNECTED) ||
+ (!locked && read_seqretry(&rename_lock, seq))) {
+ spin_unlock(&new->d_lock);
+ new = NULL;
+ }
+ rcu_read_unlock();
+ return new;
+}
+
+
+/*
* Search for at least 1 mount point in the dentry's subdirs.
* We descend to the next level whenever the d_subdirs
* list is non-empty and continue searching.
@@ -1066,24 +1099,10 @@ resume:
* All done at this level ... ascend and resume the search.
*/
if (this_parent != parent) {
- struct dentry *tmp;
- struct dentry *child;
-
- tmp = this_parent->d_parent;
- rcu_read_lock();
- spin_unlock(&this_parent->d_lock);
- child = this_parent;
- this_parent = tmp;
- spin_lock(&this_parent->d_lock);
- /* might go back up the wrong parent if we have had a rename
- * or deletion */
- if (this_parent != child->d_parent ||
- (!locked && read_seqretry(&rename_lock, seq))) {
- spin_unlock(&this_parent->d_lock);
- rcu_read_unlock();
+ struct dentry *child = this_parent;
+ this_parent = try_to_ascend(this_parent, locked, seq);
+ if (!this_parent)
goto rename_retry;
- }
- rcu_read_unlock();
next = child->d_u.d_child.next;
goto resume;
}
@@ -1181,24 +1200,10 @@ resume:
* All done at this level ... ascend and resume the search.
*/
if (this_parent != parent) {
- struct dentry *tmp;
- struct dentry *child;
-
- tmp = this_parent->d_parent;
- rcu_read_lock();
- spin_unlock(&this_parent->d_lock);
- child = this_parent;
- this_parent = tmp;
- spin_lock(&this_parent->d_lock);
- /* might go back up the wrong parent if we have had a rename
- * or deletion */
- if (this_parent != child->d_parent ||
- (!locked && read_seqretry(&rename_lock, seq))) {
- spin_unlock(&this_parent->d_lock);
- rcu_read_unlock();
+ struct dentry *child = this_parent;
+ this_parent = try_to_ascend(this_parent, locked, seq);
+ if (!this_parent)
goto rename_retry;
- }
- rcu_read_unlock();
next = child->d_u.d_child.next;
goto resume;
}
@@ -1607,10 +1612,13 @@ struct dentry *d_obtain_alias(struct inode *inode)
__bit_spin_unlock(0, (unsigned long *)&tmp->d_sb->s_anon.first);
spin_unlock(&tmp->d_lock);
spin_unlock(&inode->i_lock);
+ security_d_instantiate(tmp, inode);
return tmp;
out_iput:
+ if (res && !IS_ERR(res))
+ security_d_instantiate(res, inode);
iput(inode);
return res;
}
@@ -2942,28 +2950,14 @@ resume:
spin_unlock(&dentry->d_lock);
}
if (this_parent != root) {
- struct dentry *tmp;
- struct dentry *child;
-
- tmp = this_parent->d_parent;
+ struct dentry *child = this_parent;
if (!(this_parent->d_flags & DCACHE_GENOCIDE)) {
this_parent->d_flags |= DCACHE_GENOCIDE;
this_parent->d_count--;
}
- rcu_read_lock();
- spin_unlock(&this_parent->d_lock);
- child = this_parent;
- this_parent = tmp;
- spin_lock(&this_parent->d_lock);
- /* might go back up the wrong parent if we have had a rename
- * or deletion */
- if (this_parent != child->d_parent ||
- (!locked && read_seqretry(&rename_lock, seq))) {
- spin_unlock(&this_parent->d_lock);
- rcu_read_unlock();
+ this_parent = try_to_ascend(this_parent, locked, seq);
+ if (!this_parent)
goto rename_retry;
- }
- rcu_read_unlock();
next = child->d_u.d_child.next;
goto resume;
}
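try_to_ascend() centralizes the re-validation the three tree walkers previously open-coded, and the new DCACHE_DISCONNECTED marking in d_kill() gives it a way to notice a dentry that was detached from the tree while the walker had dropped its lock. The window it guards against, as an annotated timeline (illustrative, not taken from the source):

    /* walker CPU                          rename/unlink CPU            */
    new = old->d_parent;
    spin_unlock(&old->d_lock);
                                           /* d_move() reparents old, or
                                            * d_kill() detaches it and now
                                            * sets DCACHE_DISCONNECTED    */
    spin_lock(&new->d_lock);
    /* re-check: parent changed, DCACHE_DISCONNECTED set, or the
     * rename_lock sequence moved on -> return NULL, caller restarts */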
diff --git a/fs/exec.c b/fs/exec.c
index 52a447d9b6a..9a92bbe142d 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -55,6 +55,7 @@
#include <linux/fs_struct.h>
#include <linux/pipe_fs_i.h>
#include <linux/oom.h>
+#include <trace/fs.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
@@ -77,6 +78,11 @@ static atomic_t call_count = ATOMIC_INIT(1);
static LIST_HEAD(formats);
static DEFINE_RWLOCK(binfmt_lock);
+/*
+ * Also used in compat.c.
+ */
+DEFINE_TRACE(fs_exec);
+
int __register_binfmt(struct linux_binfmt * fmt, int insert)
{
if (!fmt)
@@ -1447,6 +1453,7 @@ int do_execve(const char * filename,
if (retval < 0)
goto out;
+ trace_fs_exec(filename);
/* execve succeeded */
current->fs->in_exec = 0;
current->in_execve = 0;
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index b27ba71810e..75c968eaf90 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1540,8 +1540,8 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
goto cleanup;
node2 = (struct dx_node *)(bh2->b_data);
entries2 = node2->entries;
+ memset(&node2->fake, 0, sizeof(struct fake_dirent));
node2->fake.rec_len = ext3_rec_len_to_disk(sb->s_blocksize);
- node2->fake.inode = 0;
BUFFER_TRACE(frame->bh, "get_write_access");
err = ext3_journal_get_write_access(handle, frame->bh);
if (err)
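For reference, the structure being initialized is roughly the fake_dirent defined in fs/ext3/namei.c:

    struct fake_dirent {
        __le32 inode;      /* 0: this slot names no inode                */
        __le16 rec_len;    /* spans the whole block, set just below      */
        u8     name_len;   /* must be 0 so the entry reads as empty      */
        u8     file_type;
    };

Zeroing only 'inode', as the old code did, left name_len and file_type holding whatever was previously in the buffer; the memset() clears the whole header before rec_len is filled in.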
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 85c8cc8f247..0d62f29f213 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -1464,6 +1464,13 @@ static void ext3_orphan_cleanup (struct super_block * sb,
return;
}
+ /* Check if feature set allows readwrite operations */
+ if (EXT3_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP)) {
+ ext3_msg(sb, KERN_INFO, "Skipping orphan cleanup due to "
+ "unknown ROCOMPAT features");
+ return;
+ }
+
if (EXT3_SB(sb)->s_mount_state & EXT3_ERROR_FS) {
if (es->s_last_orphan)
jbd_debug(1, "Errors on filesystem, "
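Orphan cleanup writes to the filesystem, which is only safe when the kernel understands every RO_COMPAT feature the superblock advertises. The macro used for the check expands to roughly this (a sketch based on the usual ext3_fs.h definition):

    #define EXT3_HAS_RO_COMPAT_FEATURE(sb, mask) \
        (EXT3_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask))

    /* ~EXT3_FEATURE_RO_COMPAT_SUPP selects every RO_COMPAT bit this
     * kernel does not support; any such bit set => skip the cleanup. */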
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index f6a318f836b..4381efee3db 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -75,6 +75,7 @@ static void ext4_write_super(struct super_block *sb);
static int ext4_freeze(struct super_block *sb);
static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
const char *dev_name, void *data);
+static int ext4_feature_set_ok(struct super_block *sb, int readonly);
static void ext4_destroy_lazyinit_thread(void);
static void ext4_unregister_li_request(struct super_block *sb);
static void ext4_clear_request_list(void);
@@ -2120,6 +2121,13 @@ static void ext4_orphan_cleanup(struct super_block *sb,
return;
}
+ /* Check if feature set would not allow a r/w mount */
+ if (!ext4_feature_set_ok(sb, 0)) {
+ ext4_msg(sb, KERN_INFO, "Skipping orphan cleanup due to "
+ "unknown ROCOMPAT features");
+ return;
+ }
+
if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
if (es->s_last_orphan)
jbd_debug(1, "Errors on filesystem, "
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 1eebeb72b20..a1fecf33b11 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -15,9 +15,12 @@
#include <linux/writeback.h>
#include <linux/buffer_head.h>
#include <linux/falloc.h>
+#include <trace/fs.h>
#include <asm/ioctls.h>
+DEFINE_TRACE(fs_ioctl);
+
/* So that the fiemap access checks can't overflow on 32 bit machines. */
#define FIEMAP_MAX_EXTENTS (UINT_MAX / sizeof(struct fiemap_extent))
@@ -616,6 +619,8 @@ SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
if (!filp)
goto out;
+ trace_fs_ioctl(fd, cmd, arg);
+
error = security_file_ioctl(filp, cmd, arg);
if (error)
goto out_fput;
diff --git a/fs/namespace.c b/fs/namespace.c
index d1edf26025d..445534be024 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2469,9 +2469,6 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
error = user_path_dir(new_root, &new);
if (error)
goto out0;
- error = -EINVAL;
- if (!check_mnt(new.mnt))
- goto out1;
error = user_path_dir(put_old, &old);
if (error)
@@ -2491,7 +2488,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
IS_MNT_SHARED(new.mnt->mnt_parent) ||
IS_MNT_SHARED(root.mnt->mnt_parent))
goto out2;
- if (!check_mnt(root.mnt))
+ if (!check_mnt(root.mnt) || !check_mnt(new.mnt))
goto out2;
error = -ENOENT;
if (cant_mount(old.dentry))
@@ -2515,19 +2512,19 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
goto out2; /* not attached */
/* make sure we can reach put_old from new_root */
tmp = old.mnt;
- br_write_lock(vfsmount_lock);
if (tmp != new.mnt) {
for (;;) {
if (tmp->mnt_parent == tmp)
- goto out3; /* already mounted on put_old */
+ goto out2; /* already mounted on put_old */
if (tmp->mnt_parent == new.mnt)
break;
tmp = tmp->mnt_parent;
}
if (!is_subdir(tmp->mnt_mountpoint, new.dentry))
- goto out3;
+ goto out2;
} else if (!is_subdir(old.dentry, new.dentry))
- goto out3;
+ goto out2;
+ br_write_lock(vfsmount_lock);
detach_mnt(new.mnt, &parent_path);
detach_mnt(root.mnt, &root_parent);
/* mount old root on put_old */
@@ -2550,9 +2547,6 @@ out1:
path_put(&new);
out0:
return error;
-out3:
- br_write_unlock(vfsmount_lock);
- goto out2;
}
static void __init init_mount_tree(void)
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 42b92d7a9cc..b5fcbf7da6f 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1214,13 +1214,17 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait)
{
+ int ret;
+
if (!test_and_set_bit(NFS_INO_COMMIT, &nfsi->flags))
return 1;
- if (may_wait && !out_of_line_wait_on_bit_lock(&nfsi->flags,
- NFS_INO_COMMIT, nfs_wait_bit_killable,
- TASK_KILLABLE))
- return 1;
- return 0;
+ if (!may_wait)
+ return 0;
+ ret = out_of_line_wait_on_bit_lock(&nfsi->flags,
+ NFS_INO_COMMIT,
+ nfs_wait_bit_killable,
+ TASK_KILLABLE);
+ return (ret < 0) ? ret : 1;
}
static void nfs_commit_clear_lock(struct nfs_inode *nfsi)
@@ -1396,9 +1400,10 @@ int nfs_commit_inode(struct inode *inode, int how)
{
LIST_HEAD(head);
int may_wait = how & FLUSH_SYNC;
- int res = 0;
+ int res;
- if (!nfs_commit_set_lock(NFS_I(inode), may_wait))
+ res = nfs_commit_set_lock(NFS_I(inode), may_wait);
+ if (res <= 0)
goto out_mark_dirty;
spin_lock(&inode->i_lock);
res = nfs_scan_commit(inode, &head, 0, 0);
@@ -1407,12 +1412,14 @@ int nfs_commit_inode(struct inode *inode, int how)
int error = nfs_commit_list(inode, &head, how);
if (error < 0)
return error;
- if (may_wait)
- wait_on_bit(&NFS_I(inode)->flags, NFS_INO_COMMIT,
- nfs_wait_bit_killable,
- TASK_KILLABLE);
- else
+ if (!may_wait)
goto out_mark_dirty;
+ error = wait_on_bit(&NFS_I(inode)->flags,
+ NFS_INO_COMMIT,
+ nfs_wait_bit_killable,
+ TASK_KILLABLE);
+ if (error < 0)
+ return error;
} else
nfs_commit_clear_lock(NFS_I(inode));
return res;
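nfs_commit_set_lock() now has a three-way result instead of a boolean, and nfs_commit_inode() propagates a fatal-signal error instead of treating it as success. Annotated for clarity (values as used in the hunks above):

    res = nfs_commit_set_lock(NFS_I(inode), may_wait);
    /* res == 1 : NFS_INO_COMMIT acquired, go ahead with the commit
     * res == 0 : lock busy and !may_wait, just re-mark the inode dirty
     * res  < 0 : the killable wait was interrupted (e.g. -ERESTARTSYS),
     *            and the error is returned to the caller               */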
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index db52546143d..5fcb1396a7e 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -984,8 +984,8 @@ typedef __be32(*nfsd4op_func)(struct svc_rqst *, struct nfsd4_compound_state *,
void *);
enum nfsd4_op_flags {
ALLOWED_WITHOUT_FH = 1 << 0, /* No current filehandle required */
- ALLOWED_ON_ABSENT_FS = 2 << 0, /* ops processed on absent fs */
- ALLOWED_AS_FIRST_OP = 3 << 0, /* ops reqired first in compound */
+ ALLOWED_ON_ABSENT_FS = 1 << 1, /* ops processed on absent fs */
+ ALLOWED_AS_FIRST_OP = 1 << 2, /* ops reqired first in compound */
};
struct nfsd4_operation {
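The old initializers 2 << 0 and 3 << 0 are simply 2 and 3, so the supposedly independent flags shared bits and testing them with & gave false positives. Spelled out (binary values added for illustration):

    ALLOWED_WITHOUT_FH   = 1 << 0   /* 0b001                             */
    ALLOWED_ON_ABSENT_FS = 2 << 0   /* 0b010 - happens to be one bit     */
    ALLOWED_AS_FIRST_OP  = 3 << 0   /* 0b011 - overlaps BOTH flags above */

    /* e.g. (op->op_flags & ALLOWED_ON_ABSENT_FS) was also true for an op
     * marked only ALLOWED_AS_FIRST_OP. With 1 << 0, 1 << 1 and 1 << 2,
     * each flag owns a distinct bit. */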
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 7b566ec14e1..f0e448a512c 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -316,64 +316,6 @@ static struct list_head unconf_id_hashtbl[CLIENT_HASH_SIZE];
static struct list_head client_lru;
static struct list_head close_lru;
-static void unhash_generic_stateid(struct nfs4_stateid *stp)
-{
- list_del(&stp->st_hash);
- list_del(&stp->st_perfile);
- list_del(&stp->st_perstateowner);
-}
-
-static void free_generic_stateid(struct nfs4_stateid *stp)
-{
- put_nfs4_file(stp->st_file);
- kmem_cache_free(stateid_slab, stp);
-}
-
-static void release_lock_stateid(struct nfs4_stateid *stp)
-{
- struct file *file;
-
- unhash_generic_stateid(stp);
- file = find_any_file(stp->st_file);
- if (file)
- locks_remove_posix(file, (fl_owner_t)stp->st_stateowner);
- free_generic_stateid(stp);
-}
-
-static void unhash_lockowner(struct nfs4_stateowner *sop)
-{
- struct nfs4_stateid *stp;
-
- list_del(&sop->so_idhash);
- list_del(&sop->so_strhash);
- list_del(&sop->so_perstateid);
- while (!list_empty(&sop->so_stateids)) {
- stp = list_first_entry(&sop->so_stateids,
- struct nfs4_stateid, st_perstateowner);
- release_lock_stateid(stp);
- }
-}
-
-static void release_lockowner(struct nfs4_stateowner *sop)
-{
- unhash_lockowner(sop);
- nfs4_put_stateowner(sop);
-}
-
-static void
-release_stateid_lockowners(struct nfs4_stateid *open_stp)
-{
- struct nfs4_stateowner *lock_sop;
-
- while (!list_empty(&open_stp->st_lockowners)) {
- lock_sop = list_entry(open_stp->st_lockowners.next,
- struct nfs4_stateowner, so_perstateid);
- /* list_del(&open_stp->st_lockowners); */
- BUG_ON(lock_sop->so_is_open_owner);
- release_lockowner(lock_sop);
- }
-}
-
/*
* We store the NONE, READ, WRITE, and BOTH bits separately in the
* st_{access,deny}_bmap field of the stateid, in order to track not
@@ -446,13 +388,71 @@ static int nfs4_access_bmap_to_omode(struct nfs4_stateid *stp)
return nfs4_access_to_omode(access);
}
-static void release_open_stateid(struct nfs4_stateid *stp)
+static void unhash_generic_stateid(struct nfs4_stateid *stp)
+{
+ list_del(&stp->st_hash);
+ list_del(&stp->st_perfile);
+ list_del(&stp->st_perstateowner);
+}
+
+static void free_generic_stateid(struct nfs4_stateid *stp)
{
int oflag = nfs4_access_bmap_to_omode(stp);
+ nfs4_file_put_access(stp->st_file, oflag);
+ put_nfs4_file(stp->st_file);
+ kmem_cache_free(stateid_slab, stp);
+}
+
+static void release_lock_stateid(struct nfs4_stateid *stp)
+{
+ struct file *file;
+
+ unhash_generic_stateid(stp);
+ file = find_any_file(stp->st_file);
+ if (file)
+ locks_remove_posix(file, (fl_owner_t)stp->st_stateowner);
+ free_generic_stateid(stp);
+}
+
+static void unhash_lockowner(struct nfs4_stateowner *sop)
+{
+ struct nfs4_stateid *stp;
+
+ list_del(&sop->so_idhash);
+ list_del(&sop->so_strhash);
+ list_del(&sop->so_perstateid);
+ while (!list_empty(&sop->so_stateids)) {
+ stp = list_first_entry(&sop->so_stateids,
+ struct nfs4_stateid, st_perstateowner);
+ release_lock_stateid(stp);
+ }
+}
+
+static void release_lockowner(struct nfs4_stateowner *sop)
+{
+ unhash_lockowner(sop);
+ nfs4_put_stateowner(sop);
+}
+
+static void
+release_stateid_lockowners(struct nfs4_stateid *open_stp)
+{
+ struct nfs4_stateowner *lock_sop;
+
+ while (!list_empty(&open_stp->st_lockowners)) {
+ lock_sop = list_entry(open_stp->st_lockowners.next,
+ struct nfs4_stateowner, so_perstateid);
+ /* list_del(&open_stp->st_lockowners); */
+ BUG_ON(lock_sop->so_is_open_owner);
+ release_lockowner(lock_sop);
+ }
+}
+
+static void release_open_stateid(struct nfs4_stateid *stp)
+{
unhash_generic_stateid(stp);
release_stateid_lockowners(stp);
- nfs4_file_put_access(stp->st_file, oflag);
free_generic_stateid(stp);
}
@@ -3735,6 +3735,7 @@ alloc_init_lock_stateid(struct nfs4_stateowner *sop, struct nfs4_file *fp, struc
stp->st_stateid.si_stateownerid = sop->so_id;
stp->st_stateid.si_fileid = fp->fi_id;
stp->st_stateid.si_generation = 0;
+ stp->st_access_bmap = 0;
stp->st_deny_bmap = open_stp->st_deny_bmap;
stp->st_openstp = open_stp;
@@ -3749,6 +3750,17 @@ check_lock_length(u64 offset, u64 length)
LOFF_OVERFLOW(offset, length)));
}
+static void get_lock_access(struct nfs4_stateid *lock_stp, u32 access)
+{
+ struct nfs4_file *fp = lock_stp->st_file;
+ int oflag = nfs4_access_to_omode(access);
+
+ if (test_bit(access, &lock_stp->st_access_bmap))
+ return;
+ nfs4_file_get_access(fp, oflag);
+ __set_bit(access, &lock_stp->st_access_bmap);
+}
+
/*
* LOCK operation
*/
@@ -3765,7 +3777,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct file_lock conflock;
__be32 status = 0;
unsigned int strhashval;
- unsigned int cmd;
int err;
dprintk("NFSD: nfsd4_lock: start=%Ld length=%Ld\n",
@@ -3847,22 +3858,18 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
switch (lock->lk_type) {
case NFS4_READ_LT:
case NFS4_READW_LT:
- if (find_readable_file(lock_stp->st_file)) {
- nfs4_get_vfs_file(rqstp, fp, &cstate->current_fh, NFS4_SHARE_ACCESS_READ);
- filp = find_readable_file(lock_stp->st_file);
- }
+ filp = find_readable_file(lock_stp->st_file);
+ if (filp)
+ get_lock_access(lock_stp, NFS4_SHARE_ACCESS_READ);
file_lock.fl_type = F_RDLCK;
- cmd = F_SETLK;
- break;
+ break;
case NFS4_WRITE_LT:
case NFS4_WRITEW_LT:
- if (find_writeable_file(lock_stp->st_file)) {
- nfs4_get_vfs_file(rqstp, fp, &cstate->current_fh, NFS4_SHARE_ACCESS_WRITE);
- filp = find_writeable_file(lock_stp->st_file);
- }
+ filp = find_writeable_file(lock_stp->st_file);
+ if (filp)
+ get_lock_access(lock_stp, NFS4_SHARE_ACCESS_WRITE);
file_lock.fl_type = F_WRLCK;
- cmd = F_SETLK;
- break;
+ break;
default:
status = nfserr_inval;
goto out;
@@ -3886,7 +3893,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
* Note: locks.c uses the BKL to protect the inode's lock list.
*/
- err = vfs_lock_file(filp, cmd, &file_lock, &conflock);
+ err = vfs_lock_file(filp, F_SETLK, &file_lock, &conflock);
switch (-err) {
case 0: /* success! */
update_stateid(&lock_stp->st_stateid);
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 615f0a9f060..c6766af00d9 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1142,7 +1142,7 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
u32 dummy;
char *machine_name;
- int i, j;
+ int i;
int nr_secflavs;
READ_BUF(16);
@@ -1215,8 +1215,6 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
READ_BUF(4);
READ32(dummy);
READ_BUF(dummy * 4);
- for (j = 0; j < dummy; ++j)
- READ32(dummy);
break;
case RPC_AUTH_GSS:
dprintk("RPC_AUTH_GSS callback secflavor "
@@ -1232,7 +1230,6 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
READ_BUF(4);
READ32(dummy);
READ_BUF(dummy);
- p += XDR_QUADLEN(dummy);
break;
default:
dprintk("Illegal callback secflavor\n");
diff --git a/fs/open.c b/fs/open.c
index b47aab39c05..575c92f3b81 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -30,9 +30,13 @@
#include <linux/fs_struct.h>
#include <linux/ima.h>
#include <linux/dnotify.h>
+#include <trace/fs.h>
#include "internal.h"
+DEFINE_TRACE(fs_open);
+DEFINE_TRACE(fs_close);
+
int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
struct file *filp)
{
@@ -906,6 +910,7 @@ long do_sys_open(int dfd, const char __user *filename, int flags, int mode)
fsnotify_open(f);
fd_install(fd, f);
}
+ trace_fs_open(fd, tmp);
}
putname(tmp);
}
@@ -995,6 +1000,7 @@ SYSCALL_DEFINE1(close, unsigned int, fd)
filp = fdt->fd[fd];
if (!filp)
goto out_unlock;
+ trace_fs_close(fd);
rcu_assign_pointer(fdt->fd[fd], NULL);
FD_CLR(fd, fdt->close_on_exec);
__put_unused_fd(files, fd);
diff --git a/fs/partitions/osf.c b/fs/partitions/osf.c
index be03a0b08b4..764b86a0196 100644
--- a/fs/partitions/osf.c
+++ b/fs/partitions/osf.c
@@ -10,7 +10,7 @@
#include "check.h"
#include "osf.h"
-#define MAX_OSF_PARTITIONS 8
+#define MAX_OSF_PARTITIONS 18
int osf_partition(struct parsed_partitions *state)
{
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 7c99c1cf7e5..5e4f776b091 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -489,8 +489,8 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
vsize,
mm ? get_mm_rss(mm) : 0,
rsslim,
- mm ? mm->start_code : 0,
- mm ? mm->end_code : 0,
+ mm ? (permitted ? mm->start_code : 1) : 0,
+ mm ? (permitted ? mm->end_code : 1) : 0,
(permitted && mm) ? mm->start_stack : 0,
esp,
eip,
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 60b914860f8..f269ee673c8 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -249,8 +249,8 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
const char *name = arch_vma_name(vma);
if (!name) {
if (mm) {
- if (vma->vm_start <= mm->start_brk &&
- vma->vm_end >= mm->brk) {
+ if (vma->vm_start <= mm->brk &&
+ vma->vm_end >= mm->start_brk) {
name = "[heap]";
} else if (vma->vm_start <= mm->start_stack &&
vma->vm_end >= mm->start_stack) {
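The old condition asked whether the VMA contains the whole [start_brk, brk] range; the new one asks whether it merely overlaps it, so a heap whose VMA does not start exactly at start_brk is still labelled. A worked example with made-up addresses:

    /* start_brk = 0x1000, brk = 0x3000, heap VMA = [0x2000, 0x3000) */
    old: vm_start (0x2000) <= start_brk (0x1000)          -> false, no "[heap]"
    new: vm_start (0x2000) <= brk (0x3000) &&
         vm_end   (0x3000) >= start_brk (0x1000)          -> true,  "[heap]" shown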
diff --git a/fs/read_write.c b/fs/read_write.c
index 5520f8ad550..6a3f7f9c9db 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -15,6 +15,7 @@
#include <linux/syscalls.h>
#include <linux/pagemap.h>
#include <linux/splice.h>
+#include <trace/fs.h>
#include "read_write.h"
#include <asm/uaccess.h>
@@ -30,6 +31,15 @@ const struct file_operations generic_ro_fops = {
EXPORT_SYMBOL(generic_ro_fops);
+DEFINE_TRACE(fs_lseek);
+DEFINE_TRACE(fs_llseek);
+DEFINE_TRACE(fs_read);
+DEFINE_TRACE(fs_write);
+DEFINE_TRACE(fs_pread64);
+DEFINE_TRACE(fs_pwrite64);
+DEFINE_TRACE(fs_readv);
+DEFINE_TRACE(fs_writev);
+
static inline int unsigned_offsets(struct file *file)
{
return file->f_mode & FMODE_UNSIGNED_OFFSET;
@@ -187,6 +197,9 @@ SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, origin)
if (res != (loff_t)retval)
retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */
}
+
+ trace_fs_lseek(fd, offset, origin);
+
fput_light(file, fput_needed);
bad:
return retval;
@@ -214,6 +227,8 @@ SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
offset = vfs_llseek(file, ((loff_t) offset_high << 32) | offset_low,
origin);
+ trace_fs_llseek(fd, offset, origin);
+
retval = (int)offset;
if (offset >= 0) {
retval = -EFAULT;
@@ -409,6 +424,7 @@ SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
if (file) {
loff_t pos = file_pos_read(file);
ret = vfs_read(file, buf, count, &pos);
+ trace_fs_read(fd, buf, count, ret);
file_pos_write(file, pos);
fput_light(file, fput_needed);
}
@@ -427,6 +443,7 @@ SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
if (file) {
loff_t pos = file_pos_read(file);
ret = vfs_write(file, buf, count, &pos);
+ trace_fs_write(fd, buf, count, ret);
file_pos_write(file, pos);
fput_light(file, fput_needed);
}
@@ -447,8 +464,11 @@ SYSCALL_DEFINE(pread64)(unsigned int fd, char __user *buf,
file = fget_light(fd, &fput_needed);
if (file) {
ret = -ESPIPE;
- if (file->f_mode & FMODE_PREAD)
+ if (file->f_mode & FMODE_PREAD) {
ret = vfs_read(file, buf, count, &pos);
+ trace_fs_pread64(fd, buf, count, pos, ret);
+ }
+
fput_light(file, fput_needed);
}
@@ -476,8 +496,10 @@ SYSCALL_DEFINE(pwrite64)(unsigned int fd, const char __user *buf,
file = fget_light(fd, &fput_needed);
if (file) {
ret = -ESPIPE;
- if (file->f_mode & FMODE_PWRITE)
+ if (file->f_mode & FMODE_PWRITE) {
ret = vfs_write(file, buf, count, &pos);
+ trace_fs_pwrite64(fd, buf, count, pos, ret);
+ }
fput_light(file, fput_needed);
}
@@ -736,6 +758,7 @@ SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
if (file) {
loff_t pos = file_pos_read(file);
ret = vfs_readv(file, vec, vlen, &pos);
+ trace_fs_readv(fd, vec, vlen, ret);
file_pos_write(file, pos);
fput_light(file, fput_needed);
}
@@ -757,6 +780,7 @@ SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
if (file) {
loff_t pos = file_pos_read(file);
ret = vfs_writev(file, vec, vlen, &pos);
+ trace_fs_writev(fd, vec, vlen, ret);
file_pos_write(file, pos);
fput_light(file, fput_needed);
}
diff --git a/fs/select.c b/fs/select.c
index e56560d2b08..64c2404f2cc 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -26,6 +26,7 @@
#include <linux/fs.h>
#include <linux/rcupdate.h>
#include <linux/hrtimer.h>
+#include <trace/fs.h>
#include <asm/uaccess.h>
@@ -98,6 +99,9 @@ struct poll_table_page {
#define POLL_TABLE_FULL(table) \
((unsigned long)((table)->entry+1) > PAGE_SIZE + (unsigned long)(table))
+DEFINE_TRACE(fs_select);
+DEFINE_TRACE(fs_poll);
+
/*
* Ok, Peter made a complicated, but straightforward multiple_wait() function.
* I have rewritten this, taking some shortcuts: This code may not be easy to
@@ -112,6 +116,9 @@ struct poll_table_page {
*/
static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
poll_table *p);
+static void __pollwait_exclusive(struct file *filp,
+ wait_queue_head_t *wait_address,
+ poll_table *p);
void poll_initwait(struct poll_wqueues *pwq)
{
@@ -152,6 +159,20 @@ void poll_freewait(struct poll_wqueues *pwq)
}
EXPORT_SYMBOL(poll_freewait);
+/**
+ * poll_wait_set_exclusive - set poll wait queue to exclusive
+ *
+ * Sets up a poll wait queue to use exclusive wakeups. This is useful to
+ * wake up only one waiter at each wakeup. Used to work-around "thundering herd"
+ * problem.
+ */
+void poll_wait_set_exclusive(poll_table *p)
+{
+ if (p)
+ init_poll_funcptr(p, __pollwait_exclusive);
+}
+EXPORT_SYMBOL(poll_wait_set_exclusive);
+
static struct poll_table_entry *poll_get_entry(struct poll_wqueues *p)
{
struct poll_table_page *table = p->table;
@@ -213,8 +234,10 @@ static int pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
}
/* Add a new entry */
-static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
- poll_table *p)
+static void __pollwait_common(struct file *filp,
+ wait_queue_head_t *wait_address,
+ poll_table *p,
+ int exclusive)
{
struct poll_wqueues *pwq = container_of(p, struct poll_wqueues, pt);
struct poll_table_entry *entry = poll_get_entry(pwq);
@@ -226,7 +249,23 @@ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
entry->key = p->key;
init_waitqueue_func_entry(&entry->wait, pollwake);
entry->wait.private = pwq;
- add_wait_queue(wait_address, &entry->wait);
+ if (!exclusive)
+ add_wait_queue(wait_address, &entry->wait);
+ else
+ add_wait_queue_exclusive(wait_address, &entry->wait);
+}
+
+static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
+ poll_table *p)
+{
+ __pollwait_common(filp, wait_address, p, 0);
+}
+
+static void __pollwait_exclusive(struct file *filp,
+ wait_queue_head_t *wait_address,
+ poll_table *p)
+{
+ __pollwait_common(filp, wait_address, p, 1);
}
int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
@@ -450,6 +489,7 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
file = fget_light(i, &fput_needed);
if (file) {
f_op = file->f_op;
+ trace_fs_select(i, end_time);
mask = DEFAULT_POLLMASK;
if (f_op && f_op->poll) {
wait_key_set(wait, in, out, bit);
@@ -739,6 +779,7 @@ static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait)
file = fget_light(fd, &fput_needed);
mask = POLLNVAL;
if (file != NULL) {
+ trace_fs_poll(fd);
mask = DEFAULT_POLLMASK;
if (file->f_op && file->f_op->poll) {
if (pwait)
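A minimal sketch of how a driver's poll method could opt into the new exclusive behaviour; mydev_waitq and mydev_data_ready() are hypothetical, only poll_wait_set_exclusive() comes from this patch:

    static DECLARE_WAIT_QUEUE_HEAD(mydev_waitq);

    static unsigned int mydev_poll(struct file *filp, poll_table *wait)
    {
        /* ask select/poll to queue us exclusively: one wake_up() on
         * mydev_waitq then wakes a single poller instead of all of them */
        poll_wait_set_exclusive(wait);
        poll_wait(filp, &mydev_waitq, wait);

        return mydev_data_ready() ? (POLLIN | POLLRDNORM) : 0;
    }

Note that the helper rewrites the poll_table's callback, so it affects every subsequent poll_wait() made with that table during the same select()/poll() call.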
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 05d6b0e78c9..691c84baf4f 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -819,3 +819,47 @@ struct hlist_node *seq_hlist_next_rcu(void *v,
return rcu_dereference(node->next);
}
EXPORT_SYMBOL(seq_hlist_next_rcu);
+
+struct list_head *seq_sorted_list_start(struct list_head *head, loff_t *ppos)
+{
+ struct list_head *lh;
+
+ list_for_each(lh, head)
+ if ((unsigned long)lh >= *ppos) {
+ *ppos = (unsigned long)lh;
+ return lh;
+ }
+ return NULL;
+}
+EXPORT_SYMBOL(seq_sorted_list_start);
+
+struct list_head *seq_sorted_list_start_head(struct list_head *head,
+ loff_t *ppos)
+{
+ struct list_head *lh;
+
+ if (!*ppos) {
+ *ppos = (unsigned long)head;
+ return head;
+ }
+ list_for_each(lh, head)
+ if ((unsigned long)lh >= *ppos) {
+ *ppos = (long)lh->prev;
+ return lh->prev;
+ }
+ return NULL;
+}
+EXPORT_SYMBOL(seq_sorted_list_start_head);
+
+struct list_head *seq_sorted_list_next(void *p, struct list_head *head,
+ loff_t *ppos)
+{
+ struct list_head *lh;
+ void *next;
+
+ lh = ((struct list_head *)p)->next;
+ next = (lh == head) ? NULL : lh;
+ *ppos = next ? ((unsigned long)next) : (-1UL);
+ return next;
+}
+EXPORT_SYMBOL(seq_sorted_list_next);
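These helpers encode the iterator position as the element's kernel address and resume by scanning for the first element at or past that address, so they suit lists that are kept sorted by address and may lose entries between reads. A minimal seq_operations sketch, assuming a module-private my_list/my_lock pair (hypothetical names):

    static void *my_seq_start(struct seq_file *m, loff_t *pos)
    {
        mutex_lock(&my_lock);
        return seq_sorted_list_start_head(&my_list, pos);
    }

    static void *my_seq_next(struct seq_file *m, void *v, loff_t *pos)
    {
        return seq_sorted_list_next(v, &my_list, pos);
    }

    static void my_seq_stop(struct seq_file *m, void *v)
    {
        mutex_unlock(&my_lock);
    }

    static int my_seq_show(struct seq_file *m, void *v)
    {
        if (v == &my_list)              /* head is emitted first: print a banner */
            seq_puts(m, "address  entry\n");
        /* otherwise container_of(v, ...) yields the real element */
        return 0;
    }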
diff --git a/fs/splice.c b/fs/splice.c
index 50a5d978da1..e76aac5c993 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -259,6 +259,7 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
return ret;
}
+EXPORT_SYMBOL_GPL(splice_to_pipe);
static void spd_release_page(struct splice_pipe_desc *spd, unsigned int i)
{
diff --git a/fs/super.c b/fs/super.c
index 7e9dd4cc2c0..0d89e93f654 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -71,6 +71,7 @@ static struct super_block *alloc_super(struct file_system_type *type)
#else
INIT_LIST_HEAD(&s->s_files);
#endif
+ s->s_bdi = &default_backing_dev_info;
INIT_LIST_HEAD(&s->s_instances);
INIT_HLIST_BL_HEAD(&s->s_anon);
INIT_LIST_HEAD(&s->s_inodes);
@@ -1003,6 +1004,7 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
}
BUG_ON(!mnt->mnt_sb);
WARN_ON(!mnt->mnt_sb->s_bdi);
+ WARN_ON(mnt->mnt_sb->s_bdi == &default_backing_dev_info);
mnt->mnt_sb->s_flags |= MS_BORN;
error = security_sb_kern_mount(mnt->mnt_sb, flags, secdata);
diff --git a/fs/sync.c b/fs/sync.c
index ba76b9623e7..412dc89163d 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -33,7 +33,7 @@ static int __sync_filesystem(struct super_block *sb, int wait)
* This should be safe, as we require bdi backing to actually
* write out data in the first place
*/
- if (!sb->s_bdi || sb->s_bdi == &noop_backing_dev_info)
+ if (sb->s_bdi == &noop_backing_dev_info)
return 0;
if (sb->s_qcop && sb->s_qcop->quota_sync)
@@ -79,7 +79,7 @@ EXPORT_SYMBOL_GPL(sync_filesystem);
static void sync_one_sb(struct super_block *sb, void *arg)
{
- if (!(sb->s_flags & MS_RDONLY) && sb->s_bdi)
+ if (!(sb->s_flags & MS_RDONLY))
__sync_filesystem(sb, *(int *)arg);
}
/*