summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/binfmt_elf.c11
-rw-r--r--fs/block_dev.c13
-rw-r--r--fs/cifs/connect.c4
-rw-r--r--fs/cifs/inode.c19
-rw-r--r--fs/ecryptfs/crypto.c26
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h5
-rw-r--r--fs/ecryptfs/file.c23
-rw-r--r--fs/ecryptfs/inode.c52
-rw-r--r--fs/eventpoll.c25
-rw-r--r--fs/ext4/ext4.h3
-rw-r--r--fs/ext4/inode.c3
-rw-r--r--fs/ext4/namei.c8
-rw-r--r--fs/ext4/xattr.c6
-rw-r--r--fs/hfs/trans.c2
-rw-r--r--fs/namei.c34
-rw-r--r--fs/namespace.c1
-rw-r--r--fs/nfs/blocklayout/blocklayout.c58
-rw-r--r--fs/nfs/blocklayout/blocklayout.h4
-rw-r--r--fs/nfs/blocklayout/blocklayoutdev.c35
-rw-r--r--fs/nfs/dir.c2
-rw-r--r--fs/nfs/file.c32
-rw-r--r--fs/nfs/idmap.c25
-rw-r--r--fs/nfs/inode.c2
-rw-r--r--fs/nfs/nfs3proc.c1
-rw-r--r--fs/nfs/nfs4filelayout.c19
-rw-r--r--fs/nfs/nfs4proc.c1
-rw-r--r--fs/nfs/pnfs.c52
-rw-r--r--fs/nfs/pnfs.h5
-rw-r--r--fs/nfs/proc.c1
-rw-r--r--fs/nfs/read.c12
-rw-r--r--fs/nfs/write.c68
-rw-r--r--fs/nfsd/nfs4proc.c6
-rw-r--r--fs/nfsd/nfs4recover.c2
-rw-r--r--fs/nfsd/nfs4state.c30
-rw-r--r--fs/nfsd/nfs4xdr.c12
-rw-r--r--fs/nfsd/state.h6
-rw-r--r--fs/nfsd/vfs.c3
-rw-r--r--fs/nfsd/vfs.h1
-rw-r--r--fs/proc/task_mmu.c3
-rw-r--r--fs/stat.c5
-rw-r--r--fs/statfs.c2
-rw-r--r--fs/super.c2
-rw-r--r--fs/xfs/xfs_buf.h1
-rw-r--r--fs/xfs/xfs_file.c34
-rw-r--r--fs/xfs/xfs_iops.c2
-rw-r--r--fs/xfs/xfs_mount.c29
-rw-r--r--fs/xfs/xfs_qm.c3
-rw-r--r--fs/xfs/xfs_vnodeops.c14
48 files changed, 425 insertions, 282 deletions
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index dd0fdfc56d3..21ac5ee4b43 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -795,7 +795,16 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
* might try to exec. This is because the brk will
* follow the loader, and is not movable. */
#if defined(CONFIG_X86) || defined(CONFIG_ARM)
- load_bias = 0;
+ /* Memory randomization might have been switched off
+ * in runtime via sysctl.
+ * If that is the case, retain the original non-zero
+ * load_bias value in order to establish proper
+ * non-randomized mappings.
+ */
+ if (current->flags & PF_RANDOMIZE)
+ load_bias = 0;
+ else
+ load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#else
load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#endif
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 95f786ec7f0..1c44b8d5450 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1085,6 +1085,7 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);
static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
{
struct gendisk *disk;
+ struct module *owner;
int ret;
int partno;
int perm = 0;
@@ -1110,6 +1111,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
disk = get_gendisk(bdev->bd_dev, &partno);
if (!disk)
goto out;
+ owner = disk->fops->owner;
disk_block_events(disk);
mutex_lock_nested(&bdev->bd_mutex, for_part);
@@ -1137,8 +1139,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
bdev->bd_disk = NULL;
mutex_unlock(&bdev->bd_mutex);
disk_unblock_events(disk);
- module_put(disk->fops->owner);
put_disk(disk);
+ module_put(owner);
goto restart;
}
}
@@ -1194,8 +1196,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
goto out_unlock_bdev;
}
/* only one opener holds refs to the module and disk */
- module_put(disk->fops->owner);
put_disk(disk);
+ module_put(owner);
}
bdev->bd_openers++;
if (for_part)
@@ -1215,8 +1217,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
out_unlock_bdev:
mutex_unlock(&bdev->bd_mutex);
disk_unblock_events(disk);
- module_put(disk->fops->owner);
put_disk(disk);
+ module_put(owner);
out:
bdput(bdev);
@@ -1442,14 +1444,15 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
if (!bdev->bd_openers) {
struct module *owner = disk->fops->owner;
- put_disk(disk);
- module_put(owner);
disk_put_part(bdev->bd_part);
bdev->bd_part = NULL;
bdev->bd_disk = NULL;
if (bdev != bdev->bd_contains)
victim = bdev->bd_contains;
bdev->bd_contains = NULL;
+
+ put_disk(disk);
+ module_put(owner);
}
mutex_unlock(&bdev->bd_mutex);
bdput(bdev);
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 71beb020197..28bba5791d9 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -2807,10 +2807,10 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
/*
* When the server doesn't allow large posix writes, only allow a wsize of
- * 128k minus the size of the WRITE_AND_X header. That allows for a write up
+ * 2^17-1 minus the size of the WRITE_AND_X header. That allows for a write up
* to the maximum size described by RFC1002.
*/
-#define CIFS_MAX_RFC1002_WSIZE (128 * 1024 - sizeof(WRITE_REQ) + 4)
+#define CIFS_MAX_RFC1002_WSIZE ((1<<17) - 1 - sizeof(WRITE_REQ) + 4)
/*
* The default wsize is 1M. find_get_pages seems to return a maximum of 256
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index a7b2dcd4a53..745e5cdca8f 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -562,7 +562,16 @@ int cifs_get_file_info(struct file *filp)
xid = GetXid();
rc = CIFSSMBQFileInfo(xid, tcon, cfile->netfid, &find_data);
- if (rc == -EOPNOTSUPP || rc == -EINVAL) {
+ switch (rc) {
+ case 0:
+ cifs_all_info_to_fattr(&fattr, &find_data, cifs_sb, false);
+ break;
+ case -EREMOTE:
+ cifs_create_dfs_fattr(&fattr, inode->i_sb);
+ rc = 0;
+ break;
+ case -EOPNOTSUPP:
+ case -EINVAL:
/*
* FIXME: legacy server -- fall back to path-based call?
* for now, just skip revalidating and mark inode for
@@ -570,18 +579,14 @@ int cifs_get_file_info(struct file *filp)
*/
rc = 0;
CIFS_I(inode)->time = 0;
+ default:
goto cgfi_exit;
- } else if (rc == -EREMOTE) {
- cifs_create_dfs_fattr(&fattr, inode->i_sb);
- rc = 0;
- } else if (rc)
- goto cgfi_exit;
+ }
/*
* don't bother with SFU junk here -- just mark inode as needing
* revalidation.
*/
- cifs_all_info_to_fattr(&fattr, &find_data, cifs_sb, false);
fattr.cf_uniqueid = CIFS_I(inode)->uniqueid;
fattr.cf_flags |= CIFS_FATTR_NEED_REVAL;
cifs_fattr_to_inode(inode, &fattr);
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 58609bde3b9..2a834255c75 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -967,7 +967,7 @@ static void ecryptfs_set_default_crypt_stat_vals(
/**
* ecryptfs_new_file_context
- * @ecryptfs_dentry: The eCryptfs dentry
+ * @ecryptfs_inode: The eCryptfs inode
*
* If the crypto context for the file has not yet been established,
* this is where we do that. Establishing a new crypto context
@@ -984,13 +984,13 @@ static void ecryptfs_set_default_crypt_stat_vals(
*
* Returns zero on success; non-zero otherwise
*/
-int ecryptfs_new_file_context(struct dentry *ecryptfs_dentry)
+int ecryptfs_new_file_context(struct inode *ecryptfs_inode)
{
struct ecryptfs_crypt_stat *crypt_stat =
- &ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->crypt_stat;
+ &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat;
struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
&ecryptfs_superblock_to_private(
- ecryptfs_dentry->d_sb)->mount_crypt_stat;
+ ecryptfs_inode->i_sb)->mount_crypt_stat;
int cipher_name_len;
int rc = 0;
@@ -1299,12 +1299,12 @@ static int ecryptfs_write_headers_virt(char *page_virt, size_t max,
}
static int
-ecryptfs_write_metadata_to_contents(struct dentry *ecryptfs_dentry,
+ecryptfs_write_metadata_to_contents(struct inode *ecryptfs_inode,
char *virt, size_t virt_len)
{
int rc;
- rc = ecryptfs_write_lower(ecryptfs_dentry->d_inode, virt,
+ rc = ecryptfs_write_lower(ecryptfs_inode, virt,
0, virt_len);
if (rc < 0)
printk(KERN_ERR "%s: Error attempting to write header "
@@ -1338,7 +1338,8 @@ static unsigned long ecryptfs_get_zeroed_pages(gfp_t gfp_mask,
/**
* ecryptfs_write_metadata
- * @ecryptfs_dentry: The eCryptfs dentry
+ * @ecryptfs_dentry: The eCryptfs dentry, which should be negative
+ * @ecryptfs_inode: The newly created eCryptfs inode
*
* Write the file headers out. This will likely involve a userspace
* callout, in which the session key is encrypted with one or more
@@ -1348,10 +1349,11 @@ static unsigned long ecryptfs_get_zeroed_pages(gfp_t gfp_mask,
*
* Returns zero on success; non-zero on error
*/
-int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry)
+int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry,
+ struct inode *ecryptfs_inode)
{
struct ecryptfs_crypt_stat *crypt_stat =
- &ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->crypt_stat;
+ &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat;
unsigned int order;
char *virt;
size_t virt_len;
@@ -1391,7 +1393,7 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry)
rc = ecryptfs_write_metadata_to_xattr(ecryptfs_dentry, virt,
size);
else
- rc = ecryptfs_write_metadata_to_contents(ecryptfs_dentry, virt,
+ rc = ecryptfs_write_metadata_to_contents(ecryptfs_inode, virt,
virt_len);
if (rc) {
printk(KERN_ERR "%s: Error writing metadata out to lower file; "
@@ -1943,7 +1945,7 @@ static unsigned char *portable_filename_chars = ("-.0123456789ABCD"
/* We could either offset on every reverse map or just pad some 0x00's
* at the front here */
-static const unsigned char filename_rev_map[] = {
+static const unsigned char filename_rev_map[256] = {
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 7 */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 15 */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 23 */
@@ -1959,7 +1961,7 @@ static const unsigned char filename_rev_map[] = {
0x00, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, /* 103 */
0x2D, 0x2E, 0x2F, 0x30, 0x31, 0x32, 0x33, 0x34, /* 111 */
0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, /* 119 */
- 0x3D, 0x3E, 0x3F
+ 0x3D, 0x3E, 0x3F /* 123 - 255 initialized to 0x00 */
};
/**
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index b36c5572b3f..9ce1e92c7d9 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -584,9 +584,10 @@ int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat);
int ecryptfs_write_inode_size_to_metadata(struct inode *ecryptfs_inode);
int ecryptfs_encrypt_page(struct page *page);
int ecryptfs_decrypt_page(struct page *page);
-int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry);
+int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry,
+ struct inode *ecryptfs_inode);
int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry);
-int ecryptfs_new_file_context(struct dentry *ecryptfs_dentry);
+int ecryptfs_new_file_context(struct inode *ecryptfs_inode);
void ecryptfs_write_crypt_stat_flags(char *page_virt,
struct ecryptfs_crypt_stat *crypt_stat,
size_t *written);
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index c6ac98cf9ba..d3f95f941c4 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -139,6 +139,27 @@ out:
return rc;
}
+static void ecryptfs_vma_close(struct vm_area_struct *vma)
+{
+ filemap_write_and_wait(vma->vm_file->f_mapping);
+}
+
+static const struct vm_operations_struct ecryptfs_file_vm_ops = {
+ .close = ecryptfs_vma_close,
+ .fault = filemap_fault,
+};
+
+static int ecryptfs_file_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ int rc;
+
+ rc = generic_file_mmap(file, vma);
+ if (!rc)
+ vma->vm_ops = &ecryptfs_file_vm_ops;
+
+ return rc;
+}
+
struct kmem_cache *ecryptfs_file_info_cache;
/**
@@ -349,7 +370,7 @@ const struct file_operations ecryptfs_main_fops = {
#ifdef CONFIG_COMPAT
.compat_ioctl = ecryptfs_compat_ioctl,
#endif
- .mmap = generic_file_mmap,
+ .mmap = ecryptfs_file_mmap,
.open = ecryptfs_open,
.flush = ecryptfs_flush,
.release = ecryptfs_release,
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 11f8582d721..528da01fec2 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -172,22 +172,23 @@ ecryptfs_create_underlying_file(struct inode *lower_dir_inode,
* it. It will also update the eCryptfs directory inode to mimic the
* stat of the lower directory inode.
*
- * Returns zero on success; non-zero on error condition
+ * Returns the new eCryptfs inode on success; an ERR_PTR on error condition
*/
-static int
+static struct inode *
ecryptfs_do_create(struct inode *directory_inode,
struct dentry *ecryptfs_dentry, int mode)
{
int rc;
struct dentry *lower_dentry;
struct dentry *lower_dir_dentry;
+ struct inode *inode;
lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry);
lower_dir_dentry = lock_parent(lower_dentry);
if (IS_ERR(lower_dir_dentry)) {
ecryptfs_printk(KERN_ERR, "Error locking directory of "
"dentry\n");
- rc = PTR_ERR(lower_dir_dentry);
+ inode = ERR_CAST(lower_dir_dentry);
goto out;
}
rc = ecryptfs_create_underlying_file(lower_dir_dentry->d_inode,
@@ -195,20 +196,19 @@ ecryptfs_do_create(struct inode *directory_inode,
if (rc) {
printk(KERN_ERR "%s: Failure to create dentry in lower fs; "
"rc = [%d]\n", __func__, rc);
+ inode = ERR_PTR(rc);
goto out_lock;
}
- rc = ecryptfs_interpose(lower_dentry, ecryptfs_dentry,
- directory_inode->i_sb);
- if (rc) {
- ecryptfs_printk(KERN_ERR, "Failure in ecryptfs_interpose\n");
+ inode = __ecryptfs_get_inode(lower_dentry->d_inode,
+ directory_inode->i_sb);
+ if (IS_ERR(inode))
goto out_lock;
- }
fsstack_copy_attr_times(directory_inode, lower_dir_dentry->d_inode);
fsstack_copy_inode_size(directory_inode, lower_dir_dentry->d_inode);
out_lock:
unlock_dir(lower_dir_dentry);
out:
- return rc;
+ return inode;
}
/**
@@ -219,26 +219,26 @@ out:
*
* Returns zero on success
*/
-static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry)
+static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry,
+ struct inode *ecryptfs_inode)
{
struct ecryptfs_crypt_stat *crypt_stat =
- &ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->crypt_stat;
+ &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat;
int rc = 0;
- if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) {
+ if (S_ISDIR(ecryptfs_inode->i_mode)) {
ecryptfs_printk(KERN_DEBUG, "This is a directory\n");
crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED);
goto out;
}
ecryptfs_printk(KERN_DEBUG, "Initializing crypto context\n");
- rc = ecryptfs_new_file_context(ecryptfs_dentry);
+ rc = ecryptfs_new_file_context(ecryptfs_inode);
if (rc) {
ecryptfs_printk(KERN_ERR, "Error creating new file "
"context; rc = [%d]\n", rc);
goto out;
}
- rc = ecryptfs_get_lower_file(ecryptfs_dentry,
- ecryptfs_dentry->d_inode);
+ rc = ecryptfs_get_lower_file(ecryptfs_dentry, ecryptfs_inode);
if (rc) {
printk(KERN_ERR "%s: Error attempting to initialize "
"the lower file for the dentry with name "
@@ -246,10 +246,10 @@ static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry)
ecryptfs_dentry->d_name.name, rc);
goto out;
}
- rc = ecryptfs_write_metadata(ecryptfs_dentry);
+ rc = ecryptfs_write_metadata(ecryptfs_dentry, ecryptfs_inode);
if (rc)
printk(KERN_ERR "Error writing headers; rc = [%d]\n", rc);
- ecryptfs_put_lower_file(ecryptfs_dentry->d_inode);
+ ecryptfs_put_lower_file(ecryptfs_inode);
out:
return rc;
}
@@ -269,18 +269,28 @@ static int
ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry,
int mode, struct nameidata *nd)
{
+ struct inode *ecryptfs_inode;
int rc;
- /* ecryptfs_do_create() calls ecryptfs_interpose() */
- rc = ecryptfs_do_create(directory_inode, ecryptfs_dentry, mode);
- if (unlikely(rc)) {
+ ecryptfs_inode = ecryptfs_do_create(directory_inode, ecryptfs_dentry,
+ mode);
+ if (unlikely(IS_ERR(ecryptfs_inode))) {
ecryptfs_printk(KERN_WARNING, "Failed to create file in"
"lower filesystem\n");
+ rc = PTR_ERR(ecryptfs_inode);
goto out;
}
/* At this point, a file exists on "disk"; we need to make sure
* that this on disk file is prepared to be an ecryptfs file */
- rc = ecryptfs_initialize_file(ecryptfs_dentry);
+ rc = ecryptfs_initialize_file(ecryptfs_dentry, ecryptfs_inode);
+ if (rc) {
+ drop_nlink(ecryptfs_inode);
+ unlock_new_inode(ecryptfs_inode);
+ iput(ecryptfs_inode);
+ goto out;
+ }
+ d_instantiate(ecryptfs_dentry, ecryptfs_inode);
+ unlock_new_inode(ecryptfs_inode);
out:
return rc;
}
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index fe047d966dc..2d1744ab5bc 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -70,6 +70,15 @@
* simultaneous inserts (A into B and B into A) from racing and
* constructing a cycle without either insert observing that it is
* going to.
+ * It is necessary to acquire multiple "ep->mtx"es at once in the
+ * case when one epoll fd is added to another. In this case, we
+ * always acquire the locks in the order of nesting (i.e. after
+ * epoll_ctl(e1, EPOLL_CTL_ADD, e2), e1->mtx will always be acquired
+ * before e2->mtx). Since we disallow cycles of epoll file
+ * descriptors, this ensures that the mutexes are well-ordered. In
+ * order to communicate this nesting to lockdep, when walking a tree
+ * of epoll file descriptors, we use the current recursion depth as
+ * the lockdep subkey.
* It is possible to drop the "ep->mtx" and to use the global
* mutex "epmutex" (together with "ep->lock") to have it working,
* but having "ep->mtx" will make the interface more scalable.
@@ -464,13 +473,15 @@ static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi)
* @ep: Pointer to the epoll private data structure.
* @sproc: Pointer to the scan callback.
* @priv: Private opaque data passed to the @sproc callback.
+ * @depth: The current depth of recursive f_op->poll calls.
*
* Returns: The same integer error code returned by the @sproc callback.
*/
static int ep_scan_ready_list(struct eventpoll *ep,
int (*sproc)(struct eventpoll *,
struct list_head *, void *),
- void *priv)
+ void *priv,
+ int depth)
{
int error, pwake = 0;
unsigned long flags;
@@ -481,7 +492,7 @@ static int ep_scan_ready_list(struct eventpoll *ep,
* We need to lock this because we could be hit by
* eventpoll_release_file() and epoll_ctl().
*/
- mutex_lock(&ep->mtx);
+ mutex_lock_nested(&ep->mtx, depth);
/*
* Steal the ready list, and re-init the original one to the
@@ -670,7 +681,7 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head,
static int ep_poll_readyevents_proc(void *priv, void *cookie, int call_nests)
{
- return ep_scan_ready_list(priv, ep_read_events_proc, NULL);
+ return ep_scan_ready_list(priv, ep_read_events_proc, NULL, call_nests + 1);
}
static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait)
@@ -737,7 +748,7 @@ void eventpoll_release_file(struct file *file)
ep = epi->ep;
list_del_init(&epi->fllink);
- mutex_lock(&ep->mtx);
+ mutex_lock_nested(&ep->mtx, 0);
ep_remove(ep, epi);
mutex_unlock(&ep->mtx);
}
@@ -1134,7 +1145,7 @@ static int ep_send_events(struct eventpoll *ep,
esed.maxevents = maxevents;
esed.events = events;
- return ep_scan_ready_list(ep, ep_send_events_proc, &esed);
+ return ep_scan_ready_list(ep, ep_send_events_proc, &esed, 0);
}
static inline struct timespec ep_set_mstimeout(long ms)
@@ -1267,7 +1278,7 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
struct rb_node *rbp;
struct epitem *epi;
- mutex_lock(&ep->mtx);
+ mutex_lock_nested(&ep->mtx, call_nests + 1);
for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
epi = rb_entry(rbp, struct epitem, rbn);
if (unlikely(is_file_epoll(epi->ffd.file))) {
@@ -1409,7 +1420,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
}
- mutex_lock(&ep->mtx);
+ mutex_lock_nested(&ep->mtx, 0);
/*
* Try to lookup the file inside our RB tree, Since we grabbed "mtx"
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index b7d7bd0f066..5c38120c389 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -358,8 +358,7 @@ struct flex_groups {
/* Flags that should be inherited by new inodes from their parent. */
#define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
- EXT4_SYNC_FL | EXT4_IMMUTABLE_FL | EXT4_APPEND_FL |\
- EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
+ EXT4_SYNC_FL | EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\
EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 986e2388f03..b644b9c164a 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2656,8 +2656,8 @@ out:
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
/* queue the work to convert unwritten extents to written */
- queue_work(wq, &io_end->work);
iocb->private = NULL;
+ queue_work(wq, &io_end->work);
/* XXX: probably should move into the real I/O completion handler */
inode_dio_done(inode);
@@ -4416,6 +4416,7 @@ retry_alloc:
PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) {
unlock_page(page);
ret = VM_FAULT_SIGBUS;
+ ext4_journal_stop(handle);
goto out;
}
ext4_set_inode_state(inode, EXT4_STATE_JDATA);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 1c924faeb6c..50c72943d7b 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1586,7 +1586,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
dxtrace(dx_show_index("node", frames[1].entries));
dxtrace(dx_show_index("node",
((struct dx_node *) bh2->b_data)->entries));
- err = ext4_handle_dirty_metadata(handle, inode, bh2);
+ err = ext4_handle_dirty_metadata(handle, dir, bh2);
if (err)
goto journal_error;
brelse (bh2);
@@ -1612,7 +1612,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
if (err)
goto journal_error;
}
- err = ext4_handle_dirty_metadata(handle, inode, frames[0].bh);
+ err = ext4_handle_dirty_metadata(handle, dir, frames[0].bh);
if (err) {
ext4_std_error(inode->i_sb, err);
goto cleanup;
@@ -1863,7 +1863,7 @@ retry:
ext4_set_de_type(dir->i_sb, de, S_IFDIR);
inode->i_nlink = 2;
BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata");
- err = ext4_handle_dirty_metadata(handle, dir, dir_block);
+ err = ext4_handle_dirty_metadata(handle, inode, dir_block);
if (err)
goto out_clear_inode;
err = ext4_mark_inode_dirty(handle, inode);
@@ -2530,7 +2530,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
PARENT_INO(dir_bh->b_data, new_dir->i_sb->s_blocksize) =
cpu_to_le32(new_dir->i_ino);
BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata");
- retval = ext4_handle_dirty_metadata(handle, old_dir, dir_bh);
+ retval = ext4_handle_dirty_metadata(handle, old_inode, dir_bh);
if (retval) {
ext4_std_error(old_dir->i_sb, retval);
goto end_rename;
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index c757adc9725..19fe4e3d39e 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -820,8 +820,14 @@ inserted:
if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
goal = goal & EXT4_MAX_BLOCK_FILE_PHYS;
+ /*
+ * take i_data_sem because we will test
+ * i_delalloc_reserved_flag in ext4_mb_new_blocks
+ */
+ down_read((&EXT4_I(inode)->i_data_sem));
block = ext4_new_meta_blocks(handle, inode, goal, 0,
NULL, &error);
+ up_read((&EXT4_I(inode)->i_data_sem));
if (error)
goto cleanup;
diff --git a/fs/hfs/trans.c b/fs/hfs/trans.c
index e673a88b8ae..b1ce4c7ad3f 100644
--- a/fs/hfs/trans.c
+++ b/fs/hfs/trans.c
@@ -40,6 +40,8 @@ int hfs_mac2asc(struct super_block *sb, char *out, const struct hfs_name *in)
src = in->name;
srclen = in->len;
+ if (srclen > HFS_NAMELEN)
+ srclen = HFS_NAMELEN;
dst = out;
dstlen = HFS_MAX_NAMELEN;
if (nls_io) {
diff --git a/fs/namei.c b/fs/namei.c
index 0b3138de2a3..3d150720c37 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -137,7 +137,7 @@ static int do_getname(const char __user *filename, char *page)
return retval;
}
-static char *getname_flags(const char __user * filename, int flags)
+static char *getname_flags(const char __user *filename, int flags, int *empty)
{
char *tmp, *result;
@@ -148,6 +148,8 @@ static char *getname_flags(const char __user * filename, int flags)
result = tmp;
if (retval < 0) {
+ if (retval == -ENOENT && empty)
+ *empty = 1;
if (retval != -ENOENT || !(flags & LOOKUP_EMPTY)) {
__putname(tmp);
result = ERR_PTR(retval);
@@ -160,7 +162,7 @@ static char *getname_flags(const char __user * filename, int flags)
char *getname(const char __user * filename)
{
- return getname_flags(filename, 0);
+ return getname_flags(filename, 0, 0);
}
#ifdef CONFIG_AUDITSYSCALL
@@ -850,7 +852,7 @@ static int follow_managed(struct path *path, unsigned flags)
mntput(path->mnt);
if (ret == -EISDIR)
ret = 0;
- return ret;
+ return ret < 0 ? ret : need_mntput;
}
int follow_down_one(struct path *path)
@@ -898,6 +900,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
break;
path->mnt = mounted;
path->dentry = mounted->mnt_root;
+ nd->flags |= LOOKUP_JUMPED;
nd->seq = read_seqcount_begin(&path->dentry->d_seq);
/*
* Update the inode too. We don't need to re-check the
@@ -1211,6 +1214,8 @@ retry:
path_put_conditional(path, nd);
return err;
}
+ if (err)
+ nd->flags |= LOOKUP_JUMPED;
*inode = path->dentry->d_inode;
return 0;
}
@@ -1798,11 +1803,11 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
return __lookup_hash(&this, base, NULL);
}
-int user_path_at(int dfd, const char __user *name, unsigned flags,
- struct path *path)
+int user_path_at_empty(int dfd, const char __user *name, unsigned flags,
+ struct path *path, int *empty)
{
struct nameidata nd;
- char *tmp = getname_flags(name, flags);
+ char *tmp = getname_flags(name, flags, empty);
int err = PTR_ERR(tmp);
if (!IS_ERR(tmp)) {
@@ -1816,6 +1821,12 @@ int user_path_at(int dfd, const char __user *name, unsigned flags,
return err;
}
+int user_path_at(int dfd, const char __user *name, unsigned flags,
+ struct path *path)
+{
+ return user_path_at_empty(dfd, name, flags, path, 0);
+}
+
static int user_path_parent(int dfd, const char __user *path,
struct nameidata *nd, char **name)
{
@@ -2141,6 +2152,10 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
}
/* create side of things */
+ /*
+ * This will *only* deal with leaving RCU mode - LOOKUP_JUMPED has been
+ * cleared when we got to the last component we are about to look up
+ */
error = complete_walk(nd);
if (error)
return ERR_PTR(error);
@@ -2209,6 +2224,9 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
if (error < 0)
goto exit_dput;
+ if (error)
+ nd->flags |= LOOKUP_JUMPED;
+
error = -ENOENT;
if (!path->dentry->d_inode)
goto exit_dput;
@@ -2218,6 +2236,10 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
path_to_nameidata(path, nd);
nd->inode = path->dentry->d_inode;
+ /* Why this, you ask? _Now_ we might have grown LOOKUP_JUMPED... */
+ error = complete_walk(nd);
+ if (error)
+ goto exit;
error = -EISDIR;
if (S_ISDIR(nd->inode->i_mode))
goto exit;
diff --git a/fs/namespace.c b/fs/namespace.c
index b4febb29d3b..e5e1c7d1839 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1109,6 +1109,7 @@ static int show_vfsstat(struct seq_file *m, void *v)
/* device */
if (mnt->mnt_sb->s_op->show_devname) {
+ seq_puts(m, "device ");
err = mnt->mnt_sb->s_op->show_devname(m, mnt);
} else {
if (mnt->mnt_devname) {
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 9561c8fc8bd..281ae95932c 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -176,17 +176,6 @@ retry:
return bio;
}
-static void bl_set_lo_fail(struct pnfs_layout_segment *lseg)
-{
- if (lseg->pls_range.iomode == IOMODE_RW) {
- dprintk("%s Setting layout IOMODE_RW fail bit\n", __func__);
- set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
- } else {
- dprintk("%s Setting layout IOMODE_READ fail bit\n", __func__);
- set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
- }
-}
-
/* This is basically copied from mpage_end_io_read */
static void bl_end_io_read(struct bio *bio, int err)
{
@@ -206,7 +195,7 @@ static void bl_end_io_read(struct bio *bio, int err)
if (!uptodate) {
if (!rdata->pnfs_error)
rdata->pnfs_error = -EIO;
- bl_set_lo_fail(rdata->lseg);
+ pnfs_set_lo_fail(rdata->lseg);
}
bio_put(bio);
put_parallel(par);
@@ -303,6 +292,7 @@ bl_read_pagelist(struct nfs_read_data *rdata)
bl_end_io_read, par);
if (IS_ERR(bio)) {
rdata->pnfs_error = PTR_ERR(bio);
+ bio = NULL;
goto out;
}
}
@@ -370,7 +360,7 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
if (!uptodate) {
if (!wdata->pnfs_error)
wdata->pnfs_error = -EIO;
- bl_set_lo_fail(wdata->lseg);
+ pnfs_set_lo_fail(wdata->lseg);
}
bio_put(bio);
put_parallel(par);
@@ -386,7 +376,7 @@ static void bl_end_io_write(struct bio *bio, int err)
if (!uptodate) {
if (!wdata->pnfs_error)
wdata->pnfs_error = -EIO;
- bl_set_lo_fail(wdata->lseg);
+ pnfs_set_lo_fail(wdata->lseg);
}
bio_put(bio);
put_parallel(par);
@@ -543,6 +533,11 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
fill_invalid_ext:
dprintk("%s need to zero %d pages\n", __func__, npg_zero);
for (;npg_zero > 0; npg_zero--) {
+ if (bl_is_sector_init(be->be_inval, isect)) {
+ dprintk("isect %llu already init\n",
+ (unsigned long long)isect);
+ goto next_page;
+ }
/* page ref released in bl_end_io_write_zero */
index = isect >> PAGE_CACHE_SECTOR_SHIFT;
dprintk("%s zero %dth page: index %lu isect %llu\n",
@@ -562,8 +557,7 @@ fill_invalid_ext:
* PageUptodate: It was read before
* sector_initialized: already written out
*/
- if (PageDirty(page) || PageWriteback(page) ||
- bl_is_sector_init(be->be_inval, isect)) {
+ if (PageDirty(page) || PageWriteback(page)) {
print_page(page);
unlock_page(page);
page_cache_release(page);
@@ -592,6 +586,7 @@ fill_invalid_ext:
bl_end_io_write_zero, par);
if (IS_ERR(bio)) {
wdata->pnfs_error = PTR_ERR(bio);
+ bio = NULL;
goto out;
}
/* FIXME: This should be done in bi_end_io */
@@ -640,6 +635,7 @@ next_page:
bl_end_io_write, par);
if (IS_ERR(bio)) {
wdata->pnfs_error = PTR_ERR(bio);
+ bio = NULL;
goto out;
}
isect += PAGE_CACHE_SECTORS;
@@ -805,7 +801,7 @@ nfs4_blk_get_deviceinfo(struct nfs_server *server, const struct nfs_fh *fh,
struct nfs4_deviceid *d_id)
{
struct pnfs_device *dev;
- struct pnfs_block_dev *rv = NULL;
+ struct pnfs_block_dev *rv;
u32 max_resp_sz;
int max_pages;
struct page **pages = NULL;
@@ -823,18 +819,20 @@ nfs4_blk_get_deviceinfo(struct nfs_server *server, const struct nfs_fh *fh,
dev = kmalloc(sizeof(*dev), GFP_NOFS);
if (!dev) {
dprintk("%s kmalloc failed\n", __func__);
- return NULL;
+ return ERR_PTR(-ENOMEM);
}
pages = kzalloc(max_pages * sizeof(struct page *), GFP_NOFS);
if (pages == NULL) {
kfree(dev);
- return NULL;
+ return ERR_PTR(-ENOMEM);
}
for (i = 0; i < max_pages; i++) {
pages[i] = alloc_page(GFP_NOFS);
- if (!pages[i])
+ if (!pages[i]) {
+ rv = ERR_PTR(-ENOMEM);
goto out_free;
+ }
}
memcpy(&dev->dev_id, d_id, sizeof(*d_id));
@@ -847,8 +845,10 @@ nfs4_blk_get_deviceinfo(struct nfs_server *server, const struct nfs_fh *fh,
dprintk("%s: dev_id: %s\n", __func__, dev->dev_id.data);
rc = nfs4_proc_getdeviceinfo(server, dev);
dprintk("%s getdevice info returns %d\n", __func__, rc);
- if (rc)
+ if (rc) {
+ rv = ERR_PTR(rc);
goto out_free;
+ }
rv = nfs4_blk_decode_device(server, dev);
out_free:
@@ -866,7 +866,7 @@ bl_set_layoutdriver(struct nfs_server *server, const struct nfs_fh *fh)
struct pnfs_devicelist *dlist = NULL;
struct pnfs_block_dev *bdev;
LIST_HEAD(block_disklist);
- int status = 0, i;
+ int status, i;
dprintk("%s enter\n", __func__);
@@ -898,8 +898,8 @@ bl_set_layoutdriver(struct nfs_server *server, const struct nfs_fh *fh)
for (i = 0; i < dlist->num_devs; i++) {
bdev = nfs4_blk_get_deviceinfo(server, fh,
&dlist->dev_id[i]);
- if (!bdev) {
- status = -ENODEV;
+ if (IS_ERR(bdev)) {
+ status = PTR_ERR(bdev);
goto out_error;
}
spin_lock(&b_mt_id->bm_lock);
@@ -960,7 +960,7 @@ static struct pnfs_layoutdriver_type blocklayout_type = {
};
static const struct rpc_pipe_ops bl_upcall_ops = {
- .upcall = bl_pipe_upcall,
+ .upcall = rpc_pipe_generic_upcall,
.downcall = bl_pipe_downcall,
.destroy_msg = bl_pipe_destroy_msg,
};
@@ -989,17 +989,20 @@ static int __init nfs4blocklayout_init(void)
mnt,
NFS_PIPE_DIRNAME, 0, &path);
if (ret)
- goto out_remove;
+ goto out_putrpc;
bl_device_pipe = rpc_mkpipe(path.dentry, "blocklayout", NULL,
&bl_upcall_ops, 0);
+ path_put(&path);
if (IS_ERR(bl_device_pipe)) {
ret = PTR_ERR(bl_device_pipe);
- goto out_remove;
+ goto out_putrpc;
}
out:
return ret;
+out_putrpc:
+ rpc_put_mount();
out_remove:
pnfs_unregister_layoutdriver(&blocklayout_type);
return ret;
@@ -1012,6 +1015,7 @@ static void __exit nfs4blocklayout_exit(void)
pnfs_unregister_layoutdriver(&blocklayout_type);
rpc_unlink(bl_device_pipe);
+ rpc_put_mount();
}
MODULE_ALIAS("nfs-layouttype4-3");
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index f27d827960a..42acf7ef599 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -150,7 +150,7 @@ BLK_LSEG2EXT(struct pnfs_layout_segment *lseg)
}
struct bl_dev_msg {
- int status;
+ int32_t status;
uint32_t major, minor;
};
@@ -169,8 +169,6 @@ extern wait_queue_head_t bl_wq;
#define BL_DEVICE_REQUEST_ERR 0x2 /* User level process fails */
/* blocklayoutdev.c */
-ssize_t bl_pipe_upcall(struct file *, struct rpc_pipe_msg *,
- char __user *, size_t);
ssize_t bl_pipe_downcall(struct file *, const char __user *, size_t);
void bl_pipe_destroy_msg(struct rpc_pipe_msg *);
struct block_device *nfs4_blkdev_get(dev_t dev);
diff --git a/fs/nfs/blocklayout/blocklayoutdev.c b/fs/nfs/blocklayout/blocklayoutdev.c
index a83b393fb01..d08ba9107fd 100644
--- a/fs/nfs/blocklayout/blocklayoutdev.c
+++ b/fs/nfs/blocklayout/blocklayoutdev.c
@@ -79,28 +79,6 @@ int nfs4_blkdev_put(struct block_device *bdev)
return blkdev_put(bdev, FMODE_READ);
}
-/*
- * Shouldn't there be a rpc_generic_upcall() to do this for us?
- */
-ssize_t bl_pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg,
- char __user *dst, size_t buflen)
-{
- char *data = (char *)msg->data + msg->copied;
- size_t mlen = min(msg->len - msg->copied, buflen);
- unsigned long left;
-
- left = copy_to_user(dst, data, mlen);
- if (left == mlen) {
- msg->errno = -EFAULT;
- return -EFAULT;
- }
-
- mlen -= left;
- msg->copied += mlen;
- msg->errno = 0;
- return mlen;
-}
-
static struct bl_dev_msg bl_mount_reply;
ssize_t bl_pipe_downcall(struct file *filp, const char __user *src,
@@ -131,7 +109,7 @@ struct pnfs_block_dev *
nfs4_blk_decode_device(struct nfs_server *server,
struct pnfs_device *dev)
{
- struct pnfs_block_dev *rv = NULL;
+ struct pnfs_block_dev *rv;
struct block_device *bd = NULL;
struct rpc_pipe_msg msg;
struct bl_msg_hdr bl_msg = {
@@ -141,7 +119,7 @@ nfs4_blk_decode_device(struct nfs_server *server,
uint8_t *dataptr;
DECLARE_WAITQUEUE(wq, current);
struct bl_dev_msg *reply = &bl_mount_reply;
- int offset, len, i;
+ int offset, len, i, rc;
dprintk("%s CREATING PIPEFS MESSAGE\n", __func__);
dprintk("%s: deviceid: %s, mincount: %d\n", __func__, dev->dev_id.data,
@@ -168,8 +146,10 @@ nfs4_blk_decode_device(struct nfs_server *server,
dprintk("%s CALLING USERSPACE DAEMON\n", __func__);
add_wait_queue(&bl_wq, &wq);
- if (rpc_queue_upcall(bl_device_pipe->d_inode, &msg) < 0) {
+ rc = rpc_queue_upcall(bl_device_pipe->d_inode, &msg);
+ if (rc < 0) {
remove_wait_queue(&bl_wq, &wq);
+ rv = ERR_PTR(rc);
goto out;
}
@@ -187,8 +167,9 @@ nfs4_blk_decode_device(struct nfs_server *server,
bd = nfs4_blkdev_get(MKDEV(reply->major, reply->minor));
if (IS_ERR(bd)) {
- dprintk("%s failed to open device : %ld\n",
- __func__, PTR_ERR(bd));
+ rc = PTR_ERR(bd);
+ dprintk("%s failed to open device : %d\n", __func__, rc);
+ rv = ERR_PTR(rc);
goto out;
}
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index b238d95ac48..ac289909814 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1468,12 +1468,12 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
res = NULL;
goto out;
/* This turned out not to be a regular file */
+ case -EISDIR:
case -ENOTDIR:
goto no_open;
case -ELOOP:
if (!(nd->intent.open.flags & O_NOFOLLOW))
goto no_open;
- /* case -EISDIR: */
/* case -EINVAL: */
default:
res = ERR_CAST(inode);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 28b8c3f3cda..5b3d9842c40 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -895,3 +895,35 @@ static int nfs_setlease(struct file *file, long arg, struct file_lock **fl)
file->f_path.dentry->d_name.name, arg);
return -EINVAL;
}
+
+#ifdef CONFIG_NFS_V4
+static int
+nfs4_file_open(struct inode *inode, struct file *filp)
+{
+ /*
+ * NFSv4 opens are handled in d_lookup and d_revalidate. If we get to
+ * this point, then something is very wrong
+ */
+ dprintk("NFS: %s called! inode=%p filp=%p\n", __func__, inode, filp);
+ return -ENOTDIR;
+}
+
+const struct file_operations nfs4_file_operations = {
+ .llseek = nfs_file_llseek,
+ .read = do_sync_read,
+ .write = do_sync_write,
+ .aio_read = nfs_file_read,
+ .aio_write = nfs_file_write,
+ .mmap = nfs_file_mmap,
+ .open = nfs4_file_open,
+ .flush = nfs_file_flush,
+ .release = nfs_file_release,
+ .fsync = nfs_file_fsync,
+ .lock = nfs_lock,
+ .flock = nfs_flock,
+ .splice_read = nfs_file_splice_read,
+ .splice_write = nfs_file_splice_write,
+ .check_flags = nfs_check_flags,
+ .setlease = nfs_setlease,
+};
+#endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index f20801ae0a1..47d1c6ff2d8 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -336,8 +336,6 @@ struct idmap {
struct idmap_hashtable idmap_group_hash;
};
-static ssize_t idmap_pipe_upcall(struct file *, struct rpc_pipe_msg *,
- char __user *, size_t);
static ssize_t idmap_pipe_downcall(struct file *, const char __user *,
size_t);
static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *);
@@ -345,7 +343,7 @@ static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *);
static unsigned int fnvhash32(const void *, size_t);
static const struct rpc_pipe_ops idmap_upcall_ops = {
- .upcall = idmap_pipe_upcall,
+ .upcall = rpc_pipe_generic_upcall,
.downcall = idmap_pipe_downcall,
.destroy_msg = idmap_pipe_destroy_msg,
};
@@ -595,27 +593,6 @@ nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h,
return ret;
}
-/* RPC pipefs upcall/downcall routines */
-static ssize_t
-idmap_pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg,
- char __user *dst, size_t buflen)
-{
- char *data = (char *)msg->data + msg->copied;
- size_t mlen = min(msg->len, buflen);
- unsigned long left;
-
- left = copy_to_user(dst, data, mlen);
- if (left == mlen) {
- msg->errno = -EFAULT;
- return -EFAULT;
- }
-
- mlen -= left;
- msg->copied += mlen;
- msg->errno = 0;
- return mlen;
-}
-
static ssize_t
idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
{
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index fe1203797b2..679d2f50b14 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -291,7 +291,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
*/
inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->file_inode_ops;
if (S_ISREG(inode->i_mode)) {
- inode->i_fop = &nfs_file_operations;
+ inode->i_fop = NFS_SB(sb)->nfs_client->rpc_ops->file_ops;
inode->i_data.a_ops = &nfs_file_aops;
inode->i_data.backing_dev_info = &NFS_SB(sb)->backing_dev_info;
} else if (S_ISDIR(inode->i_mode)) {
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 85f1690ca08..d4bc9ed9174 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -853,6 +853,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
.dentry_ops = &nfs_dentry_operations,
.dir_inode_ops = &nfs3_dir_inode_operations,
.file_inode_ops = &nfs3_file_inode_operations,
+ .file_ops = &nfs_file_operations,
.getroot = nfs3_proc_get_root,
.getattr = nfs3_proc_getattr,
.setattr = nfs3_proc_setattr,
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index e8915d4840a..4c78c62639e 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -77,19 +77,6 @@ filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset)
BUG();
}
-/* For data server errors we don't recover from */
-static void
-filelayout_set_lo_fail(struct pnfs_layout_segment *lseg)
-{
- if (lseg->pls_range.iomode == IOMODE_RW) {
- dprintk("%s Setting layout IOMODE_RW fail bit\n", __func__);
- set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
- } else {
- dprintk("%s Setting layout IOMODE_READ fail bit\n", __func__);
- set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
- }
-}
-
static int filelayout_async_handle_error(struct rpc_task *task,
struct nfs4_state *state,
struct nfs_client *clp,
@@ -145,7 +132,7 @@ static int filelayout_read_done_cb(struct rpc_task *task,
dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n",
__func__, data->ds_clp, data->ds_clp->cl_session);
if (reset) {
- filelayout_set_lo_fail(data->lseg);
+ pnfs_set_lo_fail(data->lseg);
nfs4_reset_read(task, data);
clp = NFS_SERVER(data->inode)->nfs_client;
}
@@ -221,7 +208,7 @@ static int filelayout_write_done_cb(struct rpc_task *task,
dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n",
__func__, data->ds_clp, data->ds_clp->cl_session);
if (reset) {
- filelayout_set_lo_fail(data->lseg);
+ pnfs_set_lo_fail(data->lseg);
nfs4_reset_write(task, data);
clp = NFS_SERVER(data->inode)->nfs_client;
} else
@@ -256,7 +243,7 @@ static int filelayout_commit_done_cb(struct rpc_task *task,
__func__, data->ds_clp, data->ds_clp->cl_session);
if (reset) {
prepare_to_resend_writes(data);
- filelayout_set_lo_fail(data->lseg);
+ pnfs_set_lo_fail(data->lseg);
} else
nfs_restart_rpc(task, data->ds_clp);
return -EAGAIN;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 4700fae1ada..2d8a1693ed0 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -6267,6 +6267,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
.dentry_ops = &nfs4_dentry_operations,
.dir_inode_ops = &nfs4_dir_inode_operations,
.file_inode_ops = &nfs4_file_inode_operations,
+ .file_ops = &nfs4_file_operations,
.getroot = nfs4_proc_get_root,
.getattr = nfs4_proc_getattr,
.setattr = nfs4_proc_setattr,
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index e550e8836c3..ee73d9a4f70 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1168,23 +1168,17 @@ EXPORT_SYMBOL_GPL(pnfs_generic_pg_test);
/*
* Called by non rpc-based layout drivers
*/
-int
-pnfs_ld_write_done(struct nfs_write_data *data)
+void pnfs_ld_write_done(struct nfs_write_data *data)
{
- int status;
-
- if (!data->pnfs_error) {
+ if (likely(!data->pnfs_error)) {
pnfs_set_layoutcommit(data);
data->mds_ops->rpc_call_done(&data->task, data);
- data->mds_ops->rpc_release(data);
- return 0;
+ } else {
+ put_lseg(data->lseg);
+ data->lseg = NULL;
+ dprintk("pnfs write error = %d\n", data->pnfs_error);
}
-
- dprintk("%s: pnfs_error=%d, retry via MDS\n", __func__,
- data->pnfs_error);
- status = nfs_initiate_write(data, NFS_CLIENT(data->inode),
- data->mds_ops, NFS_FILE_SYNC);
- return status ? : -EAGAIN;
+ data->mds_ops->rpc_release(data);
}
EXPORT_SYMBOL_GPL(pnfs_ld_write_done);
@@ -1268,23 +1262,17 @@ EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);
/*
* Called by non rpc-based layout drivers
*/
-int
-pnfs_ld_read_done(struct nfs_read_data *data)
+void pnfs_ld_read_done(struct nfs_read_data *data)
{
- int status;
-
- if (!data->pnfs_error) {
+ if (likely(!data->pnfs_error)) {
__nfs4_read_done_cb(data);
data->mds_ops->rpc_call_done(&data->task, data);
- data->mds_ops->rpc_release(data);
- return 0;
+ } else {
+ put_lseg(data->lseg);
+ data->lseg = NULL;
+ dprintk("pnfs write error = %d\n", data->pnfs_error);
}
-
- dprintk("%s: pnfs_error=%d, retry via MDS\n", __func__,
- data->pnfs_error);
- status = nfs_initiate_read(data, NFS_CLIENT(data->inode),
- data->mds_ops);
- return status ? : -EAGAIN;
+ data->mds_ops->rpc_release(data);
}
EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
@@ -1381,6 +1369,18 @@ static void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp)
}
}
+void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg)
+{
+ if (lseg->pls_range.iomode == IOMODE_RW) {
+ dprintk("%s Setting layout IOMODE_RW fail bit\n", __func__);
+ set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
+ } else {
+ dprintk("%s Setting layout IOMODE_READ fail bit\n", __func__);
+ set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
+ }
+}
+EXPORT_SYMBOL_GPL(pnfs_set_lo_fail);
+
void
pnfs_set_layoutcommit(struct nfs_write_data *wdata)
{
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 01cbfd54f3c..1509530cb11 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -178,6 +178,7 @@ int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc);
void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *, struct nfs_page *);
int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc);
bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req);
+void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg);
int pnfs_layout_process(struct nfs4_layoutget *lgp);
void pnfs_free_lseg_list(struct list_head *tmp_list);
void pnfs_destroy_layout(struct nfs_inode *);
@@ -200,8 +201,8 @@ void pnfs_set_layoutcommit(struct nfs_write_data *wdata);
void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data);
int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
int _pnfs_return_layout(struct inode *);
-int pnfs_ld_write_done(struct nfs_write_data *);
-int pnfs_ld_read_done(struct nfs_read_data *);
+void pnfs_ld_write_done(struct nfs_write_data *);
+void pnfs_ld_read_done(struct nfs_read_data *);
struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
struct nfs_open_context *ctx,
loff_t pos,
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index ac40b8535d7..f48125da198 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -710,6 +710,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
.dentry_ops = &nfs_dentry_operations,
.dir_inode_ops = &nfs_dir_inode_operations,
.file_inode_ops = &nfs_file_inode_operations,
+ .file_ops = &nfs_file_operations,
.getroot = nfs_proc_get_root,
.getattr = nfs_proc_getattr,
.setattr = nfs_proc_setattr,
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 2171c043ab0..bfc20b16024 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -541,13 +541,23 @@ static void nfs_readpage_result_full(struct rpc_task *task, void *calldata)
static void nfs_readpage_release_full(void *calldata)
{
struct nfs_read_data *data = calldata;
+ struct nfs_pageio_descriptor pgio;
+ if (data->pnfs_error) {
+ nfs_pageio_init_read_mds(&pgio, data->inode);
+ pgio.pg_recoalesce = 1;
+ }
while (!list_empty(&data->pages)) {
struct nfs_page *req = nfs_list_entry(data->pages.next);
nfs_list_remove_request(req);
- nfs_readpage_release(req);
+ if (!data->pnfs_error)
+ nfs_readpage_release(req);
+ else
+ nfs_pageio_add_request(&pgio, req);
}
+ if (data->pnfs_error)
+ nfs_pageio_complete(&pgio);
nfs_readdata_release(calldata);
}
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index c9bd2a6b7d4..106fd0634ab 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -428,7 +428,6 @@ static void
nfs_mark_request_dirty(struct nfs_page *req)
{
__set_page_dirty_nobuffers(req->wb_page);
- __mark_inode_dirty(req->wb_page->mapping->host, I_DIRTY_DATASYNC);
}
#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
@@ -762,6 +761,8 @@ int nfs_updatepage(struct file *file, struct page *page,
status = nfs_writepage_setup(ctx, page, offset, count);
if (status < 0)
nfs_set_pageerror(page);
+ else
+ __set_page_dirty_nobuffers(page);
dprintk("NFS: nfs_updatepage returns %d (isize %lld)\n",
status, (long long)i_size_read(inode));
@@ -1165,7 +1166,13 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
static void nfs_writeback_release_full(void *calldata)
{
struct nfs_write_data *data = calldata;
- int status = data->task.tk_status;
+ int ret, status = data->task.tk_status;
+ struct nfs_pageio_descriptor pgio;
+
+ if (data->pnfs_error) {
+ nfs_pageio_init_write_mds(&pgio, data->inode, FLUSH_STABLE);
+ pgio.pg_recoalesce = 1;
+ }
/* Update attributes as result of writeback. */
while (!list_empty(&data->pages)) {
@@ -1181,6 +1188,11 @@ static void nfs_writeback_release_full(void *calldata)
req->wb_bytes,
(long long)req_offset(req));
+ if (data->pnfs_error) {
+ dprintk(", pnfs error = %d\n", data->pnfs_error);
+ goto next;
+ }
+
if (status < 0) {
nfs_set_pageerror(page);
nfs_context_set_write_error(req->wb_context, status);
@@ -1200,7 +1212,19 @@ remove_request:
next:
nfs_clear_page_tag_locked(req);
nfs_end_page_writeback(page);
+ if (data->pnfs_error) {
+ lock_page(page);
+ nfs_pageio_cond_complete(&pgio, page->index);
+ ret = nfs_page_async_flush(&pgio, page, 0);
+ if (ret) {
+ nfs_set_pageerror(page);
+ dprintk("rewrite to MDS error = %d\n", ret);
+ }
+ unlock_page(page);
+ }
}
+ if (data->pnfs_error)
+ nfs_pageio_complete(&pgio);
nfs_writedata_release(calldata);
}
@@ -1553,6 +1577,10 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr
int flags = FLUSH_SYNC;
int ret = 0;
+ /* no commits means nothing needs to be done */
+ if (!nfsi->ncommit)
+ return ret;
+
if (wbc->sync_mode == WB_SYNC_NONE) {
/* Don't commit yet if this is a non-blocking flush and there
* are a lot of outstanding writes for this mapping.
@@ -1686,34 +1714,20 @@ out_error:
int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
struct page *page)
{
- struct nfs_page *req;
- int ret;
+ /*
+ * If PagePrivate is set, then the page is currently associated with
+ * an in-progress read or write request. Don't try to migrate it.
+ *
+ * FIXME: we could do this in principle, but we'll need a way to ensure
+ * that we can safely release the inode reference while holding
+ * the page lock.
+ */
+ if (PagePrivate(page))
+ return -EBUSY;
nfs_fscache_release_page(page, GFP_KERNEL);
- req = nfs_find_and_lock_request(page, false);
- ret = PTR_ERR(req);
- if (IS_ERR(req))
- goto out;
-
- ret = migrate_page(mapping, newpage, page);
- if (!req)
- goto out;
- if (ret)
- goto out_unlock;
- page_cache_get(newpage);
- spin_lock(&mapping->host->i_lock);
- req->wb_page = newpage;
- SetPagePrivate(newpage);
- set_page_private(newpage, (unsigned long)req);
- ClearPagePrivate(page);
- set_page_private(page, 0);
- spin_unlock(&mapping->host->i_lock);
- page_cache_release(page);
-out_unlock:
- nfs_clear_page_tag_locked(req);
-out:
- return ret;
+ return migrate_page(mapping, newpage, page);
}
#endif
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index e8077766661..05b397ce70f 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -156,6 +156,8 @@ do_open_permission(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfs
!(open->op_share_access & NFS4_SHARE_ACCESS_WRITE))
return nfserr_inval;
+ accmode |= NFSD_MAY_READ_IF_EXEC;
+
if (open->op_share_access & NFS4_SHARE_ACCESS_READ)
accmode |= NFSD_MAY_READ;
if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
@@ -691,7 +693,7 @@ nfsd4_readdir(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
readdir->rd_bmval[1] &= nfsd_suppattrs1(cstate->minorversion);
readdir->rd_bmval[2] &= nfsd_suppattrs2(cstate->minorversion);
- if ((cookie > ~(u32)0) || (cookie == 1) || (cookie == 2) ||
+ if ((cookie == 1) || (cookie == 2) ||
(cookie == 0 && memcmp(readdir->rd_verf.data, zeroverf.data, NFS4_VERIFIER_SIZE)))
return nfserr_bad_cookie;
@@ -930,7 +932,7 @@ _nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
count = 4 + (verify->ve_attrlen >> 2);
buf = kmalloc(count << 2, GFP_KERNEL);
if (!buf)
- return nfserr_resource;
+ return nfserr_jukebox;
status = nfsd4_encode_fattr(&cstate->current_fh,
cstate->current_fh.fh_export,
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 29d77f60585..02eb38ecbfe 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -88,7 +88,7 @@ nfs4_make_rec_clidname(char *dname, struct xdr_netobj *clname)
struct xdr_netobj cksum;
struct hash_desc desc;
struct scatterlist sg;
- __be32 status = nfserr_resource;
+ __be32 status = nfserr_jukebox;
dprintk("NFSD: nfs4_make_rec_clidname for %.*s\n",
clname->len, clname->data);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 3787ec11740..6f8bcc733f7 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -192,8 +192,15 @@ static void nfs4_file_put_fd(struct nfs4_file *fp, int oflag)
static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag)
{
if (atomic_dec_and_test(&fp->fi_access[oflag])) {
- nfs4_file_put_fd(fp, O_RDWR);
nfs4_file_put_fd(fp, oflag);
+ /*
+ * It's also safe to get rid of the RDWR open *if*
+ * we no longer have need of the other kind of access
+ * or if we already have the other kind of open:
+ */
+ if (fp->fi_fds[1-oflag]
+ || atomic_read(&fp->fi_access[1 - oflag]) == 0)
+ nfs4_file_put_fd(fp, O_RDWR);
}
}
@@ -1946,7 +1953,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
* of 5 bullet points, labeled as CASE0 - CASE4 below.
*/
unconf = find_unconfirmed_client_by_str(dname, strhashval);
- status = nfserr_resource;
+ status = nfserr_jukebox;
if (!conf) {
/*
* RFC 3530 14.2.33 CASE 4:
@@ -2483,7 +2490,7 @@ renew:
if (open->op_stateowner == NULL) {
sop = alloc_init_open_stateowner(strhashval, clp, open);
if (sop == NULL)
- return nfserr_resource;
+ return nfserr_jukebox;
open->op_stateowner = sop;
}
list_del_init(&sop->so_close_lru);
@@ -2619,7 +2626,7 @@ nfs4_new_open(struct svc_rqst *rqstp, struct nfs4_stateid **stpp,
stp = nfs4_alloc_stateid();
if (stp == NULL)
- return nfserr_resource;
+ return nfserr_jukebox;
status = nfs4_get_vfs_file(rqstp, fp, cur_fh, open);
if (status) {
@@ -2850,7 +2857,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
status = nfserr_bad_stateid;
if (open->op_claim_type == NFS4_OPEN_CLAIM_DELEGATE_CUR)
goto out;
- status = nfserr_resource;
+ status = nfserr_jukebox;
fp = alloc_init_file(ino);
if (fp == NULL)
goto out;
@@ -3530,8 +3537,9 @@ static inline void nfs4_file_downgrade(struct nfs4_stateid *stp, unsigned int to
int i;
for (i = 1; i < 4; i++) {
- if (test_bit(i, &stp->st_access_bmap) && !(i & to_access)) {
- nfs4_file_put_access(stp->st_file, i);
+ if (test_bit(i, &stp->st_access_bmap)
+ && ((i & to_access) != i)) {
+ nfs4_file_put_access(stp->st_file, nfs4_access_to_omode(i));
__clear_bit(i, &stp->st_access_bmap);
}
}
@@ -3562,6 +3570,8 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp,
if (!access_valid(od->od_share_access, cstate->minorversion)
|| !deny_valid(od->od_share_deny))
return nfserr_inval;
+ /* We don't yet support WANT bits: */
+ od->od_share_access &= NFS4_SHARE_ACCESS_MASK;
nfs4_lock_state();
if ((status = nfs4_preprocess_seqid_op(cstate,
@@ -4035,7 +4045,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
/* XXX: Do we need to check for duplicate stateowners on
* the same file, or should they just be allowed (and
* create new stateids)? */
- status = nfserr_resource;
+ status = nfserr_jukebox;
lock_sop = alloc_init_lock_stateowner(strhashval,
open_sop->so_client, open_stp, lock);
if (lock_sop == NULL)
@@ -4119,9 +4129,9 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
case (EDEADLK):
status = nfserr_deadlock;
break;
- default:
+ default:
dprintk("NFSD: nfsd4_lock: vfs_lock_file() failed! status %d\n",err);
- status = nfserr_resource;
+ status = nfserrno(err);
break;
}
out:
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index c8bf405d19d..f8109960525 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1623,6 +1623,18 @@ static void write_cinfo(__be32 **p, struct nfsd4_change_info *c)
\
save = resp->p;
+static bool seqid_mutating_err(__be32 err)
+{
+ /* rfc 3530 section 8.1.5: */
+ return err != nfserr_stale_clientid &&
+ err != nfserr_stale_stateid &&
+ err != nfserr_bad_stateid &&
+ err != nfserr_bad_seqid &&
+ err != nfserr_bad_xdr &&
+ err != nfserr_resource &&
+ err != nfserr_nofilehandle;
+}
+
/*
* Routine for encoding the result of a "seqid-mutating" NFSv4 operation. This
* is where sequence id's are incremented, and the replay cache is filled.
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 4eefaf1b42e..5cfebe50405 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -447,12 +447,6 @@ struct nfs4_stateid {
#define WR_STATE 0x00000020
#define CLOSE_STATE 0x00000040
-#define seqid_mutating_err(err) \
- (((err) != nfserr_stale_clientid) && \
- ((err) != nfserr_bad_seqid) && \
- ((err) != nfserr_stale_stateid) && \
- ((err) != nfserr_bad_stateid))
-
struct nfsd4_compound_state;
extern __be32 nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index fd0acca5370..acf88aea211 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -2114,7 +2114,8 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
/* Allow read access to binaries even when mode 111 */
if (err == -EACCES && S_ISREG(inode->i_mode) &&
- acc == (NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE))
+ (acc == (NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE) ||
+ acc == (NFSD_MAY_READ | NFSD_MAY_READ_IF_EXEC)))
err = inode_permission(inode, MAY_EXEC);
return err? nfserrno(err) : 0;
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index e0bbac04d1d..a22e40e2786 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -25,6 +25,7 @@
#define NFSD_MAY_BYPASS_GSS_ON_ROOT 256
#define NFSD_MAY_NOT_BREAK_LEASE 512
#define NFSD_MAY_BYPASS_GSS 1024
+#define NFSD_MAY_READ_IF_EXEC 2048
#define NFSD_MAY_CREATE (NFSD_MAY_EXEC|NFSD_MAY_WRITE)
#define NFSD_MAY_REMOVE (NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC)
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 5afaa58a863..c7d4ee663f1 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1039,6 +1039,9 @@ static int show_numa_map(struct seq_file *m, void *v)
seq_printf(m, " stack");
}
+ if (is_vm_hugetlb_page(vma))
+ seq_printf(m, " huge");
+
walk_page_range(vma->vm_start, vma->vm_end, &walk);
if (!md->pages)
diff --git a/fs/stat.c b/fs/stat.c
index 78a3aa83c7e..8806b8997d2 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -294,15 +294,16 @@ SYSCALL_DEFINE4(readlinkat, int, dfd, const char __user *, pathname,
{
struct path path;
int error;
+ int empty = 0;
if (bufsiz <= 0)
return -EINVAL;
- error = user_path_at(dfd, pathname, LOOKUP_EMPTY, &path);
+ error = user_path_at_empty(dfd, pathname, LOOKUP_EMPTY, &path, &empty);
if (!error) {
struct inode *inode = path.dentry->d_inode;
- error = -EINVAL;
+ error = empty ? -ENOENT : -EINVAL;
if (inode->i_op->readlink) {
error = security_inode_readlink(path.dentry);
if (!error) {
diff --git a/fs/statfs.c b/fs/statfs.c
index 8244924dec5..9cf04a11896 100644
--- a/fs/statfs.c
+++ b/fs/statfs.c
@@ -76,7 +76,7 @@ EXPORT_SYMBOL(vfs_statfs);
int user_statfs(const char __user *pathname, struct kstatfs *st)
{
struct path path;
- int error = user_path(pathname, &path);
+ int error = user_path_at(AT_FDCWD, pathname, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &path);
if (!error) {
error = vfs_statfs(&path, st);
path_put(&path);
diff --git a/fs/super.c b/fs/super.c
index 3f56a269a4f..32a81f3467e 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -61,7 +61,7 @@ static int prune_super(struct shrinker *shrink, struct shrink_control *sc)
return -1;
if (!grab_super_passive(sb))
- return -1;
+ return !sc->nr_to_scan ? 0 : -1;
if (sb->s_op && sb->s_op->nr_cached_objects)
fs_objects = sb->s_op->nr_cached_objects(sb);
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 620972b8094..8e8b06b90b6 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -320,7 +320,6 @@ extern struct list_head *xfs_get_buftarg_list(void);
#define xfs_getsize_buftarg(buftarg) block_size((buftarg)->bt_bdev)
#define xfs_readonly_buftarg(buftarg) bdev_read_only((buftarg)->bt_bdev)
-#define xfs_binval(buftarg) xfs_flush_buftarg(buftarg, 1)
#define XFS_bflush(buftarg) xfs_flush_buftarg(buftarg, 1)
#endif /* __XFS_BUF_H__ */
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 7f7b42469ea..b7e75c6ba09 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -317,7 +317,19 @@ xfs_file_aio_read(
if (XFS_FORCED_SHUTDOWN(mp))
return -EIO;
- if (unlikely(ioflags & IO_ISDIRECT)) {
+ /*
+ * Locking is a bit tricky here. If we take an exclusive lock
+ * for direct IO, we effectively serialise all new concurrent
+ * read IO to this file and block it behind IO that is currently in
+ * progress because IO in progress holds the IO lock shared. We only
+ * need to hold the lock exclusive to blow away the page cache, so
+ * only take lock exclusively if the page cache needs invalidation.
+ * This allows the normal direct IO case of no page cache pages to
+ * proceeed concurrently without serialisation.
+ */
+ xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
+ if ((ioflags & IO_ISDIRECT) && inode->i_mapping->nrpages) {
+ xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
if (inode->i_mapping->nrpages) {
@@ -330,8 +342,7 @@ xfs_file_aio_read(
}
}
xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
- } else
- xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
+ }
trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags);
@@ -666,6 +677,7 @@ xfs_file_aio_write_checks(
xfs_fsize_t new_size;
int error = 0;
+ xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode));
if (error) {
xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock);
@@ -757,14 +769,24 @@ xfs_file_dio_aio_write(
*iolock = XFS_IOLOCK_EXCL;
else
*iolock = XFS_IOLOCK_SHARED;
- xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock);
+ xfs_rw_ilock(ip, *iolock);
ret = xfs_file_aio_write_checks(file, &pos, &count, iolock);
if (ret)
return ret;
+ /*
+ * Recheck if there are cached pages that need invalidate after we got
+ * the iolock to protect against other threads adding new pages while
+ * we were waiting for the iolock.
+ */
+ if (mapping->nrpages && *iolock == XFS_IOLOCK_SHARED) {
+ xfs_rw_iunlock(ip, *iolock);
+ *iolock = XFS_IOLOCK_EXCL;
+ xfs_rw_ilock(ip, *iolock);
+ }
+
if (mapping->nrpages) {
- WARN_ON(*iolock != XFS_IOLOCK_EXCL);
ret = -xfs_flushinval_pages(ip, (pos & PAGE_CACHE_MASK), -1,
FI_REMAPF_LOCKED);
if (ret)
@@ -809,7 +831,7 @@ xfs_file_buffered_aio_write(
size_t count = ocount;
*iolock = XFS_IOLOCK_EXCL;
- xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock);
+ xfs_rw_ilock(ip, *iolock);
ret = xfs_file_aio_write_checks(file, &pos, &count, iolock);
if (ret)
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 673704fab74..474920b4655 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -465,7 +465,7 @@ xfs_vn_getattr(
trace_xfs_getattr(ip);
if (XFS_FORCED_SHUTDOWN(mp))
- return XFS_ERROR(EIO);
+ return -XFS_ERROR(EIO);
stat->size = XFS_ISIZE(ip);
stat->dev = inode->i_sb->s_dev;
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 0081657ad98..d4d5775d6e2 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -44,9 +44,6 @@
#include "xfs_trace.h"
-STATIC void xfs_unmountfs_wait(xfs_mount_t *);
-
-
#ifdef HAVE_PERCPU_SB
STATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t,
int);
@@ -1496,11 +1493,6 @@ xfs_unmountfs(
*/
xfs_log_force(mp, XFS_LOG_SYNC);
- xfs_binval(mp->m_ddev_targp);
- if (mp->m_rtdev_targp) {
- xfs_binval(mp->m_rtdev_targp);
- }
-
/*
* Unreserve any blocks we have so that when we unmount we don't account
* the reserved free space as used. This is really only necessary for
@@ -1526,7 +1518,16 @@ xfs_unmountfs(
xfs_warn(mp, "Unable to update superblock counters. "
"Freespace may not be correct on next mount.");
xfs_unmountfs_writesb(mp);
- xfs_unmountfs_wait(mp); /* wait for async bufs */
+
+ /*
+ * Make sure all buffers have been flushed and completed before
+ * unmounting the log.
+ */
+ error = xfs_flush_buftarg(mp->m_ddev_targp, 1);
+ if (error)
+ xfs_warn(mp, "%d busy buffers during unmount.", error);
+ xfs_wait_buftarg(mp->m_ddev_targp);
+
xfs_log_unmount_write(mp);
xfs_log_unmount(mp);
xfs_uuid_unmount(mp);
@@ -1537,16 +1538,6 @@ xfs_unmountfs(
xfs_free_perag(mp);
}
-STATIC void
-xfs_unmountfs_wait(xfs_mount_t *mp)
-{
- if (mp->m_logdev_targp != mp->m_ddev_targp)
- xfs_wait_buftarg(mp->m_logdev_targp);
- if (mp->m_rtdev_targp)
- xfs_wait_buftarg(mp->m_rtdev_targp);
- xfs_wait_buftarg(mp->m_ddev_targp);
-}
-
int
xfs_fs_writable(xfs_mount_t *mp)
{
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 9a0aa76facd..95ba6dc885a 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -674,7 +674,8 @@ xfs_qm_dqattach_one(
* disk and we didn't ask it to allocate;
* ESRCH if quotas got turned off suddenly.
*/
- error = xfs_qm_dqget(ip->i_mount, ip, id, type, XFS_QMOPT_DOWARN, &dqp);
+ error = xfs_qm_dqget(ip->i_mount, ip, id, type,
+ doalloc | XFS_QMOPT_DOWARN, &dqp);
if (error)
return error;
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 51fc429527b..b9e28730bbd 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -113,7 +113,7 @@ xfs_readlink(
char *link)
{
xfs_mount_t *mp = ip->i_mount;
- int pathlen;
+ xfs_fsize_t pathlen;
int error = 0;
trace_xfs_readlink(ip);
@@ -123,13 +123,19 @@ xfs_readlink(
xfs_ilock(ip, XFS_ILOCK_SHARED);
- ASSERT(S_ISLNK(ip->i_d.di_mode));
- ASSERT(ip->i_d.di_size <= MAXPATHLEN);
-
pathlen = ip->i_d.di_size;
if (!pathlen)
goto out;
+ if (pathlen < 0 || pathlen > MAXPATHLEN) {
+ xfs_alert(mp, "%s: inode (%llu) bad symlink length (%lld)",
+ __func__, (unsigned long long) ip->i_ino,
+ (long long) pathlen);
+ ASSERT(0);
+ return XFS_ERROR(EFSCORRUPTED);
+ }
+
+
if (ip->i_df.if_flags & XFS_IFINLINE) {
memcpy(link, ip->i_df.if_u1.if_data, pathlen);
link[pathlen] = '\0';