From 793b80ef14af56d20c998265287648ad34239b6f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 3 Mar 2016 16:03:58 +0100 Subject: vfs: pass a flags argument to vfs_readv/vfs_writev This way we can set kiocb flags also from the sync read/write path for the read_iter/write_iter operations. For now there is no way to pass flags to plain read/write operations as there is no real need for that, and all flags passed are explicitly rejected for these files. Signed-off-by: Milosz Tanski [hch: rebased on top of my kiocb changes] Signed-off-by: Christoph Hellwig Reviewed-by: Stephen Bates Tested-by: Stephen Bates Acked-by: Jeff Moyer Signed-off-by: Al Viro --- include/linux/fs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 1a2046275cdf..6ec87964644f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1712,9 +1712,9 @@ extern ssize_t __vfs_write(struct file *, const char __user *, size_t, loff_t *) extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *); extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t vfs_readv(struct file *, const struct iovec __user *, - unsigned long, loff_t *); + unsigned long, loff_t *, int); extern ssize_t vfs_writev(struct file *, const struct iovec __user *, - unsigned long, loff_t *); + unsigned long, loff_t *, int); extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *, loff_t, size_t, unsigned int); extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in, -- cgit v1.2.3 From f17d8b35452cab31a70d224964cd583fb2845449 Mon Sep 17 00:00:00 2001 From: Milosz Tanski Date: Thu, 3 Mar 2016 16:03:59 +0100 Subject: vfs: vfs: Define new syscalls preadv2,pwritev2 New syscalls that take an flag argument. No flags are added yet in this patch. Signed-off-by: Milosz Tanski [hch: rebased on top of my kiocb changes] Signed-off-by: Christoph Hellwig Reviewed-by: Stephen Bates Tested-by: Stephen Bates Acked-by: Jeff Moyer Signed-off-by: Al Viro --- fs/read_write.c | 161 ++++++++++++++++++++++++++++++++++++----------- include/linux/compat.h | 6 ++ include/linux/syscalls.h | 6 ++ 3 files changed, 138 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/fs/read_write.c b/fs/read_write.c index 7d453c3e1cb6..e9c9e2a667ce 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -895,15 +895,15 @@ ssize_t vfs_writev(struct file *file, const struct iovec __user *vec, EXPORT_SYMBOL(vfs_writev); -SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, - unsigned long, vlen) +static ssize_t do_readv(unsigned long fd, const struct iovec __user *vec, + unsigned long vlen, int flags) { struct fd f = fdget_pos(fd); ssize_t ret = -EBADF; if (f.file) { loff_t pos = file_pos_read(f.file); - ret = vfs_readv(f.file, vec, vlen, &pos, 0); + ret = vfs_readv(f.file, vec, vlen, &pos, flags); if (ret >= 0) file_pos_write(f.file, pos); fdput_pos(f); @@ -915,15 +915,15 @@ SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, return ret; } -SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec, - unsigned long, vlen) +static ssize_t do_writev(unsigned long fd, const struct iovec __user *vec, + unsigned long vlen, int flags) { struct fd f = fdget_pos(fd); ssize_t ret = -EBADF; if (f.file) { loff_t pos = file_pos_read(f.file); - ret = vfs_writev(f.file, vec, vlen, &pos, 0); + ret = vfs_writev(f.file, vec, vlen, &pos, flags); if (ret >= 0) file_pos_write(f.file, pos); fdput_pos(f); @@ -941,10 +941,9 @@ static inline loff_t pos_from_hilo(unsigned long high, unsigned long low) return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low; } -SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec, - unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) +static ssize_t do_preadv(unsigned long fd, const struct iovec __user *vec, + unsigned long vlen, loff_t pos, int flags) { - loff_t pos = pos_from_hilo(pos_h, pos_l); struct fd f; ssize_t ret = -EBADF; @@ -955,7 +954,7 @@ SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec, if (f.file) { ret = -ESPIPE; if (f.file->f_mode & FMODE_PREAD) - ret = vfs_readv(f.file, vec, vlen, &pos, 0); + ret = vfs_readv(f.file, vec, vlen, &pos, flags); fdput(f); } @@ -965,10 +964,9 @@ SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec, return ret; } -SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec, - unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) +static ssize_t do_pwritev(unsigned long fd, const struct iovec __user *vec, + unsigned long vlen, loff_t pos, int flags) { - loff_t pos = pos_from_hilo(pos_h, pos_l); struct fd f; ssize_t ret = -EBADF; @@ -979,7 +977,7 @@ SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec, if (f.file) { ret = -ESPIPE; if (f.file->f_mode & FMODE_PWRITE) - ret = vfs_writev(f.file, vec, vlen, &pos, 0); + ret = vfs_writev(f.file, vec, vlen, &pos, flags); fdput(f); } @@ -989,6 +987,58 @@ SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec, return ret; } +SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, + unsigned long, vlen) +{ + return do_readv(fd, vec, vlen, 0); +} + +SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec, + unsigned long, vlen) +{ + return do_writev(fd, vec, vlen, 0); +} + +SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec, + unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) +{ + loff_t pos = pos_from_hilo(pos_h, pos_l); + + return do_preadv(fd, vec, vlen, pos, 0); +} + +SYSCALL_DEFINE6(preadv2, unsigned long, fd, const struct iovec __user *, vec, + unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h, + int, flags) +{ + loff_t pos = pos_from_hilo(pos_h, pos_l); + + if (pos == -1) + return do_readv(fd, vec, vlen, flags); + + return do_preadv(fd, vec, vlen, pos, flags); +} + +SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec, + unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) +{ + loff_t pos = pos_from_hilo(pos_h, pos_l); + + return do_pwritev(fd, vec, vlen, pos, 0); +} + +SYSCALL_DEFINE6(pwritev2, unsigned long, fd, const struct iovec __user *, vec, + unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h, + int, flags) +{ + loff_t pos = pos_from_hilo(pos_h, pos_l); + + if (pos == -1) + return do_writev(fd, vec, vlen, flags); + + return do_pwritev(fd, vec, vlen, pos, flags); +} + #ifdef CONFIG_COMPAT static ssize_t compat_do_readv_writev(int type, struct file *file, @@ -1046,7 +1096,7 @@ out: static size_t compat_readv(struct file *file, const struct compat_iovec __user *vec, - unsigned long vlen, loff_t *pos) + unsigned long vlen, loff_t *pos, int flags) { ssize_t ret = -EBADF; @@ -1057,7 +1107,7 @@ static size_t compat_readv(struct file *file, if (!(file->f_mode & FMODE_CAN_READ)) goto out; - ret = compat_do_readv_writev(READ, file, vec, vlen, pos, 0); + ret = compat_do_readv_writev(READ, file, vec, vlen, pos, flags); out: if (ret > 0) @@ -1066,9 +1116,9 @@ out: return ret; } -COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd, - const struct compat_iovec __user *,vec, - compat_ulong_t, vlen) +static size_t do_compat_readv(compat_ulong_t fd, + const struct compat_iovec __user *vec, + compat_ulong_t vlen, int flags) { struct fd f = fdget_pos(fd); ssize_t ret; @@ -1077,16 +1127,24 @@ COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd, if (!f.file) return -EBADF; pos = f.file->f_pos; - ret = compat_readv(f.file, vec, vlen, &pos); + ret = compat_readv(f.file, vec, vlen, &pos, flags); if (ret >= 0) f.file->f_pos = pos; fdput_pos(f); return ret; + } -static long __compat_sys_preadv64(unsigned long fd, +COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd, + const struct compat_iovec __user *,vec, + compat_ulong_t, vlen) +{ + return do_compat_readv(fd, vec, vlen, 0); +} + +static long do_compat_preadv64(unsigned long fd, const struct compat_iovec __user *vec, - unsigned long vlen, loff_t pos) + unsigned long vlen, loff_t pos, int flags) { struct fd f; ssize_t ret; @@ -1098,7 +1156,7 @@ static long __compat_sys_preadv64(unsigned long fd, return -EBADF; ret = -ESPIPE; if (f.file->f_mode & FMODE_PREAD) - ret = compat_readv(f.file, vec, vlen, &pos); + ret = compat_readv(f.file, vec, vlen, &pos, flags); fdput(f); return ret; } @@ -1108,7 +1166,7 @@ COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd, const struct compat_iovec __user *,vec, unsigned long, vlen, loff_t, pos) { - return __compat_sys_preadv64(fd, vec, vlen, pos); + return do_compat_preadv64(fd, vec, vlen, pos, 0); } #endif @@ -1118,12 +1176,25 @@ COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd, { loff_t pos = ((loff_t)pos_high << 32) | pos_low; - return __compat_sys_preadv64(fd, vec, vlen, pos); + return do_compat_preadv64(fd, vec, vlen, pos, 0); +} + +COMPAT_SYSCALL_DEFINE6(preadv2, compat_ulong_t, fd, + const struct compat_iovec __user *,vec, + compat_ulong_t, vlen, u32, pos_low, u32, pos_high, + int, flags) +{ + loff_t pos = ((loff_t)pos_high << 32) | pos_low; + + if (pos == -1) + return do_compat_readv(fd, vec, vlen, flags); + + return do_compat_preadv64(fd, vec, vlen, pos, flags); } static size_t compat_writev(struct file *file, const struct compat_iovec __user *vec, - unsigned long vlen, loff_t *pos) + unsigned long vlen, loff_t *pos, int flags) { ssize_t ret = -EBADF; @@ -1143,9 +1214,9 @@ out: return ret; } -COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd, - const struct compat_iovec __user *, vec, - compat_ulong_t, vlen) +static size_t do_compat_writev(compat_ulong_t fd, + const struct compat_iovec __user* vec, + compat_ulong_t vlen, int flags) { struct fd f = fdget_pos(fd); ssize_t ret; @@ -1154,16 +1225,23 @@ COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd, if (!f.file) return -EBADF; pos = f.file->f_pos; - ret = compat_writev(f.file, vec, vlen, &pos); + ret = compat_writev(f.file, vec, vlen, &pos, flags); if (ret >= 0) f.file->f_pos = pos; fdput_pos(f); return ret; } -static long __compat_sys_pwritev64(unsigned long fd, +COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd, + const struct compat_iovec __user *, vec, + compat_ulong_t, vlen) +{ + return do_compat_writev(fd, vec, vlen, 0); +} + +static long do_compat_pwritev64(unsigned long fd, const struct compat_iovec __user *vec, - unsigned long vlen, loff_t pos) + unsigned long vlen, loff_t pos, int flags) { struct fd f; ssize_t ret; @@ -1175,7 +1253,7 @@ static long __compat_sys_pwritev64(unsigned long fd, return -EBADF; ret = -ESPIPE; if (f.file->f_mode & FMODE_PWRITE) - ret = compat_writev(f.file, vec, vlen, &pos); + ret = compat_writev(f.file, vec, vlen, &pos, flags); fdput(f); return ret; } @@ -1185,7 +1263,7 @@ COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd, const struct compat_iovec __user *,vec, unsigned long, vlen, loff_t, pos) { - return __compat_sys_pwritev64(fd, vec, vlen, pos); + return do_compat_pwritev64(fd, vec, vlen, pos, 0); } #endif @@ -1195,8 +1273,21 @@ COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd, { loff_t pos = ((loff_t)pos_high << 32) | pos_low; - return __compat_sys_pwritev64(fd, vec, vlen, pos); + return do_compat_pwritev64(fd, vec, vlen, pos, 0); +} + +COMPAT_SYSCALL_DEFINE6(pwritev2, compat_ulong_t, fd, + const struct compat_iovec __user *,vec, + compat_ulong_t, vlen, u32, pos_low, u32, pos_high, int, flags) +{ + loff_t pos = ((loff_t)pos_high << 32) | pos_low; + + if (pos == -1) + return do_compat_writev(fd, vec, vlen, flags); + + return do_compat_pwritev64(fd, vec, vlen, pos, flags); } + #endif static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, diff --git a/include/linux/compat.h b/include/linux/compat.h index a76c9172b2eb..fe4ccd0c748a 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -340,6 +340,12 @@ asmlinkage ssize_t compat_sys_preadv(compat_ulong_t fd, asmlinkage ssize_t compat_sys_pwritev(compat_ulong_t fd, const struct compat_iovec __user *vec, compat_ulong_t vlen, u32 pos_low, u32 pos_high); +asmlinkage ssize_t compat_sys_preadv2(compat_ulong_t fd, + const struct compat_iovec __user *vec, + compat_ulong_t vlen, u32 pos_low, u32 pos_high, int flags); +asmlinkage ssize_t compat_sys_pwritev2(compat_ulong_t fd, + const struct compat_iovec __user *vec, + compat_ulong_t vlen, u32 pos_low, u32 pos_high, int flags); #ifdef __ARCH_WANT_COMPAT_SYS_PREADV64 asmlinkage long compat_sys_preadv64(unsigned long fd, diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 185815c96433..d795472c54d8 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -575,8 +575,14 @@ asmlinkage long sys_pwrite64(unsigned int fd, const char __user *buf, size_t count, loff_t pos); asmlinkage long sys_preadv(unsigned long fd, const struct iovec __user *vec, unsigned long vlen, unsigned long pos_l, unsigned long pos_h); +asmlinkage long sys_preadv2(unsigned long fd, const struct iovec __user *vec, + unsigned long vlen, unsigned long pos_l, unsigned long pos_h, + int flags); asmlinkage long sys_pwritev(unsigned long fd, const struct iovec __user *vec, unsigned long vlen, unsigned long pos_l, unsigned long pos_h); +asmlinkage long sys_pwritev2(unsigned long fd, const struct iovec __user *vec, + unsigned long vlen, unsigned long pos_l, unsigned long pos_h, + int flags); asmlinkage long sys_getcwd(char __user *buf, unsigned long size); asmlinkage long sys_mkdir(const char __user *pathname, umode_t mode); asmlinkage long sys_chdir(const char __user *filename); -- cgit v1.2.3 From 97be7ebe53915af504fb491fb99f064c7cf3cb09 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 3 Mar 2016 16:04:01 +0100 Subject: vfs: add the RWF_HIPRI flag for preadv2/pwritev2 This adds a flag that tells the file system that this is a high priority request for which it's worth to poll the hardware. The flag is purely advisory and can be ignored if not supported. Signed-off-by: Christoph Hellwig Reviewed-by: Stephen Bates Tested-by: Stephen Bates Acked-by: Jeff Moyer Signed-off-by: Al Viro --- fs/read_write.c | 6 ++++-- include/linux/fs.h | 1 + include/uapi/linux/fs.h | 3 +++ 3 files changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/fs/read_write.c b/fs/read_write.c index e9c9e2a667ce..07c53db04ec1 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -697,10 +697,12 @@ static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter, struct kiocb kiocb; ssize_t ret; - if (flags) + if (flags & ~RWF_HIPRI) return -EOPNOTSUPP; init_sync_kiocb(&kiocb, filp); + if (flags & RWF_HIPRI) + kiocb.ki_flags |= IOCB_HIPRI; kiocb.ki_pos = *ppos; ret = fn(&kiocb, iter); @@ -715,7 +717,7 @@ static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter, { ssize_t ret = 0; - if (flags) + if (flags & ~RWF_HIPRI) return -EOPNOTSUPP; while (iov_iter_count(iter)) { diff --git a/include/linux/fs.h b/include/linux/fs.h index 6ec87964644f..337de88ff50f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -320,6 +320,7 @@ struct writeback_control; #define IOCB_EVENTFD (1 << 0) #define IOCB_APPEND (1 << 1) #define IOCB_DIRECT (1 << 2) +#define IOCB_HIPRI (1 << 3) struct kiocb { struct file *ki_filp; diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index 41e0433b4a83..847c656729f8 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -305,4 +305,7 @@ struct fsxattr { #define SYNC_FILE_RANGE_WRITE 2 #define SYNC_FILE_RANGE_WAIT_AFTER 4 +/* flags for preadv2/pwritev2: */ +#define RWF_HIPRI 0x00000001 /* high priority request, poll if possible */ + #endif /* _UAPI_LINUX_FS_H */ -- cgit v1.2.3 From 8e0b60b96ba06d826a2b26e23b1986853a4e5291 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 3 Mar 2016 16:04:03 +0100 Subject: blk-mq: enable polling support by default Now that applications need to explicitly ask for polling we can enable it by default in blk-mq drivers. Note that this will only have an affect on driver that supply a poll function, which currently only includes nvme. Signed-off-by: Christoph Hellwig Reviewed-by: Stephen Bates Tested-by: Stephen Bates Reviewed-by: Jeff Moyer Signed-off-by: Al Viro --- include/linux/blkdev.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 29189aeace19..ff33a1ab58c9 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -499,7 +499,8 @@ struct request_queue { #define QUEUE_FLAG_MQ_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_STACKABLE) | \ - (1 << QUEUE_FLAG_SAME_COMP)) + (1 << QUEUE_FLAG_SAME_COMP) | \ + (1 << QUEUE_FLAG_POLL)) static inline void queue_lockdep_assert_held(struct request_queue *q) { -- cgit v1.2.3 From 949a852e46dda07caaa0ff02e181f55d24e3ebf8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 6 Mar 2016 14:20:52 -0500 Subject: namei: teach lookup_slow() to skip revalidate ... and make mountpoint_last() use it. That makes all candidates for lookup with parent locked shared go through lookup_slow(). Signed-off-by: Al Viro --- fs/namei.c | 58 +++++++++++++++++++++++++++++++-------------------- include/linux/namei.h | 1 + 2 files changed, 36 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/fs/namei.c b/fs/namei.c index cb70a817d439..dbb8ec1a2006 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1605,7 +1605,29 @@ static struct dentry *lookup_slow(const struct qstr *name, { struct dentry *dentry; inode_lock(dir->d_inode); - dentry = __lookup_hash(name, dir, flags); + dentry = d_lookup(dir, name); + if (unlikely(dentry)) { + if ((dentry->d_flags & DCACHE_OP_REVALIDATE) && + !(flags & LOOKUP_NO_REVAL)) { + int error = d_revalidate(dentry, flags); + if (unlikely(error <= 0)) { + if (!error) + d_invalidate(dentry); + dput(dentry); + dentry = ERR_PTR(error); + } + } + if (dentry) { + inode_unlock(dir->d_inode); + return dentry; + } + } + dentry = d_alloc(dir, name); + if (unlikely(!dentry)) { + inode_unlock(dir->d_inode); + return ERR_PTR(-ENOMEM); + } + dentry = lookup_real(dir->d_inode, dentry, flags); inode_unlock(dir->d_inode); return dentry; } @@ -2425,31 +2447,21 @@ mountpoint_last(struct nameidata *nd, struct path *path) if (error) return error; dentry = dget(nd->path.dentry); - goto done; - } - - inode_lock(dir->d_inode); - dentry = d_lookup(dir, &nd->last); - if (!dentry) { - /* - * No cached dentry. Mounted dentries are pinned in the cache, - * so that means that this dentry is probably a symlink or the - * path doesn't actually point to a mounted dentry. - */ - dentry = d_alloc(dir, &nd->last); + } else { + dentry = d_lookup(dir, &nd->last); if (!dentry) { - inode_unlock(dir->d_inode); - return -ENOMEM; - } - dentry = lookup_real(dir->d_inode, dentry, nd->flags); - if (IS_ERR(dentry)) { - inode_unlock(dir->d_inode); - return PTR_ERR(dentry); + /* + * No cached dentry. Mounted dentries are pinned in the + * cache, so that means that this dentry is probably + * a symlink or the path doesn't actually point + * to a mounted dentry. + */ + dentry = lookup_slow(&nd->last, dir, + nd->flags | LOOKUP_NO_REVAL); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); } } - inode_unlock(dir->d_inode); - -done: if (d_is_negative(dentry)) { dput(dentry); return -ENOENT; diff --git a/include/linux/namei.h b/include/linux/namei.h index d0f25d81b46a..77d01700daf7 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -31,6 +31,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND}; #define LOOKUP_PARENT 0x0010 #define LOOKUP_REVAL 0x0020 #define LOOKUP_RCU 0x0040 +#define LOOKUP_NO_REVAL 0x0080 /* * Intent data -- cgit v1.2.3 From 9d95afd5971918b1aa8db1960ba24532c2d6ec89 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 1 Mar 2016 15:35:06 -0500 Subject: kill dentry_unhash() the last user is gone Signed-off-by: Al Viro --- fs/namei.c | 25 ------------------------- include/linux/fs.h | 5 ----- 2 files changed, 30 deletions(-) (limited to 'include') diff --git a/fs/namei.c b/fs/namei.c index dbb8ec1a2006..794f81dce766 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3685,31 +3685,6 @@ SYSCALL_DEFINE2(mkdir, const char __user *, pathname, umode_t, mode) return sys_mkdirat(AT_FDCWD, pathname, mode); } -/* - * The dentry_unhash() helper will try to drop the dentry early: we - * should have a usage count of 1 if we're the only user of this - * dentry, and if that is true (possibly after pruning the dcache), - * then we drop the dentry now. - * - * A low-level filesystem can, if it choses, legally - * do a - * - * if (!d_unhashed(dentry)) - * return -EBUSY; - * - * if it cannot handle the case of removing a directory - * that is still in use by something else.. - */ -void dentry_unhash(struct dentry *dentry) -{ - shrink_dcache_parent(dentry); - spin_lock(&dentry->d_lock); - if (dentry->d_lockref.count == 1) - __d_drop(dentry); - spin_unlock(&dentry->d_lock); -} -EXPORT_SYMBOL(dentry_unhash); - int vfs_rmdir(struct inode *dir, struct dentry *dentry) { int error = may_delete(dir, dentry, 1); diff --git a/include/linux/fs.h b/include/linux/fs.h index ae681002100a..c577923cd400 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1539,11 +1539,6 @@ extern int vfs_unlink(struct inode *, struct dentry *, struct inode **); extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int); extern int vfs_whiteout(struct inode *, struct dentry *); -/* - * VFS dentry helper functions. - */ -extern void dentry_unhash(struct dentry *dentry); - /* * VFS file helper functions. */ -- cgit v1.2.3 From 668d0cd56ef7bc71be6dd8c081007221e09d9a86 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 8 Mar 2016 12:44:17 -0500 Subject: replace d_add_unique() with saner primitive new primitive: d_exact_alias(dentry, inode). If there is an unhashed dentry with the same name/parent and given inode, rehash, grab and return it. Otherwise, return NULL. The only caller of d_add_unique() switched to d_exact_alias() + d_splice_alias(). Signed-off-by: Al Viro --- fs/dcache.c | 125 ++++++++++++++++++++----------------------------- fs/nfs/nfs4proc.c | 13 ++--- include/linux/dcache.h | 18 +------ 3 files changed, 58 insertions(+), 98 deletions(-) (limited to 'include') diff --git a/fs/dcache.c b/fs/dcache.c index 2398f9f94337..4d20bf5c609b 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1781,81 +1781,6 @@ void d_instantiate(struct dentry *entry, struct inode * inode) } EXPORT_SYMBOL(d_instantiate); -/** - * d_instantiate_unique - instantiate a non-aliased dentry - * @entry: dentry to instantiate - * @inode: inode to attach to this dentry - * - * Fill in inode information in the entry. On success, it returns NULL. - * If an unhashed alias of "entry" already exists, then we return the - * aliased dentry instead and drop one reference to inode. - * - * Note that in order to avoid conflicts with rename() etc, the caller - * had better be holding the parent directory semaphore. - * - * This also assumes that the inode count has been incremented - * (or otherwise set) by the caller to indicate that it is now - * in use by the dcache. - */ -static struct dentry *__d_instantiate_unique(struct dentry *entry, - struct inode *inode) -{ - struct dentry *alias; - int len = entry->d_name.len; - const char *name = entry->d_name.name; - unsigned int hash = entry->d_name.hash; - - if (!inode) { - __d_instantiate(entry, NULL); - return NULL; - } - - hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) { - /* - * Don't need alias->d_lock here, because aliases with - * d_parent == entry->d_parent are not subject to name or - * parent changes, because the parent inode i_mutex is held. - */ - if (alias->d_name.hash != hash) - continue; - if (alias->d_parent != entry->d_parent) - continue; - if (alias->d_name.len != len) - continue; - if (dentry_cmp(alias, name, len)) - continue; - __dget(alias); - return alias; - } - - __d_instantiate(entry, inode); - return NULL; -} - -struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode) -{ - struct dentry *result; - - BUG_ON(!hlist_unhashed(&entry->d_u.d_alias)); - - if (inode) - spin_lock(&inode->i_lock); - result = __d_instantiate_unique(entry, inode); - if (inode) - spin_unlock(&inode->i_lock); - - if (!result) { - security_d_instantiate(entry, inode); - return NULL; - } - - BUG_ON(!d_unhashed(result)); - iput(inode); - return result; -} - -EXPORT_SYMBOL(d_instantiate_unique); - /** * d_instantiate_no_diralias - instantiate a non-aliased dentry * @entry: dentry to complete @@ -2436,6 +2361,56 @@ void d_rehash(struct dentry * entry) } EXPORT_SYMBOL(d_rehash); +/** + * d_exact_alias - find and hash an exact unhashed alias + * @entry: dentry to add + * @inode: The inode to go with this dentry + * + * If an unhashed dentry with the same name/parent and desired + * inode already exists, hash and return it. Otherwise, return + * NULL. + * + * Parent directory should be locked. + */ +struct dentry *d_exact_alias(struct dentry *entry, struct inode *inode) +{ + struct dentry *alias; + int len = entry->d_name.len; + const char *name = entry->d_name.name; + unsigned int hash = entry->d_name.hash; + + spin_lock(&inode->i_lock); + hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) { + /* + * Don't need alias->d_lock here, because aliases with + * d_parent == entry->d_parent are not subject to name or + * parent changes, because the parent inode i_mutex is held. + */ + if (alias->d_name.hash != hash) + continue; + if (alias->d_parent != entry->d_parent) + continue; + if (alias->d_name.len != len) + continue; + if (dentry_cmp(alias, name, len)) + continue; + spin_lock(&alias->d_lock); + if (!d_unhashed(alias)) { + spin_unlock(&alias->d_lock); + alias = NULL; + } else { + __dget_dlock(alias); + _d_rehash(alias); + spin_unlock(&alias->d_lock); + } + spin_unlock(&inode->i_lock); + return alias; + } + spin_unlock(&inode->i_lock); + return NULL; +} +EXPORT_SYMBOL(d_exact_alias); + /** * dentry_update_name_case - update case insensitive dentry with a new name * @dentry: dentry to be updated diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 4bfc33ad0563..400a70b3be7b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2461,14 +2461,15 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, dentry = opendata->dentry; if (d_really_is_negative(dentry)) { - /* FIXME: Is this d_drop() ever needed? */ + struct dentry *alias; d_drop(dentry); - dentry = d_add_unique(dentry, igrab(state->inode)); - if (dentry == NULL) { - dentry = opendata->dentry; - } else if (dentry != ctx->dentry) { + alias = d_exact_alias(dentry, state->inode); + if (!alias) + alias = d_splice_alias(igrab(state->inode), dentry); + /* d_splice_alias() can't fail here - it's a non-directory */ + if (alias) { dput(ctx->dentry); - ctx->dentry = dget(dentry); + ctx->dentry = dentry = alias; } nfs_set_verifier(dentry, nfs_save_change_attribute(d_inode(opendata->dir))); diff --git a/include/linux/dcache.h b/include/linux/dcache.h index c4b5f4b3f8f8..bda4ec53886b 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -246,6 +246,7 @@ extern struct dentry * d_alloc(struct dentry *, const struct qstr *); extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *); extern struct dentry * d_splice_alias(struct inode *, struct dentry *); extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *); +extern struct dentry * d_exact_alias(struct dentry *, struct inode *); extern struct dentry *d_find_any_alias(struct inode *inode); extern struct dentry * d_obtain_alias(struct inode *); extern struct dentry * d_obtain_root(struct inode *); @@ -288,23 +289,6 @@ static inline void d_add(struct dentry *entry, struct inode *inode) d_rehash(entry); } -/** - * d_add_unique - add dentry to hash queues without aliasing - * @entry: dentry to add - * @inode: The inode to attach to this dentry - * - * This adds the entry to the hash queues and initializes @inode. - * The entry was actually filled in earlier during d_alloc(). - */ -static inline struct dentry *d_add_unique(struct dentry *entry, struct inode *inode) -{ - struct dentry *res; - - res = d_instantiate_unique(entry, inode); - d_rehash(res != NULL ? res : entry); - return res; -} - extern void dentry_update_name_case(struct dentry *, struct qstr *); /* used for rename() and baskets */ -- cgit v1.2.3 From 34d0d19dc0929ccc326448737f05a8fae3d47b8a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 8 Mar 2016 21:01:03 -0500 Subject: uninline d_add() Signed-off-by: Al Viro --- fs/dcache.c | 16 ++++++++++++++++ include/linux/dcache.h | 15 +-------------- 2 files changed, 17 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/fs/dcache.c b/fs/dcache.c index 4d20bf5c609b..12280df07837 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2361,6 +2361,22 @@ void d_rehash(struct dentry * entry) } EXPORT_SYMBOL(d_rehash); +/** + * d_add - add dentry to hash queues + * @entry: dentry to add + * @inode: The inode to attach to this dentry + * + * This adds the entry to the hash queues and initializes @inode. + * The entry was actually filled in earlier during d_alloc(). + */ + +void d_add(struct dentry *entry, struct inode *inode) +{ + d_instantiate(entry, inode); + d_rehash(entry); +} +EXPORT_SYMBOL(d_add); + /** * d_exact_alias - find and hash an exact unhashed alias * @entry: dentry to add diff --git a/include/linux/dcache.h b/include/linux/dcache.h index bda4ec53886b..1c51d2d84a32 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -273,21 +273,8 @@ extern int have_submounts(struct dentry *); * This adds the entry to the hash queues. */ extern void d_rehash(struct dentry *); - -/** - * d_add - add dentry to hash queues - * @entry: dentry to add - * @inode: The inode to attach to this dentry - * - * This adds the entry to the hash queues and initializes @inode. - * The entry was actually filled in earlier during d_alloc(). - */ -static inline void d_add(struct dentry *entry, struct inode *inode) -{ - d_instantiate(entry, inode); - d_rehash(entry); -} +extern void d_add(struct dentry *, struct inode *); extern void dentry_update_name_case(struct dentry *, struct qstr *); -- cgit v1.2.3 From 27f203f655a2e1dab66598a5b19afb637c587f0b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 9 Mar 2016 17:58:49 -0500 Subject: untangle fsnotify_d_instantiate() a bit First of all, don't bother calling it if inode is NULL - that makes inode argument unused. Moreover, do it *before* dropping ->d_lock, not right after that (and don't bother grabbing ->d_lock in it, of course). Signed-off-by: Al Viro --- fs/dcache.c | 3 ++- include/linux/fsnotify.h | 9 --------- include/linux/fsnotify_backend.h | 9 ++------- 3 files changed, 4 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/fs/dcache.c b/fs/dcache.c index 12280df07837..244fd2487fe9 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1750,8 +1750,9 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode) raw_write_seqcount_begin(&dentry->d_seq); __d_set_inode_and_type(dentry, inode, add_flags); raw_write_seqcount_end(&dentry->d_seq); + if (inode) + __fsnotify_d_instantiate(dentry); spin_unlock(&dentry->d_lock); - fsnotify_d_instantiate(dentry, inode); } /** diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 7ee1774edee5..0141f257d67b 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -16,15 +16,6 @@ #include #include -/* - * fsnotify_d_instantiate - instantiate a dentry for inode - */ -static inline void fsnotify_d_instantiate(struct dentry *dentry, - struct inode *inode) -{ - __fsnotify_d_instantiate(dentry, inode); -} - /* Notify this dentry's parent about a child's events. */ static inline int fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask) { diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 6b7e89f45aa4..827b249259f7 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -293,14 +293,9 @@ static inline void __fsnotify_update_dcache_flags(struct dentry *dentry) /* * fsnotify_d_instantiate - instantiate a dentry for inode */ -static inline void __fsnotify_d_instantiate(struct dentry *dentry, struct inode *inode) +static inline void __fsnotify_d_instantiate(struct dentry *dentry) { - if (!inode) - return; - - spin_lock(&dentry->d_lock); __fsnotify_update_dcache_flags(dentry); - spin_unlock(&dentry->d_lock); } /* called from fsnotify listeners, such as fanotify or dnotify */ @@ -399,7 +394,7 @@ static inline void __fsnotify_vfsmount_delete(struct vfsmount *mnt) static inline void __fsnotify_update_dcache_flags(struct dentry *dentry) {} -static inline void __fsnotify_d_instantiate(struct dentry *dentry, struct inode *inode) +static inline void __fsnotify_d_instantiate(struct dentry *dentry) {} static inline u32 fsnotify_get_cookie(void) -- cgit v1.2.3