From 66ee59af630fd8d5f4f56fb28162857e629aa0ab Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 11 Feb 2015 19:56:46 +0100 Subject: fs: remove ki_nbytes There is no need to pass the total request length in the kiocb, as we already get passed in through the iov_iter argument. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/aio.c | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) (limited to 'fs/aio.c') diff --git a/fs/aio.c b/fs/aio.c index 118a2e0088d8..667054c7c067 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1344,12 +1344,13 @@ typedef ssize_t (rw_iter_op)(struct kiocb *, struct iov_iter *); static ssize_t aio_setup_vectored_rw(struct kiocb *kiocb, int rw, char __user *buf, unsigned long *nr_segs, + size_t *len, struct iovec **iovec, bool compat) { ssize_t ret; - *nr_segs = kiocb->ki_nbytes; + *nr_segs = *len; #ifdef CONFIG_COMPAT if (compat) @@ -1364,21 +1365,22 @@ static ssize_t aio_setup_vectored_rw(struct kiocb *kiocb, if (ret < 0) return ret; - /* ki_nbytes now reflect bytes instead of segs */ - kiocb->ki_nbytes = ret; + /* len now reflect bytes instead of segs */ + *len = ret; return 0; } static ssize_t aio_setup_single_vector(struct kiocb *kiocb, int rw, char __user *buf, unsigned long *nr_segs, + size_t len, struct iovec *iovec) { - if (unlikely(!access_ok(!rw, buf, kiocb->ki_nbytes))) + if (unlikely(!access_ok(!rw, buf, len))) return -EFAULT; iovec->iov_base = buf; - iovec->iov_len = kiocb->ki_nbytes; + iovec->iov_len = len; *nr_segs = 1; return 0; } @@ -1388,7 +1390,7 @@ static ssize_t aio_setup_single_vector(struct kiocb *kiocb, * Performs the initial checks and io submission. */ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode, - char __user *buf, bool compat) + char __user *buf, size_t len, bool compat) { struct file *file = req->ki_filp; ssize_t ret; @@ -1423,21 +1425,21 @@ rw_common: if (!rw_op && !iter_op) return -EINVAL; - ret = (opcode == IOCB_CMD_PREADV || - opcode == IOCB_CMD_PWRITEV) - ? aio_setup_vectored_rw(req, rw, buf, &nr_segs, - &iovec, compat) - : aio_setup_single_vector(req, rw, buf, &nr_segs, - iovec); + if (opcode == IOCB_CMD_PREADV || opcode == IOCB_CMD_PWRITEV) + ret = aio_setup_vectored_rw(req, rw, buf, &nr_segs, + &len, &iovec, compat); + else + ret = aio_setup_single_vector(req, rw, buf, &nr_segs, + len, iovec); if (!ret) - ret = rw_verify_area(rw, file, &req->ki_pos, req->ki_nbytes); + ret = rw_verify_area(rw, file, &req->ki_pos, len); if (ret < 0) { if (iovec != inline_vecs) kfree(iovec); return ret; } - req->ki_nbytes = ret; + len = ret; /* XXX: move/kill - rw_verify_area()? */ /* This matches the pread()/pwrite() logic */ @@ -1450,7 +1452,7 @@ rw_common: file_start_write(file); if (iter_op) { - iov_iter_init(&iter, rw, iovec, nr_segs, req->ki_nbytes); + iov_iter_init(&iter, rw, iovec, nr_segs, len); ret = iter_op(req, &iter); } else { ret = rw_op(req, iovec, nr_segs, req->ki_pos); @@ -1553,10 +1555,10 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, req->ki_obj.user = user_iocb; req->ki_user_data = iocb->aio_data; req->ki_pos = iocb->aio_offset; - req->ki_nbytes = iocb->aio_nbytes; ret = aio_run_iocb(req, iocb->aio_lio_opcode, (char __user *)(unsigned long)iocb->aio_buf, + iocb->aio_nbytes, compat); if (ret) goto out_put_req; -- cgit v1.2.3 From 599bd19bdc4c6b20fd91d50f2f79dececbaf80c1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 11 Feb 2015 19:59:44 +0100 Subject: fs: don't allow to complete sync iocbs through aio_complete The AIO interface is fairly complex because it tries to allow filesystems to always work async and then wakeup a synchronous caller through aio_complete. It turns out that basically no one was doing this to avoid the complexity and context switches, and we've already fixed up the remaining users and can now get rid of this case. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/aio.c | 24 +----------------------- fs/ecryptfs/file.c | 6 ------ fs/read_write.c | 26 ++++++++------------------ include/linux/aio.h | 4 ---- net/socket.c | 9 +++------ 5 files changed, 12 insertions(+), 57 deletions(-) (limited to 'fs/aio.c') diff --git a/fs/aio.c b/fs/aio.c index 667054c7c067..8ca8df1c3550 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -778,22 +778,6 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx, return 0; } -/* wait_on_sync_kiocb: - * Waits on the given sync kiocb to complete. - */ -ssize_t wait_on_sync_kiocb(struct kiocb *req) -{ - while (!req->ki_ctx) { - set_current_state(TASK_UNINTERRUPTIBLE); - if (req->ki_ctx) - break; - io_schedule(); - } - __set_current_state(TASK_RUNNING); - return req->ki_user_data; -} -EXPORT_SYMBOL(wait_on_sync_kiocb); - /* * exit_aio: called when the last user of mm goes away. At this point, there is * no way for any new requests to be submited or any of the io_* syscalls to be @@ -1025,13 +1009,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2) * ref, no other paths have a way to get another ref * - the sync task helpfully left a reference to itself in the iocb */ - if (is_sync_kiocb(iocb)) { - iocb->ki_user_data = res; - smp_wmb(); - iocb->ki_ctx = ERR_PTR(-EXDEV); - wake_up_process(iocb->ki_obj.tsk); - return; - } + BUG_ON(is_sync_kiocb(iocb)); if (iocb->ki_list.next) { unsigned long flags; diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 6f4e659f508f..a36da8841e0c 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -52,12 +52,6 @@ static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb, struct file *file = iocb->ki_filp; rc = generic_file_read_iter(iocb, to); - /* - * Even though this is a async interface, we need to wait - * for IO to finish to update atime - */ - if (-EIOCBQUEUED == rc) - rc = wait_on_sync_kiocb(iocb); if (rc >= 0) { path = ecryptfs_dentry_to_lower_path(file->f_path.dentry); touch_atime(path); diff --git a/fs/read_write.c b/fs/read_write.c index f8b8fc1316ab..76e324e8ce8d 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -346,9 +346,7 @@ ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos) iter->type |= READ; ret = file->f_op->read_iter(&kiocb, iter); - if (ret == -EIOCBQUEUED) - ret = wait_on_sync_kiocb(&kiocb); - + BUG_ON(ret == -EIOCBQUEUED); if (ret > 0) *ppos = kiocb.ki_pos; return ret; @@ -368,9 +366,7 @@ ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos) iter->type |= WRITE; ret = file->f_op->write_iter(&kiocb, iter); - if (ret == -EIOCBQUEUED) - ret = wait_on_sync_kiocb(&kiocb); - + BUG_ON(ret == -EIOCBQUEUED); if (ret > 0) *ppos = kiocb.ki_pos; return ret; @@ -426,8 +422,7 @@ ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *pp kiocb.ki_pos = *ppos; ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos); - if (-EIOCBQUEUED == ret) - ret = wait_on_sync_kiocb(&kiocb); + BUG_ON(ret == -EIOCBQUEUED); *ppos = kiocb.ki_pos; return ret; } @@ -446,8 +441,7 @@ ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *p iov_iter_init(&iter, READ, &iov, 1, len); ret = filp->f_op->read_iter(&kiocb, &iter); - if (-EIOCBQUEUED == ret) - ret = wait_on_sync_kiocb(&kiocb); + BUG_ON(ret == -EIOCBQUEUED); *ppos = kiocb.ki_pos; return ret; } @@ -508,8 +502,7 @@ ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, lof kiocb.ki_pos = *ppos; ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos); - if (-EIOCBQUEUED == ret) - ret = wait_on_sync_kiocb(&kiocb); + BUG_ON(ret == -EIOCBQUEUED); *ppos = kiocb.ki_pos; return ret; } @@ -528,8 +521,7 @@ ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, lo iov_iter_init(&iter, WRITE, &iov, 1, len); ret = filp->f_op->write_iter(&kiocb, &iter); - if (-EIOCBQUEUED == ret) - ret = wait_on_sync_kiocb(&kiocb); + BUG_ON(ret == -EIOCBQUEUED); *ppos = kiocb.ki_pos; return ret; } @@ -716,8 +708,7 @@ static ssize_t do_iter_readv_writev(struct file *filp, int rw, const struct iove iov_iter_init(&iter, rw, iov, nr_segs, len); ret = fn(&kiocb, &iter); - if (ret == -EIOCBQUEUED) - ret = wait_on_sync_kiocb(&kiocb); + BUG_ON(ret == -EIOCBQUEUED); *ppos = kiocb.ki_pos; return ret; } @@ -732,8 +723,7 @@ static ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov, kiocb.ki_pos = *ppos; ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos); - if (ret == -EIOCBQUEUED) - ret = wait_on_sync_kiocb(&kiocb); + BUG_ON(ret == -EIOCBQUEUED); *ppos = kiocb.ki_pos; return ret; } diff --git a/include/linux/aio.h b/include/linux/aio.h index 132d1ecba435..f8516430490d 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -37,7 +37,6 @@ struct kiocb { union { void __user *user; - struct task_struct *tsk; } ki_obj; __u64 ki_user_data; /* user's data for completion */ @@ -63,13 +62,11 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) *kiocb = (struct kiocb) { .ki_ctx = NULL, .ki_filp = filp, - .ki_obj.tsk = current, }; } /* prototypes */ #ifdef CONFIG_AIO -extern ssize_t wait_on_sync_kiocb(struct kiocb *iocb); extern void aio_complete(struct kiocb *iocb, long res, long res2); struct mm_struct; extern void exit_aio(struct mm_struct *mm); @@ -77,7 +74,6 @@ extern long do_io_submit(aio_context_t ctx_id, long nr, struct iocb __user *__user *iocbpp, bool compat); void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel); #else -static inline ssize_t wait_on_sync_kiocb(struct kiocb *iocb) { return 0; } static inline void aio_complete(struct kiocb *iocb, long res, long res2) { } struct mm_struct; static inline void exit_aio(struct mm_struct *mm) { } diff --git a/net/socket.c b/net/socket.c index f92145554f34..f6c519d7b3ba 100644 --- a/net/socket.c +++ b/net/socket.c @@ -633,8 +633,7 @@ static int do_sock_sendmsg(struct socket *sock, struct msghdr *msg, init_sync_kiocb(&iocb, NULL); ret = nosec ? __sock_sendmsg_nosec(&iocb, sock, msg, size) : __sock_sendmsg(&iocb, sock, msg, size); - if (-EIOCBQUEUED == ret) - ret = wait_on_sync_kiocb(&iocb); + BUG_ON(ret == -EIOCBQUEUED); return ret; } @@ -766,8 +765,7 @@ int sock_recvmsg(struct socket *sock, struct msghdr *msg, init_sync_kiocb(&iocb, NULL); ret = __sock_recvmsg(&iocb, sock, msg, size, flags); - if (-EIOCBQUEUED == ret) - ret = wait_on_sync_kiocb(&iocb); + BUG_ON(ret == -EIOCBQUEUED); return ret; } EXPORT_SYMBOL(sock_recvmsg); @@ -780,8 +778,7 @@ static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, init_sync_kiocb(&iocb, NULL); ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags); - if (-EIOCBQUEUED == ret) - ret = wait_on_sync_kiocb(&iocb); + BUG_ON(ret == -EIOCBQUEUED); return ret; } -- cgit v1.2.3 From 04b2fa9f8f36ec6fb6fd1c9dc9df6fff0cd27323 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 2 Feb 2015 14:49:06 +0100 Subject: fs: split generic and aio kiocb Most callers in the kernel want to perform synchronous file I/O, but still have to bloat the stack with a full struct kiocb. Split out the parts needed in filesystem code from those in the aio code, and only allocate those needed to pass down argument on the stack. The aio code embedds the generic iocb in the one it allocates and can easily get back to it by using container_of. Also add a ->ki_complete method to struct kiocb, this is used to call into the aio code and thus removes the dependency on aio for filesystems impementing asynchronous operations. It will also allow other callers to substitute their own completion callback. We also add a new ->ki_flags field to work around the nasty layering violation recently introduced in commit 5e33f6 ("usb: gadget: ffs: add eventfd notification about ffs events"). Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- drivers/usb/gadget/function/f_fs.c | 5 +- drivers/usb/gadget/legacy/inode.c | 5 +- fs/aio.c | 94 ++++++++++++++++++++++++++------------ fs/direct-io.c | 4 +- fs/fuse/file.c | 2 +- fs/nfs/direct.c | 2 +- include/linux/aio.h | 46 +++---------------- 7 files changed, 81 insertions(+), 77 deletions(-) (limited to 'fs/aio.c') diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 175c9956cbe3..b64538b498dc 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -655,9 +655,10 @@ static void ffs_user_copy_worker(struct work_struct *work) unuse_mm(io_data->mm); } - aio_complete(io_data->kiocb, ret, ret); + io_data->kiocb->ki_complete(io_data->kiocb, ret, ret); - if (io_data->ffs->ffs_eventfd && !io_data->kiocb->ki_eventfd) + if (io_data->ffs->ffs_eventfd && + !(io_data->kiocb->ki_flags & IOCB_EVENTFD)) eventfd_signal(io_data->ffs->ffs_eventfd, 1); usb_ep_free_request(io_data->ep, io_data->req); diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index 200f9a584064..a4a80694f607 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -469,7 +469,7 @@ static void ep_user_copy_worker(struct work_struct *work) ret = -EFAULT; /* completing the iocb can drop the ctx and mm, don't touch mm after */ - aio_complete(iocb, ret, ret); + iocb->ki_complete(iocb, ret, ret); kfree(priv->buf); kfree(priv->to_free); @@ -497,7 +497,8 @@ static void ep_aio_complete(struct usb_ep *ep, struct usb_request *req) kfree(priv); iocb->private = NULL; /* aio_complete() reports bytes-transferred _and_ faults */ - aio_complete(iocb, req->actual ? req->actual : req->status, + + iocb->ki_complete(iocb, req->actual ? req->actual : req->status, req->status); } else { /* ep_copy_to_user() won't report both; we hide some faults */ diff --git a/fs/aio.c b/fs/aio.c index 8ca8df1c3550..958286536a10 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -151,6 +151,38 @@ struct kioctx { unsigned id; }; +/* + * We use ki_cancel == KIOCB_CANCELLED to indicate that a kiocb has been either + * cancelled or completed (this makes a certain amount of sense because + * successful cancellation - io_cancel() - does deliver the completion to + * userspace). + * + * And since most things don't implement kiocb cancellation and we'd really like + * kiocb completion to be lockless when possible, we use ki_cancel to + * synchronize cancellation and completion - we only set it to KIOCB_CANCELLED + * with xchg() or cmpxchg(), see batch_complete_aio() and kiocb_cancel(). + */ +#define KIOCB_CANCELLED ((void *) (~0ULL)) + +struct aio_kiocb { + struct kiocb common; + + struct kioctx *ki_ctx; + kiocb_cancel_fn *ki_cancel; + + struct iocb __user *ki_user_iocb; /* user's aiocb */ + __u64 ki_user_data; /* user's data for completion */ + + struct list_head ki_list; /* the aio core uses this + * for cancellation */ + + /* + * If the aio_resfd field of the userspace iocb is not zero, + * this is the underlying eventfd context to deliver events to. + */ + struct eventfd_ctx *ki_eventfd; +}; + /*------ sysctl variables----*/ static DEFINE_SPINLOCK(aio_nr_lock); unsigned long aio_nr; /* current system wide number of aio requests */ @@ -220,7 +252,7 @@ static int __init aio_setup(void) if (IS_ERR(aio_mnt)) panic("Failed to create aio fs mount."); - kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC); + kiocb_cachep = KMEM_CACHE(aio_kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC); kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC); pr_debug("sizeof(struct page) = %zu\n", sizeof(struct page)); @@ -480,8 +512,9 @@ static int aio_setup_ring(struct kioctx *ctx) #define AIO_EVENTS_FIRST_PAGE ((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event)) #define AIO_EVENTS_OFFSET (AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE) -void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel) +void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel) { + struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, common); struct kioctx *ctx = req->ki_ctx; unsigned long flags; @@ -496,7 +529,7 @@ void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel) } EXPORT_SYMBOL(kiocb_set_cancel_fn); -static int kiocb_cancel(struct kiocb *kiocb) +static int kiocb_cancel(struct aio_kiocb *kiocb) { kiocb_cancel_fn *old, *cancel; @@ -514,7 +547,7 @@ static int kiocb_cancel(struct kiocb *kiocb) cancel = cmpxchg(&kiocb->ki_cancel, old, KIOCB_CANCELLED); } while (cancel != old); - return cancel(kiocb); + return cancel(&kiocb->common); } static void free_ioctx(struct work_struct *work) @@ -550,13 +583,13 @@ static void free_ioctx_reqs(struct percpu_ref *ref) static void free_ioctx_users(struct percpu_ref *ref) { struct kioctx *ctx = container_of(ref, struct kioctx, users); - struct kiocb *req; + struct aio_kiocb *req; spin_lock_irq(&ctx->ctx_lock); while (!list_empty(&ctx->active_reqs)) { req = list_first_entry(&ctx->active_reqs, - struct kiocb, ki_list); + struct aio_kiocb, ki_list); list_del_init(&req->ki_list); kiocb_cancel(req); @@ -932,9 +965,9 @@ static void user_refill_reqs_available(struct kioctx *ctx) * Allocate a slot for an aio request. * Returns NULL if no requests are free. */ -static inline struct kiocb *aio_get_req(struct kioctx *ctx) +static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx) { - struct kiocb *req; + struct aio_kiocb *req; if (!get_reqs_available(ctx)) { user_refill_reqs_available(ctx); @@ -955,10 +988,10 @@ out_put: return NULL; } -static void kiocb_free(struct kiocb *req) +static void kiocb_free(struct aio_kiocb *req) { - if (req->ki_filp) - fput(req->ki_filp); + if (req->common.ki_filp) + fput(req->common.ki_filp); if (req->ki_eventfd != NULL) eventfd_ctx_put(req->ki_eventfd); kmem_cache_free(kiocb_cachep, req); @@ -994,8 +1027,9 @@ out: /* aio_complete * Called when the io request on the given iocb is complete. */ -void aio_complete(struct kiocb *iocb, long res, long res2) +static void aio_complete(struct kiocb *kiocb, long res, long res2) { + struct aio_kiocb *iocb = container_of(kiocb, struct aio_kiocb, common); struct kioctx *ctx = iocb->ki_ctx; struct aio_ring *ring; struct io_event *ev_page, *event; @@ -1009,7 +1043,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2) * ref, no other paths have a way to get another ref * - the sync task helpfully left a reference to itself in the iocb */ - BUG_ON(is_sync_kiocb(iocb)); + BUG_ON(is_sync_kiocb(kiocb)); if (iocb->ki_list.next) { unsigned long flags; @@ -1035,7 +1069,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2) ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]); event = ev_page + pos % AIO_EVENTS_PER_PAGE; - event->obj = (u64)(unsigned long)iocb->ki_obj.user; + event->obj = (u64)(unsigned long)iocb->ki_user_iocb; event->data = iocb->ki_user_data; event->res = res; event->res2 = res2; @@ -1044,7 +1078,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2) flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]); pr_debug("%p[%u]: %p: %p %Lx %lx %lx\n", - ctx, tail, iocb, iocb->ki_obj.user, iocb->ki_user_data, + ctx, tail, iocb, iocb->ki_user_iocb, iocb->ki_user_data, res, res2); /* after flagging the request as done, we @@ -1091,7 +1125,6 @@ void aio_complete(struct kiocb *iocb, long res, long res2) percpu_ref_put(&ctx->reqs); } -EXPORT_SYMBOL(aio_complete); /* aio_read_events_ring * Pull an event off of the ioctx's event ring. Returns the number of @@ -1480,7 +1513,7 @@ rw_common: static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, struct iocb *iocb, bool compat) { - struct kiocb *req; + struct aio_kiocb *req; ssize_t ret; /* enforce forwards compatibility on users */ @@ -1503,11 +1536,14 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, if (unlikely(!req)) return -EAGAIN; - req->ki_filp = fget(iocb->aio_fildes); - if (unlikely(!req->ki_filp)) { + req->common.ki_filp = fget(iocb->aio_fildes); + if (unlikely(!req->common.ki_filp)) { ret = -EBADF; goto out_put_req; } + req->common.ki_pos = iocb->aio_offset; + req->common.ki_complete = aio_complete; + req->common.ki_flags = 0; if (iocb->aio_flags & IOCB_FLAG_RESFD) { /* @@ -1522,6 +1558,8 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, req->ki_eventfd = NULL; goto out_put_req; } + + req->common.ki_flags |= IOCB_EVENTFD; } ret = put_user(KIOCB_KEY, &user_iocb->aio_key); @@ -1530,11 +1568,10 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, goto out_put_req; } - req->ki_obj.user = user_iocb; + req->ki_user_iocb = user_iocb; req->ki_user_data = iocb->aio_data; - req->ki_pos = iocb->aio_offset; - ret = aio_run_iocb(req, iocb->aio_lio_opcode, + ret = aio_run_iocb(&req->common, iocb->aio_lio_opcode, (char __user *)(unsigned long)iocb->aio_buf, iocb->aio_nbytes, compat); @@ -1623,10 +1660,10 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr, /* lookup_kiocb * Finds a given iocb for cancellation. */ -static struct kiocb *lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb, - u32 key) +static struct aio_kiocb * +lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb, u32 key) { - struct list_head *pos; + struct aio_kiocb *kiocb; assert_spin_locked(&ctx->ctx_lock); @@ -1634,9 +1671,8 @@ static struct kiocb *lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb, return NULL; /* TODO: use a hash or array, this sucks. */ - list_for_each(pos, &ctx->active_reqs) { - struct kiocb *kiocb = list_kiocb(pos); - if (kiocb->ki_obj.user == iocb) + list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) { + if (kiocb->ki_user_iocb == iocb) return kiocb; } return NULL; @@ -1656,7 +1692,7 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb, struct io_event __user *, result) { struct kioctx *ctx; - struct kiocb *kiocb; + struct aio_kiocb *kiocb; u32 key; int ret; diff --git a/fs/direct-io.c b/fs/direct-io.c index e181b6b2e297..c38b460776e6 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -265,7 +265,7 @@ static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret, ret = err; } - aio_complete(dio->iocb, ret, 0); + dio->iocb->ki_complete(dio->iocb, ret, 0); } kmem_cache_free(dio_cache, dio); @@ -1056,7 +1056,7 @@ static inline int drop_refcount(struct dio *dio) * operation. AIO can if it was a broken operation described above or * in fact if all the bios race to complete before we get here. In * that case dio_complete() translates the EIOCBQUEUED into the proper - * return code that the caller will hand to aio_complete(). + * return code that the caller will hand to ->complete(). * * This is managed by the bio_lock instead of being an atomic_t so that * completion paths can drop their ref and use the remaining count to diff --git a/fs/fuse/file.c b/fs/fuse/file.c index f81d83eb9758..a5c5e38b3ff8 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -584,7 +584,7 @@ static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos) spin_unlock(&fc->lock); } - aio_complete(io->iocb, res, 0); + io->iocb->ki_complete(io->iocb, res, 0); kfree(io); } } diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 27cebf164070..5db3385fc108 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -393,7 +393,7 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq, bool write) long res = (long) dreq->error; if (!res) res = (long) dreq->count; - aio_complete(dreq->iocb, res, 0); + dreq->iocb->ki_complete(dreq->iocb, res, 0); } complete_all(&dreq->completion); diff --git a/include/linux/aio.h b/include/linux/aio.h index f8516430490d..5c40b61285ac 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -14,67 +14,38 @@ struct kiocb; #define KIOCB_KEY 0 -/* - * We use ki_cancel == KIOCB_CANCELLED to indicate that a kiocb has been either - * cancelled or completed (this makes a certain amount of sense because - * successful cancellation - io_cancel() - does deliver the completion to - * userspace). - * - * And since most things don't implement kiocb cancellation and we'd really like - * kiocb completion to be lockless when possible, we use ki_cancel to - * synchronize cancellation and completion - we only set it to KIOCB_CANCELLED - * with xchg() or cmpxchg(), see batch_complete_aio() and kiocb_cancel(). - */ -#define KIOCB_CANCELLED ((void *) (~0ULL)) - typedef int (kiocb_cancel_fn)(struct kiocb *); +#define IOCB_EVENTFD (1 << 0) + struct kiocb { struct file *ki_filp; - struct kioctx *ki_ctx; /* NULL for sync ops */ - kiocb_cancel_fn *ki_cancel; - void *private; - - union { - void __user *user; - } ki_obj; - - __u64 ki_user_data; /* user's data for completion */ loff_t ki_pos; - - struct list_head ki_list; /* the aio core uses this - * for cancellation */ - - /* - * If the aio_resfd field of the userspace iocb is not zero, - * this is the underlying eventfd context to deliver events to. - */ - struct eventfd_ctx *ki_eventfd; + void (*ki_complete)(struct kiocb *iocb, long ret, long ret2); + void *private; + int ki_flags; }; static inline bool is_sync_kiocb(struct kiocb *kiocb) { - return kiocb->ki_ctx == NULL; + return kiocb->ki_complete == NULL; } static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) { *kiocb = (struct kiocb) { - .ki_ctx = NULL, .ki_filp = filp, }; } /* prototypes */ #ifdef CONFIG_AIO -extern void aio_complete(struct kiocb *iocb, long res, long res2); struct mm_struct; extern void exit_aio(struct mm_struct *mm); extern long do_io_submit(aio_context_t ctx_id, long nr, struct iocb __user *__user *iocbpp, bool compat); void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel); #else -static inline void aio_complete(struct kiocb *iocb, long res, long res2) { } struct mm_struct; static inline void exit_aio(struct mm_struct *mm) { } static inline long do_io_submit(aio_context_t ctx_id, long nr, @@ -84,11 +55,6 @@ static inline void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel) { } #endif /* CONFIG_AIO */ -static inline struct kiocb *list_kiocb(struct list_head *h) -{ - return list_entry(h, struct kiocb, ki_list); -} - /* for sysctl: */ extern unsigned long aio_nr; extern unsigned long aio_max_nr; -- cgit v1.2.3 From 4c185ce06dca14f5cea192f5a2c981ef50663f2b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 20 Mar 2015 20:17:32 -0400 Subject: aio: lift iov_iter_init() into aio_setup_..._rw() the only non-trivial detail is that we do it before rw_verify_area(), so we'd better cap the length ourselves in aio_setup_single_rw() case (for vectored case rw_copy_check_uvector() will do that for us). Signed-off-by: Al Viro --- fs/aio.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'fs/aio.c') diff --git a/fs/aio.c b/fs/aio.c index 435ca29eca31..7816e8ec3c0e 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1357,7 +1357,8 @@ static ssize_t aio_setup_vectored_rw(struct kiocb *kiocb, unsigned long *nr_segs, size_t *len, struct iovec **iovec, - bool compat) + bool compat, + struct iov_iter *iter) { ssize_t ret; @@ -1378,6 +1379,7 @@ static ssize_t aio_setup_vectored_rw(struct kiocb *kiocb, /* len now reflect bytes instead of segs */ *len = ret; + iov_iter_init(iter, rw, *iovec, *nr_segs, *len); return 0; } @@ -1385,14 +1387,18 @@ static ssize_t aio_setup_single_vector(struct kiocb *kiocb, int rw, char __user *buf, unsigned long *nr_segs, size_t len, - struct iovec *iovec) + struct iovec *iovec, + struct iov_iter *iter) { + if (len > MAX_RW_COUNT) + len = MAX_RW_COUNT; if (unlikely(!access_ok(!rw, buf, len))) return -EFAULT; iovec->iov_base = buf; iovec->iov_len = len; *nr_segs = 1; + iov_iter_init(iter, rw, iovec, *nr_segs, len); return 0; } @@ -1438,10 +1444,10 @@ rw_common: if (opcode == IOCB_CMD_PREADV || opcode == IOCB_CMD_PWRITEV) ret = aio_setup_vectored_rw(req, rw, buf, &nr_segs, - &len, &iovec, compat); + &len, &iovec, compat, &iter); else ret = aio_setup_single_vector(req, rw, buf, &nr_segs, - len, iovec); + len, iovec, &iter); if (!ret) ret = rw_verify_area(rw, file, &req->ki_pos, len); if (ret < 0) { @@ -1463,10 +1469,9 @@ rw_common: file_start_write(file); if (iter_op) { - iov_iter_init(&iter, rw, iovec, nr_segs, len); ret = iter_op(req, &iter); } else { - ret = rw_op(req, iovec, nr_segs, req->ki_pos); + ret = rw_op(req, iter.iov, iter.nr_segs, req->ki_pos); } if (rw == WRITE) -- cgit v1.2.3 From a96114fa1acaabca1091a27aacebd945a5733075 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 20 Mar 2015 20:40:18 -0400 Subject: aio: simplify arguments of aio_setup_..._rw() We don't need req in either of those. We don't need nr_segs in caller. We don't really need len in caller either - iov_iter_count(&iter) will do. Signed-off-by: Al Viro --- fs/aio.c | 45 +++++++++++++++++---------------------------- 1 file changed, 17 insertions(+), 28 deletions(-) (limited to 'fs/aio.c') diff --git a/fs/aio.c b/fs/aio.c index 7816e8ec3c0e..2ba172cfdeba 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1352,43 +1352,33 @@ typedef ssize_t (aio_rw_op)(struct kiocb *, const struct iovec *, unsigned long, loff_t); typedef ssize_t (rw_iter_op)(struct kiocb *, struct iov_iter *); -static ssize_t aio_setup_vectored_rw(struct kiocb *kiocb, - int rw, char __user *buf, - unsigned long *nr_segs, - size_t *len, - struct iovec **iovec, - bool compat, - struct iov_iter *iter) +static int aio_setup_vectored_rw(int rw, char __user *buf, size_t len, + struct iovec **iovec, + bool compat, + struct iov_iter *iter) { ssize_t ret; - *nr_segs = *len; - #ifdef CONFIG_COMPAT if (compat) ret = compat_rw_copy_check_uvector(rw, (struct compat_iovec __user *)buf, - *nr_segs, UIO_FASTIOV, *iovec, iovec); + len, UIO_FASTIOV, *iovec, iovec); else #endif ret = rw_copy_check_uvector(rw, (struct iovec __user *)buf, - *nr_segs, UIO_FASTIOV, *iovec, iovec); + len, UIO_FASTIOV, *iovec, iovec); if (ret < 0) return ret; - /* len now reflect bytes instead of segs */ - *len = ret; - iov_iter_init(iter, rw, *iovec, *nr_segs, *len); + iov_iter_init(iter, rw, *iovec, len, ret); return 0; } -static ssize_t aio_setup_single_vector(struct kiocb *kiocb, - int rw, char __user *buf, - unsigned long *nr_segs, - size_t len, - struct iovec *iovec, - struct iov_iter *iter) +static int aio_setup_single_vector(int rw, char __user *buf, size_t len, + struct iovec *iovec, + struct iov_iter *iter) { if (len > MAX_RW_COUNT) len = MAX_RW_COUNT; @@ -1397,8 +1387,7 @@ static ssize_t aio_setup_single_vector(struct kiocb *kiocb, iovec->iov_base = buf; iovec->iov_len = len; - *nr_segs = 1; - iov_iter_init(iter, rw, iovec, *nr_segs, len); + iov_iter_init(iter, rw, iovec, 1, len); return 0; } @@ -1411,7 +1400,6 @@ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode, { struct file *file = req->ki_filp; ssize_t ret; - unsigned long nr_segs; int rw; fmode_t mode; aio_rw_op *rw_op; @@ -1443,13 +1431,14 @@ rw_common: return -EINVAL; if (opcode == IOCB_CMD_PREADV || opcode == IOCB_CMD_PWRITEV) - ret = aio_setup_vectored_rw(req, rw, buf, &nr_segs, - &len, &iovec, compat, &iter); + ret = aio_setup_vectored_rw(rw, buf, len, + &iovec, compat, &iter); else - ret = aio_setup_single_vector(req, rw, buf, &nr_segs, - len, iovec, &iter); + ret = aio_setup_single_vector(rw, buf, len, + iovec, &iter); if (!ret) - ret = rw_verify_area(rw, file, &req->ki_pos, len); + ret = rw_verify_area(rw, file, &req->ki_pos, + iov_iter_count(&iter)); if (ret < 0) { if (iovec != inline_vecs) kfree(iovec); -- cgit v1.2.3 From d4fb392f4c5e57197a43b729e6fad5e02e5acaca Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 21 Mar 2015 19:11:55 -0400 Subject: kill aio_setup_single_vector() identical to import_single_range() Signed-off-by: Al Viro --- fs/aio.c | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) (limited to 'fs/aio.c') diff --git a/fs/aio.c b/fs/aio.c index 2ba172cfdeba..3dba5a70ad97 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1376,21 +1376,6 @@ static int aio_setup_vectored_rw(int rw, char __user *buf, size_t len, return 0; } -static int aio_setup_single_vector(int rw, char __user *buf, size_t len, - struct iovec *iovec, - struct iov_iter *iter) -{ - if (len > MAX_RW_COUNT) - len = MAX_RW_COUNT; - if (unlikely(!access_ok(!rw, buf, len))) - return -EFAULT; - - iovec->iov_base = buf; - iovec->iov_len = len; - iov_iter_init(iter, rw, iovec, 1, len); - return 0; -} - /* * aio_run_iocb: * Performs the initial checks and io submission. @@ -1434,8 +1419,7 @@ rw_common: ret = aio_setup_vectored_rw(rw, buf, len, &iovec, compat, &iter); else - ret = aio_setup_single_vector(rw, buf, len, - iovec, &iter); + ret = import_single_range(rw, buf, len, iovec, &iter); if (!ret) ret = rw_verify_area(rw, file, &req->ki_pos, iov_iter_count(&iter)); -- cgit v1.2.3 From 32a56afa23e157b444b6c2b943322ea0d119517b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 21 Mar 2015 19:34:53 -0400 Subject: aio_setup_vectored_rw(): switch to {compat_,}import_iovec() Signed-off-by: Al Viro --- fs/aio.c | 28 +++++++++------------------- 1 file changed, 9 insertions(+), 19 deletions(-) (limited to 'fs/aio.c') diff --git a/fs/aio.c b/fs/aio.c index 3dba5a70ad97..3b8467aeb5ee 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1357,23 +1357,14 @@ static int aio_setup_vectored_rw(int rw, char __user *buf, size_t len, bool compat, struct iov_iter *iter) { - ssize_t ret; - #ifdef CONFIG_COMPAT if (compat) - ret = compat_rw_copy_check_uvector(rw, + return compat_import_iovec(rw, (struct compat_iovec __user *)buf, - len, UIO_FASTIOV, *iovec, iovec); - else + len, UIO_FASTIOV, iovec, iter); #endif - ret = rw_copy_check_uvector(rw, - (struct iovec __user *)buf, - len, UIO_FASTIOV, *iovec, iovec); - if (ret < 0) - return ret; - - iov_iter_init(iter, rw, *iovec, len, ret); - return 0; + return import_iovec(rw, (struct iovec __user *)buf, + len, UIO_FASTIOV, iovec, iter); } /* @@ -1418,14 +1409,15 @@ rw_common: if (opcode == IOCB_CMD_PREADV || opcode == IOCB_CMD_PWRITEV) ret = aio_setup_vectored_rw(rw, buf, len, &iovec, compat, &iter); - else + else { ret = import_single_range(rw, buf, len, iovec, &iter); + iovec = NULL; + } if (!ret) ret = rw_verify_area(rw, file, &req->ki_pos, iov_iter_count(&iter)); if (ret < 0) { - if (iovec != inline_vecs) - kfree(iovec); + kfree(iovec); return ret; } @@ -1449,6 +1441,7 @@ rw_common: if (rw == WRITE) file_end_write(file); + kfree(iovec); break; case IOCB_CMD_FDSYNC: @@ -1470,9 +1463,6 @@ rw_common: return -EINVAL; } - if (iovec != inline_vecs) - kfree(iovec); - if (ret != -EIOCBQUEUED) { /* * There's no easy way to restart the syscall since other AIO's -- cgit v1.2.3