summaryrefslogtreecommitdiff
path: root/fs/io_uring.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/io_uring.c')
-rw-r--r--fs/io_uring.c73
1 files changed, 53 insertions, 20 deletions
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 1949b80677e7..dff34975d86b 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -697,6 +697,7 @@ enum {
REQ_F_NO_FILE_TABLE_BIT,
REQ_F_LTIMEOUT_ACTIVE_BIT,
REQ_F_COMPLETE_INLINE_BIT,
+ REQ_F_REISSUE_BIT,
/* not a real bit, just to check we're not overflowing the space */
__REQ_F_LAST_BIT,
@@ -740,6 +741,8 @@ enum {
REQ_F_LTIMEOUT_ACTIVE = BIT(REQ_F_LTIMEOUT_ACTIVE_BIT),
/* completion is deferred through io_comp_state */
REQ_F_COMPLETE_INLINE = BIT(REQ_F_COMPLETE_INLINE_BIT),
+ /* caller should reissue async */
+ REQ_F_REISSUE = BIT(REQ_F_REISSUE_BIT),
};
struct async_poll {
@@ -1213,7 +1216,7 @@ static void io_prep_async_work(struct io_kiocb *req)
if (req->flags & REQ_F_ISREG) {
if (def->hash_reg_file || (ctx->flags & IORING_SETUP_IOPOLL))
io_wq_hash_work(&req->work, file_inode(req->file));
- } else {
+ } else if (!req->file || !S_ISBLK(file_inode(req->file)->i_mode)) {
if (def->unbound_nonreg_file)
req->work.flags |= IO_WQ_WORK_UNBOUND;
}
@@ -2476,6 +2479,11 @@ static bool io_rw_should_reissue(struct io_kiocb *req)
return false;
return true;
}
+#else
+static bool io_rw_should_reissue(struct io_kiocb *req)
+{
+ return false;
+}
#endif
static bool io_rw_reissue(struct io_kiocb *req)
@@ -2503,8 +2511,10 @@ static void __io_complete_rw(struct io_kiocb *req, long res, long res2,
if (req->rw.kiocb.ki_flags & IOCB_WRITE)
kiocb_end_write(req);
- if ((res == -EAGAIN || res == -EOPNOTSUPP) && io_rw_reissue(req))
+ if ((res == -EAGAIN || res == -EOPNOTSUPP) && io_rw_should_reissue(req)) {
+ req->flags |= REQ_F_REISSUE;
return;
+ }
if (res != req->result)
req_set_fail_links(req);
if (req->flags & REQ_F_BUFFER_SELECTED)
@@ -2752,6 +2762,7 @@ static void kiocb_done(struct kiocb *kiocb, ssize_t ret,
{
struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
struct io_async_rw *io = req->async_data;
+ bool check_reissue = kiocb->ki_complete == io_complete_rw;
/* add previously done IO, if any */
if (io && io->bytes_done > 0) {
@@ -2767,6 +2778,18 @@ static void kiocb_done(struct kiocb *kiocb, ssize_t ret,
__io_complete_rw(req, ret, 0, issue_flags);
else
io_rw_done(kiocb, ret);
+
+ if (check_reissue && req->flags & REQ_F_REISSUE) {
+ req->flags &= ~REQ_F_REISSUE;
+ if (!io_rw_reissue(req)) {
+ int cflags = 0;
+
+ req_set_fail_links(req);
+ if (req->flags & REQ_F_BUFFER_SELECTED)
+ cflags = io_put_rw_kbuf(req);
+ __io_req_complete(req, issue_flags, ret, cflags);
+ }
+ }
}
static int io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter)
@@ -3283,11 +3306,8 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
ret = io_iter_do_read(req, iter);
- if (ret == -EIOCBQUEUED) {
- if (req->async_data)
- iov_iter_revert(iter, io_size - iov_iter_count(iter));
- goto out_free;
- } else if (ret == -EAGAIN) {
+ if (ret == -EAGAIN || (req->flags & REQ_F_REISSUE)) {
+ req->flags &= ~REQ_F_REISSUE;
/* IOPOLL retry should happen for io-wq threads */
if (!force_nonblock && !(req->ctx->flags & IORING_SETUP_IOPOLL))
goto done;
@@ -3297,6 +3317,8 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
/* some cases will consume bytes even on error returns */
iov_iter_revert(iter, io_size - iov_iter_count(iter));
ret = 0;
+ } else if (ret == -EIOCBQUEUED) {
+ goto out_free;
} else if (ret <= 0 || ret == io_size || !force_nonblock ||
(req->flags & REQ_F_NOWAIT) || !(req->flags & REQ_F_ISREG)) {
/* read all, failed, already did sync or don't want to retry */
@@ -3409,6 +3431,11 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
else
ret2 = -EINVAL;
+ if (req->flags & REQ_F_REISSUE) {
+ req->flags &= ~REQ_F_REISSUE;
+ ret2 = -EAGAIN;
+ }
+
/*
* Raw bdev writes will return -EOPNOTSUPP for IOCB_NOWAIT. Just
* retry them without IOCB_NOWAIT.
@@ -3418,8 +3445,6 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
/* no retry on NONBLOCK nor RWF_NOWAIT */
if (ret2 == -EAGAIN && (req->flags & REQ_F_NOWAIT))
goto done;
- if (ret2 == -EIOCBQUEUED && req->async_data)
- iov_iter_revert(iter, io_size - iov_iter_count(iter));
if (!force_nonblock || ret2 != -EAGAIN) {
/* IOPOLL retry should happen for io-wq threads */
if ((req->ctx->flags & IORING_SETUP_IOPOLL) && ret2 == -EAGAIN)
@@ -6718,7 +6743,7 @@ static int io_sq_thread(void *data)
char buf[TASK_COMM_LEN];
DEFINE_WAIT(wait);
- sprintf(buf, "iou-sqp-%d", sqd->task_pid);
+ snprintf(buf, sizeof(buf), "iou-sqp-%d", sqd->task_pid);
set_task_comm(current, buf);
current->pf_io_worker = NULL;
@@ -6729,26 +6754,32 @@ static int io_sq_thread(void *data)
current->flags |= PF_NO_SETAFFINITY;
mutex_lock(&sqd->lock);
+ /* a user may had exited before the thread started */
+ io_run_task_work_head(&sqd->park_task_work);
+
while (!test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state)) {
int ret;
bool cap_entries, sqt_spin, needs_sched;
- if (test_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state)) {
+ if (test_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state) ||
+ signal_pending(current)) {
+ bool did_sig = false;
+
mutex_unlock(&sqd->lock);
+ if (signal_pending(current)) {
+ struct ksignal ksig;
+
+ did_sig = get_signal(&ksig);
+ }
cond_resched();
mutex_lock(&sqd->lock);
io_run_task_work();
io_run_task_work_head(&sqd->park_task_work);
+ if (did_sig)
+ break;
timeout = jiffies + sqd->sq_thread_idle;
continue;
}
- if (signal_pending(current)) {
- struct ksignal ksig;
-
- if (!get_signal(&ksig))
- continue;
- break;
- }
sqt_spin = false;
cap_entries = !list_is_singular(&sqd->ctx_list);
list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
@@ -8603,9 +8634,9 @@ static bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk,
canceled++;
}
}
- io_commit_cqring(ctx);
+ if (canceled != 0)
+ io_commit_cqring(ctx);
spin_unlock_irq(&ctx->completion_lock);
-
if (canceled != 0)
io_cqring_ev_posted(ctx);
return canceled != 0;
@@ -9002,6 +9033,8 @@ void __io_uring_task_cancel(void)
/* make sure overflow events are dropped */
atomic_inc(&tctx->in_idle);
+ __io_uring_files_cancel(NULL);
+
do {
/* read completions before cancelations */
inflight = tctx_inflight(tctx);