From 6ad7f2332e84c46f0c94e73e05b5b7c2bc1a6b74 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Thu, 8 Apr 2021 01:54:39 +0100
Subject: io_uring: clear F_REISSUE right after getting it

There are lots of ways r/w request may continue its path after getting
REQ_F_REISSUE, it's not necessarily io-wq and can be, e.g. apoll,
and submitted via  io_async_task_func() -> __io_req_task_submit()

Clear the flag right after getting it, so the next attempt is well
prepared regardless how the request will be executed.

Fixes: 230d50d448ac ("io_uring: move reissue into regular IO path")
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/11dcead939343f4e27cab0074d34afcab771bfa4.1617842918.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 65a17d560a73..f1881ac0744b 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -3294,6 +3294,7 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
 	ret = io_iter_do_read(req, iter);
 
 	if (ret == -EAGAIN || (req->flags & REQ_F_REISSUE)) {
+		req->flags &= ~REQ_F_REISSUE;
 		/* IOPOLL retry should happen for io-wq threads */
 		if (!force_nonblock && !(req->ctx->flags & IORING_SETUP_IOPOLL))
 			goto done;
@@ -3417,8 +3418,10 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
 	else
 		ret2 = -EINVAL;
 
-	if (req->flags & REQ_F_REISSUE)
+	if (req->flags & REQ_F_REISSUE) {
+		req->flags &= ~REQ_F_REISSUE;
 		ret2 = -EAGAIN;
+	}
 
 	/*
 	 * Raw bdev writes will return -EOPNOTSUPP for IOCB_NOWAIT. Just
@@ -6173,7 +6176,6 @@ static void io_wq_submit_work(struct io_wq_work *work)
 		ret = -ECANCELED;
 
 	if (!ret) {
-		req->flags &= ~REQ_F_REISSUE;
 		do {
 			ret = io_issue_sqe(req, 0);
 			/*
-- 
cgit v1.2.3


From 9728463737db027557e8ba315cbbca6b81122c04 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Thu, 8 Apr 2021 19:28:03 +0100
Subject: io_uring: fix rw req completion

WARNING: at fs/io_uring.c:8578 io_ring_exit_work.cold+0x0/0x18

As reissuing is now passed back by REQ_F_REISSUE and kiocb_done()
internally uses __io_complete_rw(), it may stop after setting the flag
so leaving a dangling request.

There are tricky edge cases, e.g. reading beyound file, boundary, so
the easiest way is to hand code reissue in kiocb_done() as
__io_complete_rw() was doing for us before.

Fixes: 230d50d448ac ("io_uring: move reissue into regular IO path")
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/f602250d292f8a84cca9a01d747744d1e797be26.1617842918.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index f1881ac0744b..bd14327c8e7e 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2762,6 +2762,7 @@ static void kiocb_done(struct kiocb *kiocb, ssize_t ret,
 {
 	struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
 	struct io_async_rw *io = req->async_data;
+	bool check_reissue = kiocb->ki_complete == io_complete_rw;
 
 	/* add previously done IO, if any */
 	if (io && io->bytes_done > 0) {
@@ -2777,6 +2778,18 @@ static void kiocb_done(struct kiocb *kiocb, ssize_t ret,
 		__io_complete_rw(req, ret, 0, issue_flags);
 	else
 		io_rw_done(kiocb, ret);
+
+	if (check_reissue && req->flags & REQ_F_REISSUE) {
+		req->flags &= ~REQ_F_REISSUE;
+		if (!io_rw_reissue(req)) {
+			int cflags = 0;
+
+			req_set_fail_links(req);
+			if (req->flags & REQ_F_BUFFER_SELECTED)
+				cflags = io_put_rw_kbuf(req);
+			__io_req_complete(req, issue_flags, ret, cflags);
+		}
+	}
 }
 
 static int io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter)
-- 
cgit v1.2.3


From c60eb049f4a19ddddcd3ee97a9c79ab8066a6a03 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Thu, 8 Apr 2021 01:54:42 +0100
Subject: io-wq: cancel unbounded works on io-wq destroy

WARNING: CPU: 5 PID: 227 at fs/io_uring.c:8578 io_ring_exit_work+0xe6/0x470
RIP: 0010:io_ring_exit_work+0xe6/0x470
Call Trace:
 process_one_work+0x206/0x400
 worker_thread+0x4a/0x3d0
 kthread+0x129/0x170
 ret_from_fork+0x22/0x30

INFO: task lfs-openat:2359 blocked for more than 245 seconds.
task:lfs-openat      state:D stack:    0 pid: 2359 ppid:     1 flags:0x00000004
Call Trace:
 ...
 wait_for_completion+0x8b/0xf0
 io_wq_destroy_manager+0x24/0x60
 io_wq_put_and_exit+0x18/0x30
 io_uring_clean_tctx+0x76/0xa0
 __io_uring_files_cancel+0x1b9/0x2e0
 do_exit+0xc0/0xb40
 ...

Even after io-wq destroy has been issued io-wq worker threads will
continue executing all left work items as usual, and may hang waiting
for I/O that won't ever complete (aka unbounded).

[<0>] pipe_read+0x306/0x450
[<0>] io_iter_do_read+0x1e/0x40
[<0>] io_read+0xd5/0x330
[<0>] io_issue_sqe+0xd21/0x18a0
[<0>] io_wq_submit_work+0x6c/0x140
[<0>] io_worker_handle_work+0x17d/0x400
[<0>] io_wqe_worker+0x2c0/0x330
[<0>] ret_from_fork+0x22/0x30

Cancel all unbounded I/O instead of executing them. This changes the
user visible behaviour, but that's inevitable as io-wq is not per task.

Suggested-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/cd4b543154154cba055cf86f351441c2174d7f71.1617842918.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io-wq.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/fs/io-wq.c b/fs/io-wq.c
index 433c4d3c3c1c..4eba531bea5a 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -415,6 +415,7 @@ static void io_worker_handle_work(struct io_worker *worker)
 {
 	struct io_wqe *wqe = worker->wqe;
 	struct io_wq *wq = wqe->wq;
+	bool do_kill = test_bit(IO_WQ_BIT_EXIT, &wq->state);
 
 	do {
 		struct io_wq_work *work;
@@ -444,6 +445,9 @@ get_next:
 			unsigned int hash = io_get_work_hash(work);
 
 			next_hashed = wq_next_work(work);
+
+			if (unlikely(do_kill) && (work->flags & IO_WQ_WORK_UNBOUND))
+				work->flags |= IO_WQ_WORK_CANCEL;
 			wq->do_work(work);
 			io_assign_current_work(worker, NULL);
 
-- 
cgit v1.2.3