From 07fee7aba5472d0e65345146a68b4bd1a8b656c3 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 3 Mar 2022 14:19:02 +0300
Subject: bcache: use bvec_kmap_local in bio_csum

Using local kmaps slightly reduces the chances of stray writes, and
the bvec interface cleans up the code a little bit.
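
A minimal sketch of the resulting pattern, assuming a
bio_for_each_segment() loop like the one in bio_csum() (the helper
name here is hypothetical): bvec_kmap_local() already applies
bv_offset, so the manual pointer arithmetic goes away, and
kunmap_local() takes the mapped pointer rather than the page.

	#include <linux/bio.h>
	#include <linux/crc64.h>
	#include <linux/highmem.h>

	static u64 csum_bio_segments(struct bio *bio, u64 csum)
	{
		struct bio_vec bv;
		struct bvec_iter iter;

		bio_for_each_segment(bv, bio, iter) {
			/* map + offset in one step, no bv_offset math */
			void *d = bvec_kmap_local(&bv);

			csum = crc64_be(csum, d, bv.bv_len);
			kunmap_local(d);	/* unmap via the pointer */
		}
		return csum;
	}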

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Link: https://lore.kernel.org/r/20220303111905.321089-8-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/md/bcache/request.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 6869e010475a..fdd0194f84dd 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -44,10 +44,10 @@ static void bio_csum(struct bio *bio, struct bkey *k)
 	uint64_t csum = 0;
 
 	bio_for_each_segment(bv, bio, iter) {
-		void *d = kmap(bv.bv_page) + bv.bv_offset;
+		void *d = bvec_kmap_local(&bv);
 
 		csum = crc64_be(csum, d, bv.bv_len);
-		kunmap(bv.bv_page);
+		kunmap_local(d);
 	}
 
 	k->ptr[KEY_PTRS(k)] = csum & (~0ULL >> 1);
-- 
cgit v1.2.3


From 7b1002f7cfe581930f63787a0b3de0144e61ed55 Mon Sep 17 00:00:00 2001
From: Mingzhe Zou <mingzhe.zou@easystack.cn>
Date: Fri, 7 Jan 2022 16:21:13 +0800
Subject: bcache: fixup bcache_dev_sectors_dirty_add() multithreaded CPU false
 sharing

When attaching a cached device (a.k.a. backing device) to a cache
device, bch_sectors_dirty_init() is called to count dirty sectors
and stripes (see what bcache_dev_sectors_dirty_add() does) on the
cache device.

When bcache_dev_sectors_dirty_add() is called, a set_bit(stripe,
d->full_dirty_stripes) or clear_bit(stripe, d->full_dirty_stripes)
operation is always performed. In full_dirty_stripes, each bit
represents stripe_size (8192) sectors of 512B, so one bit covers 4MB
(8192*512B), and one 64B CPU cache line holds 512 bits, covering
2048MB. When 20 threads process a cached disk with 100G of dirty
data, a single thread processes about 23M at a time, and the 20
threads together cover 460M. The full_dirty_stripes bits
corresponding to that 460M of data are likely to fall into the same
CPU cache line. When one of these threads performs a set_bit or
clear_bit operation, that cache line becomes invalid for the other
threads, which must then read full_dirty_stripes from main memory
again. Compared with a single thread, the time of a
bcache_dev_sectors_dirty_add() call increased by about 50 times in
our test (100G dirty data, 20 threads, bcache_dev_sectors_dirty_add()
called more than 20 million times).

This patch performs a test_bit before each set_bit or clear_bit
operation. This avoids a lot of needless set and clear operations,
so most bcache_dev_sectors_dirty_add() calls only read the CPU cache
line.
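
As a hedged sketch of the idiom (the helper names are hypothetical,
not bcache code): perform the atomic read-modify-write only when the
bit actually changes, so callers that find the bit already in the
desired state stay read-only and do not invalidate each other's
cache lines.

	#include <linux/bitops.h>

	/* Only dirty the cache line when the bit really flips. */
	static inline void set_bit_if_clear(unsigned int nr,
					    unsigned long *addr)
	{
		if (!test_bit(nr, addr))	/* read-only fast path */
			set_bit(nr, addr);	/* atomic RMW on change */
	}

	static inline void clear_bit_if_set(unsigned int nr,
					    unsigned long *addr)
	{
		if (test_bit(nr, addr))
			clear_bit(nr, addr);
	}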

Signed-off-by: Mingzhe Zou <mingzhe.zou@easystack.cn>
Signed-off-by: Coly Li <colyli@suse.de>
---
 drivers/md/bcache/writeback.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index d42301e6309d..176461f89f46 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -585,10 +585,13 @@ void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned int inode,
 
 		sectors_dirty = atomic_add_return(s,
 					d->stripe_sectors_dirty + stripe);
-		if (sectors_dirty == d->stripe_size)
-			set_bit(stripe, d->full_dirty_stripes);
-		else
-			clear_bit(stripe, d->full_dirty_stripes);
+		if (sectors_dirty == d->stripe_size) {
+			if (!test_bit(stripe, d->full_dirty_stripes))
+				set_bit(stripe, d->full_dirty_stripes);
+		} else {
+			if (test_bit(stripe, d->full_dirty_stripes))
+				clear_bit(stripe, d->full_dirty_stripes);
+		}
 
 		nr_sectors -= s;
 		stripe_offset = 0;
-- 
cgit v1.2.3


From 887554ab96588de2917b6c8c73e552da082e5368 Mon Sep 17 00:00:00 2001
From: Mingzhe Zou <mingzhe.zou@easystack.cn>
Date: Fri, 11 Feb 2022 14:39:15 +0800
Subject: bcache: fixup multiple threads crash

When multiple threads check btree nodes in parallel, the main thread
waits for all threads to stop, or for the CACHE_SET_IO_DISABLE flag
to be set:

wait_event_interruptible(check_state->wait,
                         atomic_read(&check_state->started) == 0 ||
                         test_bit(CACHE_SET_IO_DISABLE, &c->flags));

However, bch_btree_node_read() and bch_btree_node_read_done() may
call bch_cache_set_error(), which sets CACHE_SET_IO_DISABLE. If the
flag is already set, the main thread returns an error while some
worker threads may still be running; if one of them then dereferences
a NULL pointer, the kernel crashes.

This patch changes the wait condition: the main thread must wait for
all threads to stop.
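
A hedged sketch of the ownership rule behind the fix (the state
struct here is a simplified stand-in, not the bcache code): the main
thread may free the shared state only after every worker has dropped
out, even when an error flag tells it to bail out early.

	#include <linux/atomic.h>
	#include <linux/wait.h>

	struct check_state {
		wait_queue_head_t wait;
		atomic_t started;	/* running worker threads */
	};

	static void wait_for_workers(struct check_state *state)
	{
		/*
		 * Waiting on "started == 0 || error flag" is racy: the
		 * flag can be set while workers still run, and freeing
		 * the state under them crashes the kernel. Wait on the
		 * thread count alone.
		 */
		wait_event_interruptible(state->wait,
				atomic_read(&state->started) == 0);
	}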

Fixes: 8e7102273f597 ("bcache: make bch_btree_check() to be multithreaded")
Signed-off-by: Mingzhe Zou <mingzhe.zou@easystack.cn>
Cc: stable@vger.kernel.org # v5.7+
Signed-off-by: Coly Li <colyli@suse.de>
---
 drivers/md/bcache/btree.c     | 6 ++++--
 drivers/md/bcache/writeback.c | 6 ++++--
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 88c573eeb598..ad9f16689419 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -2060,9 +2060,11 @@ int bch_btree_check(struct cache_set *c)
 		}
 	}
 
+	/*
+	 * Must wait for all threads to stop.
+	 */
 	wait_event_interruptible(check_state->wait,
-				 atomic_read(&check_state->started) == 0 ||
-				  test_bit(CACHE_SET_IO_DISABLE, &c->flags));
+				 atomic_read(&check_state->started) == 0);
 
 	for (i = 0; i < check_state->total_threads; i++) {
 		if (check_state->infos[i].result) {
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 176461f89f46..9ee0005874cd 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -1001,9 +1001,11 @@ void bch_sectors_dirty_init(struct bcache_device *d)
 		}
 	}
 
+	/*
+	 * Must wait for all threads to stop.
+	 */
 	wait_event_interruptible(state->wait,
-		 atomic_read(&state->started) == 0 ||
-		 test_bit(CACHE_SET_IO_DISABLE, &c->flags));
+		 atomic_read(&state->started) == 0);
 
 out:
 	kfree(state);
-- 
cgit v1.2.3


From daae161fd2e568b4f481b177b8be34374df98b68 Mon Sep 17 00:00:00 2001
From: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
Date: Mon, 17 Jan 2022 12:38:47 +0100
Subject: md: raid1/raid10: drop pending_cnt

These counters are no longer necessary after commit 11bb45e8aaf6 ("md:
drop queue limitation for RAID1 and RAID10"). Remove them from all
code (the conf and plug structs). raid1_plug_cb and raid10_plug_cb
are identical, so move the definition of raid1_plug_cb to the common
raid1-10 definitions and use it for RAID10 too.

Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
Signed-off-by: Song Liu <song@kernel.org>
---
 drivers/md/raid1-10.c |  5 +++++
 drivers/md/raid1.c    | 11 -----------
 drivers/md/raid1.h    |  1 -
 drivers/md/raid10.c   | 17 +++--------------
 drivers/md/raid10.h   |  1 -
 5 files changed, 8 insertions(+), 27 deletions(-)

diff --git a/drivers/md/raid1-10.c b/drivers/md/raid1-10.c
index 83f9a4f3d82e..e61f6cad4e08 100644
--- a/drivers/md/raid1-10.c
+++ b/drivers/md/raid1-10.c
@@ -28,6 +28,11 @@ struct resync_pages {
 	struct page	*pages[RESYNC_PAGES];
 };
 
+struct raid1_plug_cb {
+	struct blk_plug_cb	cb;
+	struct bio_list		pending;
+};
+
 static void rbio_pool_free(void *rbio, void *data)
 {
 	kfree(rbio);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index c3288d46948d..0fb4d252034b 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -824,7 +824,6 @@ static void flush_pending_writes(struct r1conf *conf)
 		struct bio *bio;
 
 		bio = bio_list_get(&conf->pending_bio_list);
-		conf->pending_count = 0;
 		spin_unlock_irq(&conf->device_lock);
 
 		/*
@@ -1167,12 +1166,6 @@ free_pages:
 	bio_put(behind_bio);
 }
 
-struct raid1_plug_cb {
-	struct blk_plug_cb	cb;
-	struct bio_list		pending;
-	int			pending_cnt;
-};
-
 static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule)
 {
 	struct raid1_plug_cb *plug = container_of(cb, struct raid1_plug_cb,
@@ -1184,7 +1177,6 @@ static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule)
 	if (from_schedule || current->bio_list) {
 		spin_lock_irq(&conf->device_lock);
 		bio_list_merge(&conf->pending_bio_list, &plug->pending);
-		conf->pending_count += plug->pending_cnt;
 		spin_unlock_irq(&conf->device_lock);
 		wake_up(&conf->wait_barrier);
 		md_wakeup_thread(mddev->thread);
@@ -1588,11 +1580,9 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
 			plug = NULL;
 		if (plug) {
 			bio_list_add(&plug->pending, mbio);
-			plug->pending_cnt++;
 		} else {
 			spin_lock_irqsave(&conf->device_lock, flags);
 			bio_list_add(&conf->pending_bio_list, mbio);
-			conf->pending_count++;
 			spin_unlock_irqrestore(&conf->device_lock, flags);
 			md_wakeup_thread(mddev->thread);
 		}
@@ -3058,7 +3048,6 @@ static struct r1conf *setup_conf(struct mddev *mddev)
 	init_waitqueue_head(&conf->wait_barrier);
 
 	bio_list_init(&conf->pending_bio_list);
-	conf->pending_count = 0;
 	conf->recovery_disabled = mddev->recovery_disabled - 1;
 
 	err = -EIO;
diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h
index ccf10e59b116..ebb6788820e7 100644
--- a/drivers/md/raid1.h
+++ b/drivers/md/raid1.h
@@ -87,7 +87,6 @@ struct r1conf {
 
 	/* queue pending writes to be submitted on unplug */
 	struct bio_list		pending_bio_list;
-	int			pending_count;
 
 	/* for use when syncing mirrors:
 	 * We don't allow both normal IO and resync/recovery IO at
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 5dd2e17e1d0e..b369ebb965a9 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -861,7 +861,6 @@ static void flush_pending_writes(struct r10conf *conf)
 		struct bio *bio;
 
 		bio = bio_list_get(&conf->pending_bio_list);
-		conf->pending_count = 0;
 		spin_unlock_irq(&conf->device_lock);
 
 		/*
@@ -1054,16 +1053,9 @@ static sector_t choose_data_offset(struct r10bio *r10_bio,
 		return rdev->new_data_offset;
 }
 
-struct raid10_plug_cb {
-	struct blk_plug_cb	cb;
-	struct bio_list		pending;
-	int			pending_cnt;
-};
-
 static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
 {
-	struct raid10_plug_cb *plug = container_of(cb, struct raid10_plug_cb,
-						   cb);
+	struct raid1_plug_cb *plug = container_of(cb, struct raid1_plug_cb, cb);
 	struct mddev *mddev = plug->cb.data;
 	struct r10conf *conf = mddev->private;
 	struct bio *bio;
@@ -1071,7 +1063,6 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
 	if (from_schedule || current->bio_list) {
 		spin_lock_irq(&conf->device_lock);
 		bio_list_merge(&conf->pending_bio_list, &plug->pending);
-		conf->pending_count += plug->pending_cnt;
 		spin_unlock_irq(&conf->device_lock);
 		wake_up(&conf->wait_barrier);
 		md_wakeup_thread(mddev->thread);
@@ -1238,7 +1229,7 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
 	const unsigned long do_fua = (bio->bi_opf & REQ_FUA);
 	unsigned long flags;
 	struct blk_plug_cb *cb;
-	struct raid10_plug_cb *plug = NULL;
+	struct raid1_plug_cb *plug = NULL;
 	struct r10conf *conf = mddev->private;
 	struct md_rdev *rdev;
 	int devnum = r10_bio->devs[n_copy].devnum;
@@ -1280,16 +1271,14 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
 
 	cb = blk_check_plugged(raid10_unplug, mddev, sizeof(*plug));
 	if (cb)
-		plug = container_of(cb, struct raid10_plug_cb, cb);
+		plug = container_of(cb, struct raid1_plug_cb, cb);
 	else
 		plug = NULL;
 	if (plug) {
 		bio_list_add(&plug->pending, mbio);
-		plug->pending_cnt++;
 	} else {
 		spin_lock_irqsave(&conf->device_lock, flags);
 		bio_list_add(&conf->pending_bio_list, mbio);
-		conf->pending_count++;
 		spin_unlock_irqrestore(&conf->device_lock, flags);
 		md_wakeup_thread(mddev->thread);
 	}
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h
index c34bb196790e..5c0804d8bb1f 100644
--- a/drivers/md/raid10.h
+++ b/drivers/md/raid10.h
@@ -75,7 +75,6 @@ struct r10conf {
 
 	/* queue pending writes and submit them on unplug */
 	struct bio_list		pending_bio_list;
-	int			pending_count;
 
 	spinlock_t		resync_lock;
 	atomic_t		nr_pending;
-- 
cgit v1.2.3


From 7d959f6e978cbbca90e26a192cc39480e977182f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 3 Mar 2022 15:19:33 -0800
Subject: md: use msleep() in md_notify_reboot()

Calling mdelay(1000) from process context, even while a reboot
is in progress, does not make sense.

Using msleep() allows other threads to make progress.
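
A hedged illustration of the difference (the wrapper is hypothetical):
mdelay() busy-spins the CPU for the whole interval, which is only
justified in atomic context, while msleep() puts the task to sleep so
the scheduler can run other work.

	#include <linux/delay.h>

	static void reboot_settle_delay(void)
	{
		msleep(1000);	/* sleeps; other threads make progress */
		/* mdelay(1000) would spin the CPU for a full second */
	}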

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: linux-raid@vger.kernel.org
Signed-off-by: Song Liu <song@kernel.org>
---
 drivers/md/md.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index f88a9e948f3e..d059e21f3c35 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -9582,7 +9582,7 @@ static int md_notify_reboot(struct notifier_block *this,
 	 * driver, we do want to have a safe RAID driver ...
 	 */
 	if (need_delay)
-		mdelay(1000*1);
+		msleep(1000);
 
 	return NOTIFY_DONE;
 }
-- 
cgit v1.2.3


From 9f7c3f837a3768ce8d5a5c34070a47608a4a0209 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 28 Feb 2022 13:25:00 +0200
Subject: raid5-ppl: fully initialize the bio in ppl_new_iounit

We have all the information to pass the bdev and op directly to bio_init,
so do that.
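
A hedged sketch of the consolidated initialization (the embedding
struct is hypothetical): with the bdev and op/flags passed to
bio_init(), the bio is fully formed up front, and the later
bio_set_dev() and bi_opf assignment at submission time disappear.

	#include <linux/bio.h>
	#include <linux/kernel.h>

	struct log_io {
		struct bio	bio;
		struct bio_vec	biovec[4];	/* inline vec table */
	};

	static void log_io_init(struct log_io *io, struct block_device *bdev)
	{
		/* device + op + flags in a single call */
		bio_init(&io->bio, bdev, io->biovec, ARRAY_SIZE(io->biovec),
			 REQ_OP_WRITE | REQ_FUA);
	}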

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Song Liu <song@kernel.org>
---
 drivers/md/raid5-ppl.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
index 3446797fa0ac..f7fdd8292485 100644
--- a/drivers/md/raid5-ppl.c
+++ b/drivers/md/raid5-ppl.c
@@ -250,7 +250,8 @@ static struct ppl_io_unit *ppl_new_iounit(struct ppl_log *log,
 	INIT_LIST_HEAD(&io->stripe_list);
 	atomic_set(&io->pending_stripes, 0);
 	atomic_set(&io->pending_flushes, 0);
-	bio_init(&io->bio, NULL, io->biovec, PPL_IO_INLINE_BVECS, 0);
+	bio_init(&io->bio, log->rdev->bdev, io->biovec, PPL_IO_INLINE_BVECS,
+		 REQ_OP_WRITE | REQ_FUA);
 
 	pplhdr = page_address(io->header_page);
 	clear_page(pplhdr);
@@ -465,8 +466,6 @@ static void ppl_submit_iounit(struct ppl_io_unit *io)
 
 
 	bio->bi_end_io = ppl_log_endio;
-	bio->bi_opf = REQ_OP_WRITE | REQ_FUA;
-	bio_set_dev(bio, log->rdev->bdev);
 	bio->bi_iter.bi_sector = log->next_io_sector;
 	bio_add_page(bio, io->header_page, PAGE_SIZE, 0);
 	bio->bi_write_hint = ppl_conf->write_hint;
-- 
cgit v1.2.3


From 0dd00cba99c352dc9afd62979f350d808c215cb9 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 28 Feb 2022 13:25:01 +0200
Subject: raid5-cache: fully initialize flush_bio when needed

Stop using bio_reset and just initialize the bio fully when needed.
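
A hedged sketch of the resulting lifecycle (names simplified): the
embedded bio is fully bio_init()ed right before each submission and
torn down with bio_uninit() in its completion handler, instead of one
setup-time bio_init() plus a bio_reset() per use.

	#include <linux/bio.h>

	static void flush_endio(struct bio *bio)
	{
		/* ... completion handling ... */
		bio_uninit(bio);	/* pairs with bio_init() below */
	}

	static void submit_flush(struct bio *flush_bio,
				 struct block_device *bdev)
	{
		bio_init(flush_bio, bdev, NULL, 0,
			 REQ_OP_WRITE | REQ_PREFLUSH);
		flush_bio->bi_end_io = flush_endio;
		submit_bio(flush_bio);
	}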

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Song Liu <song@kernel.org>
---
 drivers/md/raid5-cache.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 86e2bb89d9c7..8907d18c225a 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -1266,6 +1266,8 @@ static void r5l_log_flush_endio(struct bio *bio)
 		r5l_io_run_stripes(io);
 	list_splice_tail_init(&log->flushing_ios, &log->finished_ios);
 	spin_unlock_irqrestore(&log->io_list_lock, flags);
+
+	bio_uninit(bio);
 }
 
 /*
@@ -1301,7 +1303,7 @@ void r5l_flush_stripe_to_raid(struct r5l_log *log)
 
 	if (!do_flush)
 		return;
-	bio_reset(&log->flush_bio, log->rdev->bdev,
+	bio_init(&log->flush_bio, log->rdev->bdev, NULL, 0,
 		  REQ_OP_WRITE | REQ_PREFLUSH);
 	log->flush_bio.bi_end_io = r5l_log_flush_endio;
 	submit_bio(&log->flush_bio);
@@ -3105,7 +3107,6 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
 	INIT_LIST_HEAD(&log->io_end_ios);
 	INIT_LIST_HEAD(&log->flushing_ios);
 	INIT_LIST_HEAD(&log->finished_ios);
-	bio_init(&log->flush_bio, NULL, NULL, 0, 0);
 
 	log->io_kc = KMEM_CACHE(r5l_io_unit, 0);
 	if (!log->io_kc)
-- 
cgit v1.2.3


From 89f94b64408b576c88e407d87061bb7b9bc2111d Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 28 Feb 2022 13:25:02 +0200
Subject: raid5-cache: statically allocate the recovery ra bio

There is no need to preallocate the bio and reset it on each use.
Just allocate it on-stack and use a bvec array placed next to the
pages used for it.
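
A hedged sketch of the on-stack pattern (helper and parameters are
hypothetical): a short-lived synchronous read needs no bioset
allocation; a stack bio plus an adjacent bvec table lives exactly as
long as the I/O itself.

	#include <linux/bio.h>

	static int read_pages_sync(struct block_device *bdev,
				   sector_t sector, struct page **pages,
				   struct bio_vec *bvec, int nr_pages)
	{
		struct bio bio;
		int i, ret;

		bio_init(&bio, bdev, bvec, nr_pages, REQ_OP_READ);
		bio.bi_iter.bi_sector = sector;

		for (i = 0; i < nr_pages; i++)
			__bio_add_page(&bio, pages[i], PAGE_SIZE, 0);

		ret = submit_bio_wait(&bio);	/* fully synchronous */
		bio_uninit(&bio);	/* bio dies with the stack frame */
		return ret;
	}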

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Song Liu <song@kernel.org>
---
 drivers/md/raid5-cache.c | 28 +++++++++++++---------------
 1 file changed, 13 insertions(+), 15 deletions(-)

diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 8907d18c225a..a7d50ff9020a 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -1623,10 +1623,10 @@ struct r5l_recovery_ctx {
 	 * just copy data from the pool.
 	 */
 	struct page *ra_pool[R5L_RECOVERY_PAGE_POOL_SIZE];
+	struct bio_vec ra_bvec[R5L_RECOVERY_PAGE_POOL_SIZE];
 	sector_t pool_offset;	/* offset of first page in the pool */
 	int total_pages;	/* total allocated pages */
 	int valid_pages;	/* pages with valid data */
-	struct bio *ra_bio;	/* bio to do the read ahead */
 };
 
 static int r5l_recovery_allocate_ra_pool(struct r5l_log *log,
@@ -1634,11 +1634,6 @@ static int r5l_recovery_allocate_ra_pool(struct r5l_log *log,
 {
 	struct page *page;
 
-	ctx->ra_bio = bio_alloc_bioset(NULL, BIO_MAX_VECS, 0, GFP_KERNEL,
-				       &log->bs);
-	if (!ctx->ra_bio)
-		return -ENOMEM;
-
 	ctx->valid_pages = 0;
 	ctx->total_pages = 0;
 	while (ctx->total_pages < R5L_RECOVERY_PAGE_POOL_SIZE) {
@@ -1650,10 +1645,8 @@ static int r5l_recovery_allocate_ra_pool(struct r5l_log *log,
 		ctx->total_pages += 1;
 	}
 
-	if (ctx->total_pages == 0) {
-		bio_put(ctx->ra_bio);
+	if (ctx->total_pages == 0)
 		return -ENOMEM;
-	}
 
 	ctx->pool_offset = 0;
 	return 0;
@@ -1666,7 +1659,6 @@ static void r5l_recovery_free_ra_pool(struct r5l_log *log,
 
 	for (i = 0; i < ctx->total_pages; ++i)
 		put_page(ctx->ra_pool[i]);
-	bio_put(ctx->ra_bio);
 }
 
 /*
@@ -1679,15 +1671,19 @@ static int r5l_recovery_fetch_ra_pool(struct r5l_log *log,
 				      struct r5l_recovery_ctx *ctx,
 				      sector_t offset)
 {
-	bio_reset(ctx->ra_bio, log->rdev->bdev, REQ_OP_READ);
-	ctx->ra_bio->bi_iter.bi_sector = log->rdev->data_offset + offset;
+	struct bio bio;
+	int ret;
+
+	bio_init(&bio, log->rdev->bdev, ctx->ra_bvec,
+		 R5L_RECOVERY_PAGE_POOL_SIZE, REQ_OP_READ);
+	bio.bi_iter.bi_sector = log->rdev->data_offset + offset;
 
 	ctx->valid_pages = 0;
 	ctx->pool_offset = offset;
 
 	while (ctx->valid_pages < ctx->total_pages) {
-		bio_add_page(ctx->ra_bio,
-			     ctx->ra_pool[ctx->valid_pages], PAGE_SIZE, 0);
+		__bio_add_page(&bio, ctx->ra_pool[ctx->valid_pages], PAGE_SIZE,
+			       0);
 		ctx->valid_pages += 1;
 
 		offset = r5l_ring_add(log, offset, BLOCK_SECTORS);
@@ -1696,7 +1692,9 @@ static int r5l_recovery_fetch_ra_pool(struct r5l_log *log,
 			break;
 	}
 
-	return submit_bio_wait(ctx->ra_bio);
+	ret = submit_bio_wait(&bio);
+	bio_uninit(&bio);
+	return ret;
 }
 
 /*
-- 
cgit v1.2.3


From 03a6b195e8e846f7373bcbeb3ea2a756dfb9ae61 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 28 Feb 2022 13:25:03 +0200
Subject: raid5: initialize the stripe_head embedded bios as needed

Use bio_init to fully initialize the bios when needed, instead of a
partial initialization plus later setting of the dev and op and a
bio_reset.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Song Liu <song@kernel.org>
---
 drivers/md/raid5.c | 25 ++++++++-----------------
 1 file changed, 8 insertions(+), 17 deletions(-)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 8891aaba6596..8bd5f06390ea 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1060,6 +1060,7 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
 	int i, disks = sh->disks;
 	struct stripe_head *head_sh = sh;
 	struct bio_list pending_bios = BIO_EMPTY_LIST;
+	struct r5dev *dev;
 	bool should_defer;
 
 	might_sleep();
@@ -1094,8 +1095,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
 			op_flags |= REQ_SYNC;
 
 again:
-		bi = &sh->dev[i].req;
-		rbi = &sh->dev[i].rreq; /* For writing to replacement */
+		dev = &sh->dev[i];
+		bi = &dev->req;
+		rbi = &dev->rreq; /* For writing to replacement */
 
 		rcu_read_lock();
 		rrdev = rcu_dereference(conf->disks[i].replacement);
@@ -1171,8 +1173,7 @@ again:
 
 			set_bit(STRIPE_IO_STARTED, &sh->state);
 
-			bio_set_dev(bi, rdev->bdev);
-			bio_set_op_attrs(bi, op, op_flags);
+			bio_init(bi, rdev->bdev, &dev->vec, 1, op | op_flags);
 			bi->bi_end_io = op_is_write(op)
 				? raid5_end_write_request
 				: raid5_end_read_request;
@@ -1238,8 +1239,7 @@ again:
 
 			set_bit(STRIPE_IO_STARTED, &sh->state);
 
-			bio_set_dev(rbi, rrdev->bdev);
-			bio_set_op_attrs(rbi, op, op_flags);
+			bio_init(rbi, rrdev->bdev, &dev->rvec, 1, op | op_flags);
 			BUG_ON(!op_is_write(op));
 			rbi->bi_end_io = raid5_end_write_request;
 			rbi->bi_private = sh;
@@ -2294,7 +2294,6 @@ static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp,
 	int disks, struct r5conf *conf)
 {
 	struct stripe_head *sh;
-	int i;
 
 	sh = kmem_cache_zalloc(sc, gfp);
 	if (sh) {
@@ -2307,12 +2306,6 @@ static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp,
 		atomic_set(&sh->count, 1);
 		sh->raid_conf = conf;
 		sh->log_start = MaxSector;
-		for (i = 0; i < disks; i++) {
-			struct r5dev *dev = &sh->dev[i];
-
-			bio_init(&dev->req, NULL, &dev->vec, 1, 0);
-			bio_init(&dev->rreq, NULL, &dev->rvec, 1, 0);
-		}
 
 		if (raid5_has_ppl(conf)) {
 			sh->ppl_page = alloc_page(gfp);
@@ -2677,7 +2670,6 @@ static void raid5_end_read_request(struct bio * bi)
 		(unsigned long long)sh->sector, i, atomic_read(&sh->count),
 		bi->bi_status);
 	if (i == disks) {
-		bio_reset(bi, NULL, 0);
 		BUG();
 		return;
 	}
@@ -2785,7 +2777,7 @@ static void raid5_end_read_request(struct bio * bi)
 		}
 	}
 	rdev_dec_pending(rdev, conf->mddev);
-	bio_reset(bi, NULL, 0);
+	bio_uninit(bi);
 	clear_bit(R5_LOCKED, &sh->dev[i].flags);
 	set_bit(STRIPE_HANDLE, &sh->state);
 	raid5_release_stripe(sh);
@@ -2823,7 +2815,6 @@ static void raid5_end_write_request(struct bio *bi)
 		(unsigned long long)sh->sector, i, atomic_read(&sh->count),
 		bi->bi_status);
 	if (i == disks) {
-		bio_reset(bi, NULL, 0);
 		BUG();
 		return;
 	}
@@ -2860,7 +2851,7 @@ static void raid5_end_write_request(struct bio *bi)
 	if (sh->batch_head && bi->bi_status && !replacement)
 		set_bit(STRIPE_BATCH_ERR, &sh->batch_head->state);
 
-	bio_reset(bi, NULL, 0);
+	bio_uninit(bi);
 	if (!test_and_clear_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags))
 		clear_bit(R5_LOCKED, &sh->dev[i].flags);
 	set_bit(STRIPE_HANDLE, &sh->state);
-- 
cgit v1.2.3