diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-01-21 16:17:03 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-01-21 16:17:03 +0200 |
commit | 3c7c25038b6c7d66a6816028219914379be6a5cc (patch) | |
tree | d21fb0d5e70dc309b9cdff78985d08c081a7135a | |
parent | f3a78227eef20c0ba13bbf9401f0a340bca3ad16 (diff) | |
parent | 46cdc45acb089c811d9a54fd50af33b96e5fae9d (diff) |
Merge tag 'block-5.17-2022-01-21' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe:
"Various little minor fixes that should go into this release:
- Fix issue with cloned bios and IO accounting (Christoph)
- Remove redundant assignments (Colin, GuoYong)
- Fix an issue with the mq-deadline async_depth sysfs interface (me)
- Fix brd module loading race (Tetsuo)
- Shared tag map wakeup fix (Laibin)
- End of bdev read fix (OGAWA)
- srcu leak fix (Ming)"
* tag 'block-5.17-2022-01-21' of git://git.kernel.dk/linux-block:
block: fix async_depth sysfs interface for mq-deadline
block: Fix wrong offset in bio_truncate()
block: assign bi_bdev for cloned bios in blk_rq_prep_clone
block: cleanup q->srcu
block: Remove unnecessary variable assignment
brd: remove brd_devices_mutex mutex
aoe: remove redundant assignment on variable n
loop: remove redundant initialization of pointer node
blk-mq: fix tag_get wait task can't be awakened
-rw-r--r-- | block/bio.c | 3 | ||||
-rw-r--r-- | block/blk-mq-tag.c | 40 | ||||
-rw-r--r-- | block/blk-mq.c | 1 | ||||
-rw-r--r-- | block/blk-sysfs.c | 4 | ||||
-rw-r--r-- | block/mq-deadline.c | 4 | ||||
-rw-r--r-- | drivers/block/aoe/aoecmd.c | 2 | ||||
-rw-r--r-- | drivers/block/brd.c | 73 | ||||
-rw-r--r-- | drivers/block/loop.c | 2 | ||||
-rw-r--r-- | include/linux/sbitmap.h | 11 | ||||
-rw-r--r-- | lib/sbitmap.c | 25 |
10 files changed, 106 insertions, 59 deletions
diff --git a/block/bio.c b/block/bio.c index 0d400ba2dbd1..4312a8085396 100644 --- a/block/bio.c +++ b/block/bio.c @@ -569,7 +569,8 @@ static void bio_truncate(struct bio *bio, unsigned new_size) offset = new_size - done; else offset = 0; - zero_user(bv.bv_page, offset, bv.bv_len - offset); + zero_user(bv.bv_page, bv.bv_offset + offset, + bv.bv_len - offset); truncated = true; } done += bv.bv_len; diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index e55a6834c9a6..845f74e8dd7b 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -17,6 +17,21 @@ #include "blk-mq-tag.h" /* + * Recalculate wakeup batch when tag is shared by hctx. + */ +static void blk_mq_update_wake_batch(struct blk_mq_tags *tags, + unsigned int users) +{ + if (!users) + return; + + sbitmap_queue_recalculate_wake_batch(&tags->bitmap_tags, + users); + sbitmap_queue_recalculate_wake_batch(&tags->breserved_tags, + users); +} + +/* * If a previously inactive queue goes active, bump the active user count. * We need to do this before try to allocate driver tag, then even if fail * to get tag when first time, the other shared-tag users could reserve @@ -24,18 +39,26 @@ */ bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx) { + unsigned int users; + if (blk_mq_is_shared_tags(hctx->flags)) { struct request_queue *q = hctx->queue; - if (!test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) && - !test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags)) - atomic_inc(&hctx->tags->active_queues); + if (test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) || + test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags)) { + return true; + } } else { - if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) && - !test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) - atomic_inc(&hctx->tags->active_queues); + if (test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) || + test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) { + return true; + } } + users = atomic_inc_return(&hctx->tags->active_queues); + + blk_mq_update_wake_batch(hctx->tags, users); + return true; } @@ -56,6 +79,7 @@ void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve) void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx) { struct blk_mq_tags *tags = hctx->tags; + unsigned int users; if (blk_mq_is_shared_tags(hctx->flags)) { struct request_queue *q = hctx->queue; @@ -68,7 +92,9 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx) return; } - atomic_dec(&tags->active_queues); + users = atomic_dec_return(&tags->active_queues); + + blk_mq_update_wake_batch(tags, users); blk_mq_tag_wakeup_all(tags, false); } diff --git a/block/blk-mq.c b/block/blk-mq.c index a6d4780580fc..b5e35e63adad 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2976,6 +2976,7 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src, bio = bio_clone_fast(bio_src, gfp_mask, bs); if (!bio) goto free_and_out; + bio->bi_bdev = rq->q->disk->part0; if (bio_ctr && bio_ctr(bio, bio_src, data)) goto free_and_out; diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index e20eadfcf5c8..9f32882ceb2f 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -811,6 +811,9 @@ static void blk_release_queue(struct kobject *kobj) bioset_exit(&q->bio_split); + if (blk_queue_has_srcu(q)) + cleanup_srcu_struct(q->srcu); + ida_simple_remove(&blk_queue_ida, q->id); call_rcu(&q->rcu_head, blk_free_queue_rcu); } @@ -887,7 +890,6 @@ int blk_register_queue(struct gendisk *disk) kobject_uevent(&q->elevator->kobj, KOBJ_ADD); mutex_unlock(&q->sysfs_lock); - ret = 0; unlock: mutex_unlock(&q->sysfs_dir_lock); diff --git a/block/mq-deadline.c b/block/mq-deadline.c index 85d919bf60c7..3ed5eaf3446a 100644 --- a/block/mq-deadline.c +++ b/block/mq-deadline.c @@ -865,7 +865,7 @@ SHOW_JIFFIES(deadline_write_expire_show, dd->fifo_expire[DD_WRITE]); SHOW_JIFFIES(deadline_prio_aging_expire_show, dd->prio_aging_expire); SHOW_INT(deadline_writes_starved_show, dd->writes_starved); SHOW_INT(deadline_front_merges_show, dd->front_merges); -SHOW_INT(deadline_async_depth_show, dd->front_merges); +SHOW_INT(deadline_async_depth_show, dd->async_depth); SHOW_INT(deadline_fifo_batch_show, dd->fifo_batch); #undef SHOW_INT #undef SHOW_JIFFIES @@ -895,7 +895,7 @@ STORE_JIFFIES(deadline_write_expire_store, &dd->fifo_expire[DD_WRITE], 0, INT_MA STORE_JIFFIES(deadline_prio_aging_expire_store, &dd->prio_aging_expire, 0, INT_MAX); STORE_INT(deadline_writes_starved_store, &dd->writes_starved, INT_MIN, INT_MAX); STORE_INT(deadline_front_merges_store, &dd->front_merges, 0, 1); -STORE_INT(deadline_async_depth_store, &dd->front_merges, 1, INT_MAX); +STORE_INT(deadline_async_depth_store, &dd->async_depth, 1, INT_MAX); STORE_INT(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX); #undef STORE_FUNCTION #undef STORE_INT diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 588889bea7c3..6af111f568e4 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -122,7 +122,7 @@ newtag(struct aoedev *d) register ulong n; n = jiffies & 0xffff; - return n |= (++d->lasttag & 0x7fff) << 16; + return n | (++d->lasttag & 0x7fff) << 16; } static u32 diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 8fe2e4289dae..6e3f2f0d2352 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -362,7 +362,6 @@ __setup("ramdisk_size=", ramdisk_size); * (should share code eventually). */ static LIST_HEAD(brd_devices); -static DEFINE_MUTEX(brd_devices_mutex); static struct dentry *brd_debugfs_dir; static int brd_alloc(int i) @@ -372,21 +371,14 @@ static int brd_alloc(int i) char buf[DISK_NAME_LEN]; int err = -ENOMEM; - mutex_lock(&brd_devices_mutex); - list_for_each_entry(brd, &brd_devices, brd_list) { - if (brd->brd_number == i) { - mutex_unlock(&brd_devices_mutex); + list_for_each_entry(brd, &brd_devices, brd_list) + if (brd->brd_number == i) return -EEXIST; - } - } brd = kzalloc(sizeof(*brd), GFP_KERNEL); - if (!brd) { - mutex_unlock(&brd_devices_mutex); + if (!brd) return -ENOMEM; - } brd->brd_number = i; list_add_tail(&brd->brd_list, &brd_devices); - mutex_unlock(&brd_devices_mutex); spin_lock_init(&brd->brd_lock); INIT_RADIX_TREE(&brd->brd_pages, GFP_ATOMIC); @@ -429,9 +421,7 @@ static int brd_alloc(int i) out_cleanup_disk: blk_cleanup_disk(disk); out_free_dev: - mutex_lock(&brd_devices_mutex); list_del(&brd->brd_list); - mutex_unlock(&brd_devices_mutex); kfree(brd); return err; } @@ -441,15 +431,19 @@ static void brd_probe(dev_t dev) brd_alloc(MINOR(dev) / max_part); } -static void brd_del_one(struct brd_device *brd) +static void brd_cleanup(void) { - del_gendisk(brd->brd_disk); - blk_cleanup_disk(brd->brd_disk); - brd_free_pages(brd); - mutex_lock(&brd_devices_mutex); - list_del(&brd->brd_list); - mutex_unlock(&brd_devices_mutex); - kfree(brd); + struct brd_device *brd, *next; + + debugfs_remove_recursive(brd_debugfs_dir); + + list_for_each_entry_safe(brd, next, &brd_devices, brd_list) { + del_gendisk(brd->brd_disk); + blk_cleanup_disk(brd->brd_disk); + brd_free_pages(brd); + list_del(&brd->brd_list); + kfree(brd); + } } static inline void brd_check_and_reset_par(void) @@ -473,9 +467,18 @@ static inline void brd_check_and_reset_par(void) static int __init brd_init(void) { - struct brd_device *brd, *next; int err, i; + brd_check_and_reset_par(); + + brd_debugfs_dir = debugfs_create_dir("ramdisk_pages", NULL); + + for (i = 0; i < rd_nr; i++) { + err = brd_alloc(i); + if (err) + goto out_free; + } + /* * brd module now has a feature to instantiate underlying device * structure on-demand, provided that there is an access dev node. @@ -491,28 +494,16 @@ static int __init brd_init(void) * dynamically. */ - if (__register_blkdev(RAMDISK_MAJOR, "ramdisk", brd_probe)) - return -EIO; - - brd_check_and_reset_par(); - - brd_debugfs_dir = debugfs_create_dir("ramdisk_pages", NULL); - - for (i = 0; i < rd_nr; i++) { - err = brd_alloc(i); - if (err) - goto out_free; + if (__register_blkdev(RAMDISK_MAJOR, "ramdisk", brd_probe)) { + err = -EIO; + goto out_free; } pr_info("brd: module loaded\n"); return 0; out_free: - unregister_blkdev(RAMDISK_MAJOR, "ramdisk"); - debugfs_remove_recursive(brd_debugfs_dir); - - list_for_each_entry_safe(brd, next, &brd_devices, brd_list) - brd_del_one(brd); + brd_cleanup(); pr_info("brd: module NOT loaded !!!\n"); return err; @@ -520,13 +511,9 @@ out_free: static void __exit brd_exit(void) { - struct brd_device *brd, *next; unregister_blkdev(RAMDISK_MAJOR, "ramdisk"); - debugfs_remove_recursive(brd_debugfs_dir); - - list_for_each_entry_safe(brd, next, &brd_devices, brd_list) - brd_del_one(brd); + brd_cleanup(); pr_info("brd: module unloaded\n"); } diff --git a/drivers/block/loop.c b/drivers/block/loop.c index b1b05c45c07c..01cbbfc4e9e2 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -820,7 +820,7 @@ static inline int queue_on_root_worker(struct cgroup_subsys_state *css) static void loop_queue_work(struct loop_device *lo, struct loop_cmd *cmd) { - struct rb_node **node = &(lo->worker_tree.rb_node), *parent = NULL; + struct rb_node **node, *parent = NULL; struct loop_worker *cur_worker, *worker = NULL; struct work_struct *work; struct list_head *cmd_list; diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index fc0357a6e19b..95df357ec009 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -416,6 +416,17 @@ static inline void sbitmap_queue_free(struct sbitmap_queue *sbq) } /** + * sbitmap_queue_recalculate_wake_batch() - Recalculate wake batch + * @sbq: Bitmap queue to recalculate wake batch. + * @users: Number of shares. + * + * Like sbitmap_queue_update_wake_batch(), this will calculate wake batch + * by depth. This interface is for HCTX shared tags or queue shared tags. + */ +void sbitmap_queue_recalculate_wake_batch(struct sbitmap_queue *sbq, + unsigned int users); + +/** * sbitmap_queue_resize() - Resize a &struct sbitmap_queue. * @sbq: Bitmap queue to resize. * @depth: New number of bits to resize to. diff --git a/lib/sbitmap.c b/lib/sbitmap.c index 2709ab825499..6220fa67fb7e 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -457,10 +457,9 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth, } EXPORT_SYMBOL_GPL(sbitmap_queue_init_node); -static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq, - unsigned int depth) +static inline void __sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq, + unsigned int wake_batch) { - unsigned int wake_batch = sbq_calc_wake_batch(sbq, depth); int i; if (sbq->wake_batch != wake_batch) { @@ -476,6 +475,26 @@ static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq, } } +static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq, + unsigned int depth) +{ + unsigned int wake_batch; + + wake_batch = sbq_calc_wake_batch(sbq, depth); + __sbitmap_queue_update_wake_batch(sbq, wake_batch); +} + +void sbitmap_queue_recalculate_wake_batch(struct sbitmap_queue *sbq, + unsigned int users) +{ + unsigned int wake_batch; + + wake_batch = clamp_val((sbq->sb.depth + users - 1) / + users, 4, SBQ_WAKE_BATCH); + __sbitmap_queue_update_wake_batch(sbq, wake_batch); +} +EXPORT_SYMBOL_GPL(sbitmap_queue_recalculate_wake_batch); + void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth) { sbitmap_queue_update_wake_batch(sbq, depth); |