From 611bee526b4a89d49f1b9914a770bfdc101d5fb5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 23 Aug 2020 11:10:41 +0200 Subject: block: replace bd_set_size with bd_set_nr_sectors Replace bd_set_size with a version that takes the number of sectors instead, as that fits most of the current and future callers much better. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- drivers/nvme/host/nvme.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 2910f6caab7d..aab130f31e25 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -688,7 +688,7 @@ static inline void nvme_mpath_update_disk_size(struct gendisk *disk) struct block_device *bdev = bdget_disk(disk, 0); if (bdev) { - bd_set_size(bdev, get_capacity(disk) << SECTOR_SHIFT); + bd_set_nr_sectors(bdev, get_capacity(disk)); bdput(bdev); } } -- cgit v1.2.3 From c13f0fbc4c191aab5e95b01589ff5bbc6556e4f6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 23 Aug 2020 11:10:43 +0200 Subject: nvme: don't call revalidate_disk from nvme_set_queue_dying In nvme_set_queue_dying we really just want to ensure the disk and bdev sizes are set to zero. Going through revalidate_disk leads to a somewhat arcance and complex callchain relying on special behavior in a few places. Instead just lift the set_capacity directly to nvme_set_queue_dying, and rename and move the nvme_mpath_update_disk_size helper so that we can use it in nvme_set_queue_dying to propagate the size to the bdev without detours. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 33 +++++++++++++++++++++++---------- drivers/nvme/host/nvme.h | 13 ------------- 2 files changed, 23 insertions(+), 23 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index d543bc1747fd..c7e01d9667ad 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -94,21 +94,34 @@ static void nvme_put_subsystem(struct nvme_subsystem *subsys); static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl, unsigned nsid); +static void nvme_update_bdev_size(struct gendisk *disk) +{ + struct block_device *bdev = bdget_disk(disk, 0); + + if (bdev) { + bd_set_nr_sectors(bdev, get_capacity(disk)); + bdput(bdev); + } +} + +/* + * Prepare a queue for teardown. + * + * This must forcibly unquiesce queues to avoid blocking dispatch, and only set + * the capacity to 0 after that to avoid blocking dispatchers that may be + * holding bd_butex. This will end buffered writers dirtying pages that can't + * be synced. + */ static void nvme_set_queue_dying(struct nvme_ns *ns) { - /* - * Revalidating a dead namespace sets capacity to 0. This will end - * buffered writers dirtying pages that can't be synced. - */ if (test_and_set_bit(NVME_NS_DEAD, &ns->flags)) return; + blk_set_queue_dying(ns->queue); - /* Forcibly unquiesce queues to avoid blocking dispatch */ blk_mq_unquiesce_queue(ns->queue); - /* - * Revalidate after unblocking dispatchers that may be holding bd_butex - */ - revalidate_disk(ns->disk); + + set_capacity(ns->disk, 0); + nvme_update_bdev_size(ns->disk); } static void nvme_queue_scan(struct nvme_ctrl *ctrl) @@ -2134,7 +2147,7 @@ static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) nvme_update_disk_info(ns->head->disk, ns, id); blk_stack_limits(&ns->head->disk->queue->limits, &ns->queue->limits, 0); - nvme_mpath_update_disk_size(ns->head->disk); + nvme_update_bdev_size(ns->head->disk); } #endif return 0; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index aab130f31e25..87737fa32360 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -683,16 +683,6 @@ static inline void nvme_trace_bio_complete(struct request *req, trace_block_bio_complete(ns->head->disk->queue, req->bio); } -static inline void nvme_mpath_update_disk_size(struct gendisk *disk) -{ - struct block_device *bdev = bdget_disk(disk, 0); - - if (bdev) { - bd_set_nr_sectors(bdev, get_capacity(disk)); - bdput(bdev); - } -} - extern struct device_attribute dev_attr_ana_grpid; extern struct device_attribute dev_attr_ana_state; extern struct device_attribute subsys_attr_iopolicy; @@ -767,9 +757,6 @@ static inline void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys) static inline void nvme_mpath_start_freeze(struct nvme_subsystem *subsys) { } -static inline void nvme_mpath_update_disk_size(struct gendisk *disk) -{ -} #endif /* CONFIG_NVME_MULTIPATH */ #ifdef CONFIG_BLK_DEV_ZONED -- cgit v1.2.3 From b55d3d21a05d5558440515c1a73d87bc4b1fa17c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Sep 2020 17:57:45 +0200 Subject: nvme: opencode revalidate_disk in nvme_validate_ns Keep control in the NVMe driver instead of going through an indirect call back into ->revalidate_disk. Also reorder the function a bit to be easier to follow with the additional code. And now that we have removed all callers of revalidate_disk() in the nvme code, ->revalidate_disk is only called from the open code when first opening the device. Which is of course totally pointless as we have a valid size since the initial scan, and will get an updated view through the asynchronous notifiation everytime the size changes. Signed-off-by: Christoph Hellwig Reviewed-by: Josef Bacik Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index c7e01d9667ad..ea1fa41fbba8 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2352,7 +2352,6 @@ static const struct block_device_operations nvme_fops = { .open = nvme_open, .release = nvme_release, .getgeo = nvme_getgeo, - .revalidate_disk= nvme_revalidate_disk, .report_zones = nvme_report_zones, .pr_ops = &nvme_pr_ops, }; @@ -4053,14 +4052,19 @@ static void nvme_ns_remove_by_nsid(struct nvme_ctrl *ctrl, u32 nsid) static void nvme_validate_ns(struct nvme_ctrl *ctrl, unsigned nsid) { struct nvme_ns *ns; + int ret; ns = nvme_find_get_ns(ctrl, nsid); - if (ns) { - if (revalidate_disk(ns->disk)) - nvme_ns_remove(ns); - nvme_put_ns(ns); - } else + if (!ns) { nvme_alloc_ns(ctrl, nsid); + return; + } + + ret = nvme_revalidate_disk(ns->disk); + revalidate_disk_size(ns->disk, ret == 0); + if (ret) + nvme_ns_remove(ns); + nvme_put_ns(ns); } static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl, -- cgit v1.2.3 From c2e4cd57cfa1f627b786c764d185fff85fd12be9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 24 Sep 2020 08:51:34 +0200 Subject: block: lift setting the readahead size into the block layer Drivers shouldn't really mess with the readahead size, as that is a VM concept. Instead set it based on the optimal I/O size by lifting the algorithm from the md driver when registering the disk. Also set bdi->io_pages there as well by applying the same scheme based on max_sectors. To ensure the limits work well for stacking drivers a new helper is added to update the readahead limits from the block limits, which is also called from disk_stack_limits. Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Jan Kara Reviewed-by: Mike Snitzer Reviewed-by: Martin K. Petersen Acked-by: Coly Li Signed-off-by: Jens Axboe --- block/blk-settings.c | 18 ++++++++++++++++-- block/blk-sysfs.c | 2 ++ drivers/block/aoe/aoeblk.c | 1 - drivers/block/drbd/drbd_nl.c | 10 +--------- drivers/md/bcache/super.c | 3 --- drivers/md/dm-table.c | 3 +-- drivers/md/raid0.c | 16 ---------------- drivers/md/raid10.c | 24 +----------------------- drivers/md/raid5.c | 13 +------------ drivers/nvme/host/core.c | 1 + include/linux/blkdev.h | 1 + 11 files changed, 24 insertions(+), 68 deletions(-) (limited to 'drivers/nvme') diff --git a/block/blk-settings.c b/block/blk-settings.c index 5ea3de48afba..4f6eb4bb1723 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -372,6 +372,19 @@ void blk_queue_alignment_offset(struct request_queue *q, unsigned int offset) } EXPORT_SYMBOL(blk_queue_alignment_offset); +void blk_queue_update_readahead(struct request_queue *q) +{ + /* + * For read-ahead of large files to be effective, we need to read ahead + * at least twice the optimal I/O size. + */ + q->backing_dev_info->ra_pages = + max(queue_io_opt(q) * 2 / PAGE_SIZE, VM_READAHEAD_PAGES); + q->backing_dev_info->io_pages = + queue_max_sectors(q) >> (PAGE_SHIFT - 9); +} +EXPORT_SYMBOL_GPL(blk_queue_update_readahead); + /** * blk_limits_io_min - set minimum request size for a device * @limits: the queue limits @@ -450,6 +463,8 @@ EXPORT_SYMBOL(blk_limits_io_opt); void blk_queue_io_opt(struct request_queue *q, unsigned int opt) { blk_limits_io_opt(&q->limits, opt); + q->backing_dev_info->ra_pages = + max(queue_io_opt(q) * 2 / PAGE_SIZE, VM_READAHEAD_PAGES); } EXPORT_SYMBOL(blk_queue_io_opt); @@ -631,8 +646,7 @@ void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, top, bottom); } - t->backing_dev_info->io_pages = - t->limits.max_sectors >> (PAGE_SHIFT - 9); + blk_queue_update_readahead(disk->queue); } EXPORT_SYMBOL(disk_stack_limits); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 81722cdcf0cb..869ed21a9edc 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -854,6 +854,8 @@ int blk_register_queue(struct gendisk *disk) percpu_ref_switch_to_percpu(&q->q_usage_counter); } + blk_queue_update_readahead(q); + ret = blk_trace_init_sysfs(dev); if (ret) return ret; diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index d8cfc233e64b..c34e71b0c4a9 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -406,7 +406,6 @@ aoeblk_gdalloc(void *vp) WARN_ON(d->gd); WARN_ON(d->flags & DEVFL_UP); blk_queue_max_hw_sectors(q, BLK_DEF_MAX_SECTORS); - q->backing_dev_info->ra_pages = SZ_2M / PAGE_SIZE; blk_queue_io_opt(q, SZ_2M); d->bufpool = mp; d->blkq = gd->queue = q; diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index aaff5bde3915..54a4930c04fe 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1362,15 +1362,7 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi if (b) { blk_stack_limits(&q->limits, &b->limits, 0); - - if (q->backing_dev_info->ra_pages != - b->backing_dev_info->ra_pages) { - drbd_info(device, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n", - q->backing_dev_info->ra_pages, - b->backing_dev_info->ra_pages); - q->backing_dev_info->ra_pages = - b->backing_dev_info->ra_pages; - } + blk_queue_update_readahead(q); } fixup_discard_if_not_supported(q); fixup_write_zeroes(device, q); diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 48113005ed86..6bfa77167362 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1427,9 +1427,6 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size) if (ret) return ret; - dc->disk.disk->queue->backing_dev_info->ra_pages = - max(dc->disk.disk->queue->backing_dev_info->ra_pages, - q->backing_dev_info->ra_pages); blk_queue_io_opt(dc->disk.disk->queue, max(queue_io_opt(dc->disk.disk->queue), queue_io_opt(q))); diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 5edc3079e7c1..ef2757012f59 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1925,8 +1925,7 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, } #endif - /* Allow reads to exceed readahead limits */ - q->backing_dev_info->io_pages = limits->max_sectors >> (PAGE_SHIFT - 9); + blk_queue_update_readahead(q); } unsigned int dm_table_get_num_targets(struct dm_table *t) diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index f54a449f97aa..aa2d72791768 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -410,22 +410,6 @@ static int raid0_run(struct mddev *mddev) mdname(mddev), (unsigned long long)mddev->array_sectors); - if (mddev->queue) { - /* calculate the max read-ahead size. - * For read-ahead of large files to be effective, we need to - * readahead at least twice a whole stripe. i.e. number of devices - * multiplied by chunk size times 2. - * If an individual device has an ra_pages greater than the - * chunk size, then we will not drive that device as hard as it - * wants. We consider this a configuration error: a larger - * chunksize should be used in that case. - */ - int stripe = mddev->raid_disks * - (mddev->chunk_sectors << 9) / PAGE_SIZE; - if (mddev->queue->backing_dev_info->ra_pages < 2* stripe) - mddev->queue->backing_dev_info->ra_pages = 2* stripe; - } - dump_zones(mddev); ret = md_integrity_register(mddev); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 9956a04ac13b..5d1bdee313ec 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -3873,19 +3873,6 @@ static int raid10_run(struct mddev *mddev) mddev->resync_max_sectors = size; set_bit(MD_FAILFAST_SUPPORTED, &mddev->flags); - if (mddev->queue) { - int stripe = conf->geo.raid_disks * - ((mddev->chunk_sectors << 9) / PAGE_SIZE); - - /* Calculate max read-ahead size. - * We need to readahead at least twice a whole stripe.... - * maybe... - */ - stripe /= conf->geo.near_copies; - if (mddev->queue->backing_dev_info->ra_pages < 2 * stripe) - mddev->queue->backing_dev_info->ra_pages = 2 * stripe; - } - if (md_integrity_register(mddev)) goto out_free_conf; @@ -4723,17 +4710,8 @@ static void end_reshape(struct r10conf *conf) conf->reshape_safe = MaxSector; spin_unlock_irq(&conf->device_lock); - /* read-ahead size must cover two whole stripes, which is - * 2 * (datadisks) * chunksize where 'n' is the number of raid devices - */ - if (conf->mddev->queue) { - int stripe = conf->geo.raid_disks * - ((conf->mddev->chunk_sectors << 9) / PAGE_SIZE); - stripe /= conf->geo.near_copies; - if (conf->mddev->queue->backing_dev_info->ra_pages < 2 * stripe) - conf->mddev->queue->backing_dev_info->ra_pages = 2 * stripe; + if (conf->mddev->queue) raid10_set_io_opt(conf); - } conf->fullsync = 0; } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 9a7d1250894e..7ace1f76b147 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -7522,8 +7522,6 @@ static int raid5_run(struct mddev *mddev) int data_disks = conf->previous_raid_disks - conf->max_degraded; int stripe = data_disks * ((mddev->chunk_sectors << 9) / PAGE_SIZE); - if (mddev->queue->backing_dev_info->ra_pages < 2 * stripe) - mddev->queue->backing_dev_info->ra_pages = 2 * stripe; chunk_size = mddev->chunk_sectors << 9; blk_queue_io_min(mddev->queue, chunk_size); @@ -8111,17 +8109,8 @@ static void end_reshape(struct r5conf *conf) spin_unlock_irq(&conf->device_lock); wake_up(&conf->wait_for_overlap); - /* read-ahead size must cover two whole stripes, which is - * 2 * (datadisks) * chunksize where 'n' is the number of raid devices - */ - if (conf->mddev->queue) { - int data_disks = conf->raid_disks - conf->max_degraded; - int stripe = data_disks * ((conf->chunk_sectors << 9) - / PAGE_SIZE); - if (conf->mddev->queue->backing_dev_info->ra_pages < 2 * stripe) - conf->mddev->queue->backing_dev_info->ra_pages = 2 * stripe; + if (conf->mddev->queue) raid5_set_io_opt(conf); - } } } diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index ea1fa41fbba8..741c9bfa8e14 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2147,6 +2147,7 @@ static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) nvme_update_disk_info(ns->head->disk, ns, id); blk_stack_limits(&ns->head->disk->queue->limits, &ns->queue->limits, 0); + blk_queue_update_readahead(ns->head->disk->queue); nvme_update_bdev_size(ns->head->disk); } #endif diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index be5ef6f4ba19..282f5ca424f1 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1140,6 +1140,7 @@ extern void blk_queue_max_zone_append_sectors(struct request_queue *q, extern void blk_queue_physical_block_size(struct request_queue *, unsigned int); extern void blk_queue_alignment_offset(struct request_queue *q, unsigned int alignment); +void blk_queue_update_readahead(struct request_queue *q); extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min); extern void blk_queue_io_min(struct request_queue *q, unsigned int min); extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt); -- cgit v1.2.3 From 1cb039f3dc1619eb795c54aad0a98fdb379b4237 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 24 Sep 2020 08:51:38 +0200 Subject: bdi: replace BDI_CAP_STABLE_WRITES with a queue and a sb flag The BDI_CAP_STABLE_WRITES is one of the few bits of information in the backing_dev_info shared between the block drivers and the writeback code. To help untangling the dependency replace it with a queue flag and a superblock flag derived from it. This also helps with the case of e.g. a file system requiring stable writes due to its own checksumming, but not forcing it on other users of the block device like the swap code. One downside is that we an't support the stable_pages_required bdi attribute in sysfs anymore. It is replaced with a queue attribute which also is writable for easier testing. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- block/blk-integrity.c | 4 ++-- block/blk-mq-debugfs.c | 1 + block/blk-sysfs.c | 3 +++ drivers/block/rbd.c | 2 +- drivers/block/zram/zram_drv.c | 2 +- drivers/md/dm-table.c | 6 +++--- drivers/md/raid5.c | 8 ++++---- drivers/mmc/core/queue.c | 3 +-- drivers/nvme/host/core.c | 3 +-- drivers/nvme/host/multipath.c | 10 +++------- drivers/scsi/iscsi_tcp.c | 4 ++-- fs/super.c | 2 ++ include/linux/backing-dev.h | 6 ------ include/linux/blkdev.h | 3 +++ include/linux/fs.h | 1 + mm/backing-dev.c | 7 +++---- mm/page-writeback.c | 2 +- mm/swapfile.c | 2 +- 18 files changed, 33 insertions(+), 36 deletions(-) (limited to 'drivers/nvme') diff --git a/block/blk-integrity.c b/block/blk-integrity.c index c03705cbb9c9..2b36a8f9b813 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -408,7 +408,7 @@ void blk_integrity_register(struct gendisk *disk, struct blk_integrity *template bi->tuple_size = template->tuple_size; bi->tag_size = template->tag_size; - disk->queue->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES; + blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, disk->queue); #ifdef CONFIG_BLK_INLINE_ENCRYPTION if (disk->queue->ksm) { @@ -428,7 +428,7 @@ EXPORT_SYMBOL(blk_integrity_register); */ void blk_integrity_unregister(struct gendisk *disk) { - disk->queue->backing_dev_info->capabilities &= ~BDI_CAP_STABLE_WRITES; + blk_queue_flag_clear(QUEUE_FLAG_STABLE_WRITES, disk->queue); memset(&disk->queue->integrity, 0, sizeof(struct blk_integrity)); } EXPORT_SYMBOL(blk_integrity_unregister); diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 645b7f800cb8..3094542e12ae 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -116,6 +116,7 @@ static const char *const blk_queue_flag_name[] = { QUEUE_FLAG_NAME(SAME_FORCE), QUEUE_FLAG_NAME(DEAD), QUEUE_FLAG_NAME(INIT_DONE), + QUEUE_FLAG_NAME(STABLE_WRITES), QUEUE_FLAG_NAME(POLL), QUEUE_FLAG_NAME(WC), QUEUE_FLAG_NAME(FUA), diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 869ed21a9edc..76b54c7750b0 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -287,6 +287,7 @@ queue_##name##_store(struct request_queue *q, const char *page, size_t count) \ QUEUE_SYSFS_BIT_FNS(nonrot, NONROT, 1); QUEUE_SYSFS_BIT_FNS(random, ADD_RANDOM, 0); QUEUE_SYSFS_BIT_FNS(iostats, IO_STAT, 0); +QUEUE_SYSFS_BIT_FNS(stable_writes, STABLE_WRITES, 0); #undef QUEUE_SYSFS_BIT_FNS static ssize_t queue_zoned_show(struct request_queue *q, char *page) @@ -613,6 +614,7 @@ static struct queue_sysfs_entry queue_hw_sector_size_entry = { QUEUE_RW_ENTRY(queue_nonrot, "rotational"); QUEUE_RW_ENTRY(queue_iostats, "iostats"); QUEUE_RW_ENTRY(queue_random, "add_random"); +QUEUE_RW_ENTRY(queue_stable_writes, "stable_writes"); static struct attribute *queue_attrs[] = { &queue_requests_entry.attr, @@ -645,6 +647,7 @@ static struct attribute *queue_attrs[] = { &queue_nomerges_entry.attr, &queue_rq_affinity_entry.attr, &queue_iostats_entry.attr, + &queue_stable_writes_entry.attr, &queue_random_entry.attr, &queue_poll_entry.attr, &queue_wc_entry.attr, diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 5d3923c0997c..cf5b016358cd 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -5022,7 +5022,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) } if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC)) - q->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES; + blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q); /* * disk_release() expects a queue ref from add_disk() and will diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index e21ca844d7c2..bff3d4021c18 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1955,7 +1955,7 @@ static int zram_add(void) if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE) blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX); - zram->disk->queue->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES; + blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, zram->disk->queue); device_add_disk(NULL, zram->disk, zram_disk_attr_groups); strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor)); diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index ef2757012f59..405d7cf10eb9 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1815,7 +1815,7 @@ static int device_requires_stable_pages(struct dm_target *ti, { struct request_queue *q = bdev_get_queue(dev->bdev); - return q && bdi_cap_stable_pages_required(q->backing_dev_info); + return q && blk_queue_stable_writes(q); } /* @@ -1900,9 +1900,9 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, * because they do their own checksumming. */ if (dm_table_requires_stable_pages(t)) - q->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES; + blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q); else - q->backing_dev_info->capabilities &= ~BDI_CAP_STABLE_WRITES; + blk_queue_flag_clear(QUEUE_FLAG_STABLE_WRITES, q); /* * Determine whether or not this queue's I/O timings contribute diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 7ace1f76b147..d589d26c86ea 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -6638,14 +6638,14 @@ raid5_store_skip_copy(struct mddev *mddev, const char *page, size_t len) if (!conf) err = -ENODEV; else if (new != conf->skip_copy) { + struct request_queue *q = mddev->queue; + mddev_suspend(mddev); conf->skip_copy = new; if (new) - mddev->queue->backing_dev_info->capabilities |= - BDI_CAP_STABLE_WRITES; + blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q); else - mddev->queue->backing_dev_info->capabilities &= - ~BDI_CAP_STABLE_WRITES; + blk_queue_flag_clear(QUEUE_FLAG_STABLE_WRITES, q); mddev_resume(mddev); } mddev_unlock(mddev); diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index 6c022ef0f84d..80fe3852ce0f 100644 --- a/drivers/mmc/core/queue.c +++ b/drivers/mmc/core/queue.c @@ -472,8 +472,7 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card) } if (mmc_host_is_spi(host) && host->use_spi_crc) - mq->queue->backing_dev_info->capabilities |= - BDI_CAP_STABLE_WRITES; + blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, mq->queue); mq->queue->queuedata = mq; blk_queue_rq_timeout(mq->queue, 60 * HZ); diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 741c9bfa8e14..c190c56bf702 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3926,8 +3926,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) goto out_free_ns; if (ctrl->opts && ctrl->opts->data_digest) - ns->queue->backing_dev_info->capabilities - |= BDI_CAP_STABLE_WRITES; + blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, ns->queue); blk_queue_flag_set(QUEUE_FLAG_NONROT, ns->queue); if (ctrl->ops->flags & NVME_F_PCI_P2PDMA) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index d4ba736c6c89..74896be40c17 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -673,13 +673,9 @@ void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id) nvme_mpath_set_live(ns); } - if (bdi_cap_stable_pages_required(ns->queue->backing_dev_info)) { - struct gendisk *disk = ns->head->disk; - - if (disk) - disk->queue->backing_dev_info->capabilities |= - BDI_CAP_STABLE_WRITES; - } + if (blk_queue_stable_writes(ns->queue) && ns->head->disk) + blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, + ns->head->disk->queue); } void nvme_mpath_remove_disk(struct nvme_ns_head *head) diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c index b5dd1caae5e9..a622f334c933 100644 --- a/drivers/scsi/iscsi_tcp.c +++ b/drivers/scsi/iscsi_tcp.c @@ -962,8 +962,8 @@ static int iscsi_sw_tcp_slave_configure(struct scsi_device *sdev) struct iscsi_conn *conn = session->leadconn; if (conn->datadgst_en) - sdev->request_queue->backing_dev_info->capabilities - |= BDI_CAP_STABLE_WRITES; + blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, + sdev->request_queue); blk_queue_dma_alignment(sdev->request_queue, 0); return 0; } diff --git a/fs/super.c b/fs/super.c index 904459b35119..a51c2083cd6b 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1256,6 +1256,8 @@ static int set_bdev_super(struct super_block *s, void *data) s->s_dev = s->s_bdev->bd_dev; s->s_bdi = bdi_get(s->s_bdev->bd_bdi); + if (blk_queue_stable_writes(s->s_bdev->bd_disk->queue)) + s->s_iflags |= SB_I_STABLE_WRITES; return 0; } diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 860ea33571bc..5da4ea3dd0cc 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -126,7 +126,6 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); #define BDI_CAP_NO_ACCT_DIRTY 0x00000001 #define BDI_CAP_NO_WRITEBACK 0x00000002 #define BDI_CAP_NO_ACCT_WB 0x00000004 -#define BDI_CAP_STABLE_WRITES 0x00000008 #define BDI_CAP_STRICTLIMIT 0x00000010 #define BDI_CAP_CGROUP_WRITEBACK 0x00000020 @@ -170,11 +169,6 @@ static inline int wb_congested(struct bdi_writeback *wb, int cong_bits) long congestion_wait(int sync, long timeout); long wait_iff_congested(int sync, long timeout); -static inline bool bdi_cap_stable_pages_required(struct backing_dev_info *bdi) -{ - return bdi->capabilities & BDI_CAP_STABLE_WRITES; -} - static inline bool bdi_cap_writeback_dirty(struct backing_dev_info *bdi) { return !(bdi->capabilities & BDI_CAP_NO_WRITEBACK); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 282f5ca424f1..8e77f12de522 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -606,6 +606,7 @@ struct request_queue { #define QUEUE_FLAG_SAME_FORCE 12 /* force complete on same CPU */ #define QUEUE_FLAG_DEAD 13 /* queue tear-down finished */ #define QUEUE_FLAG_INIT_DONE 14 /* queue is initialized */ +#define QUEUE_FLAG_STABLE_WRITES 15 /* don't modify blks until WB is done */ #define QUEUE_FLAG_POLL 16 /* IO polling enabled if set */ #define QUEUE_FLAG_WC 17 /* Write back caching */ #define QUEUE_FLAG_FUA 18 /* device supports FUA writes */ @@ -635,6 +636,8 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); #define blk_queue_noxmerges(q) \ test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags) #define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags) +#define blk_queue_stable_writes(q) \ + test_bit(QUEUE_FLAG_STABLE_WRITES, &(q)->queue_flags) #define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags) #define blk_queue_add_random(q) test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags) #define blk_queue_discard(q) test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags) diff --git a/include/linux/fs.h b/include/linux/fs.h index fbd74df5ce5f..222465b7cf41 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1385,6 +1385,7 @@ extern int send_sigurg(struct fown_struct *fown); #define SB_I_CGROUPWB 0x00000001 /* cgroup-aware writeback enabled */ #define SB_I_NOEXEC 0x00000002 /* Ignore executables on this fs */ #define SB_I_NODEV 0x00000004 /* Ignore devices on this fs */ +#define SB_I_STABLE_WRITES 0x00000008 /* don't modify blks until WB is done */ /* sb->s_iflags to limit user namespace mounts */ #define SB_I_USERNS_VISIBLE 0x00000010 /* fstype already mounted */ diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 2dac3be61271..8e3802bf03a9 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -204,10 +204,9 @@ static ssize_t stable_pages_required_show(struct device *dev, struct device_attribute *attr, char *page) { - struct backing_dev_info *bdi = dev_get_drvdata(dev); - - return snprintf(page, PAGE_SIZE-1, "%d\n", - bdi_cap_stable_pages_required(bdi) ? 1 : 0); + dev_warn_once(dev, + "the stable_pages_required attribute has been removed. Use the stable_writes queue attribute instead.\n"); + return snprintf(page, PAGE_SIZE-1, "%d\n", 0); } static DEVICE_ATTR_RO(stable_pages_required); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 4e4ddd67b71e..e9c36521461a 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2849,7 +2849,7 @@ EXPORT_SYMBOL_GPL(wait_on_page_writeback); */ void wait_for_stable_page(struct page *page) { - if (bdi_cap_stable_pages_required(inode_to_bdi(page->mapping->host))) + if (page->mapping->host->i_sb->s_iflags & SB_I_STABLE_WRITES) wait_on_page_writeback(page); } EXPORT_SYMBOL_GPL(wait_for_stable_page); diff --git a/mm/swapfile.c b/mm/swapfile.c index 96a7c47dd533..65ef407512b5 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -3237,7 +3237,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) goto bad_swap_unlock_inode; } - if (bdi_cap_stable_pages_required(inode_to_bdi(inode))) + if (p->bdev && blk_queue_stable_writes(p->bdev->bd_disk->queue)) p->flags |= SWP_STABLE_WRITES; if (p->bdev && p->bdev->bd_disk->fops->rw_page) -- cgit v1.2.3