diff options
Diffstat (limited to 'block')
-rw-r--r-- | block/bfq-iosched.c | 2 | ||||
-rw-r--r-- | block/bfq-wf2q.c | 2 | ||||
-rw-r--r-- | block/bio.c | 43 | ||||
-rw-r--r-- | block/blk-cgroup.c | 9 | ||||
-rw-r--r-- | block/blk-core.c | 4 | ||||
-rw-r--r-- | block/blk-flush.c | 4 | ||||
-rw-r--r-- | block/blk-iolatency.c | 1 | ||||
-rw-r--r-- | block/blk-mq-sched.c | 8 | ||||
-rw-r--r-- | block/blk-mq.c | 153 | ||||
-rw-r--r-- | block/blk-mq.h | 17 | ||||
-rw-r--r-- | block/blk-sysfs.c | 12 |
11 files changed, 135 insertions, 120 deletions
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 4c592496a16a..fac188dd78fa 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -674,7 +674,7 @@ static bool bfq_symmetric_scenario(struct bfq_data *bfqd) * at least two nodes. */ return !(varied_queue_weights || multiple_classes_busy -#ifdef BFQ_GROUP_IOSCHED_ENABLED +#ifdef CONFIG_BFQ_GROUP_IOSCHED || bfqd->num_groups_with_pending_reqs > 0 #endif ); diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c index 63311d1ff1ed..a11bef75483d 100644 --- a/block/bfq-wf2q.c +++ b/block/bfq-wf2q.c @@ -1012,7 +1012,7 @@ static void __bfq_activate_entity(struct bfq_entity *entity, entity->on_st = true; } -#ifdef BFQ_GROUP_IOSCHED_ENABLED +#ifdef CONFIG_BFQ_GROUP_IOSCHED if (!bfq_entity_to_bfqq(entity)) { /* bfq_group */ struct bfq_group *bfqg = container_of(entity, struct bfq_group, entity); diff --git a/block/bio.c b/block/bio.c index 71a78d9fb8b7..b64cedc7f87c 100644 --- a/block/bio.c +++ b/block/bio.c @@ -849,20 +849,14 @@ static int __bio_iov_bvec_add_pages(struct bio *bio, struct iov_iter *iter) size = bio_add_page(bio, bv->bv_page, len, bv->bv_offset + iter->iov_offset); if (size == len) { - struct page *page; - int i; + if (!bio_flagged(bio, BIO_NO_PAGE_REF)) { + struct page *page; + int i; + + mp_bvec_for_each_page(page, bv, i) + get_page(page); + } - /* - * For the normal O_DIRECT case, we could skip grabbing this - * reference and then not have to put them again when IO - * completes. But this breaks some in-kernel users, like - * splicing to/from a loop device, where we release the pipe - * pages unconditionally. If we can fix that case, we can - * get rid of the get here and the need to call - * bio_release_pages() at IO completion time. - */ - mp_bvec_for_each_page(page, bv, i) - get_page(page); iov_iter_advance(iter, size); return 0; } @@ -925,10 +919,12 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) * This takes either an iterator pointing to user memory, or one pointing to * kernel pages (BVEC iterator). If we're adding user pages, we pin them and * map them into the kernel. On IO completion, the caller should put those - * pages. For now, when adding kernel pages, we still grab a reference to the - * page. This isn't strictly needed for the common case, but some call paths - * end up releasing pages from eg a pipe and we can't easily control these. - * See comment in __bio_iov_bvec_add_pages(). + * pages. If we're adding kernel pages, and the caller told us it's safe to + * do so, we just have to add the pages to the bio directly. We don't grab an + * extra reference to those pages (the user should already have that), and we + * don't put the page on IO completion. The caller needs to check if the bio is + * flagged BIO_NO_PAGE_REF on IO completion. If it isn't, then pages should be + * released. * * The function tries, but does not guarantee, to pin as many pages as * fit into the bio, or are requested in *iter, whatever is smaller. If @@ -940,6 +936,13 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) const bool is_bvec = iov_iter_is_bvec(iter); unsigned short orig_vcnt = bio->bi_vcnt; + /* + * If this is a BVEC iter, then the pages are kernel pages. Don't + * release them on IO completion, if the caller asked us to. + */ + if (is_bvec && iov_iter_bvec_no_ref(iter)) + bio_set_flag(bio, BIO_NO_PAGE_REF); + do { int ret; @@ -1696,7 +1699,8 @@ static void bio_dirty_fn(struct work_struct *work) next = bio->bi_private; bio_set_pages_dirty(bio); - bio_release_pages(bio); + if (!bio_flagged(bio, BIO_NO_PAGE_REF)) + bio_release_pages(bio); bio_put(bio); } } @@ -1713,7 +1717,8 @@ void bio_check_pages_dirty(struct bio *bio) goto defer; } - bio_release_pages(bio); + if (!bio_flagged(bio, BIO_NO_PAGE_REF)) + bio_release_pages(bio); bio_put(bio); return; defer: diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 77f37ef8ef06..617a2b3f7582 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1736,8 +1736,8 @@ out: /** * blkcg_schedule_throttle - this task needs to check for throttling - * @q - the request queue IO was submitted on - * @use_memdelay - do we charge this to memory delay for PSI + * @q: the request queue IO was submitted on + * @use_memdelay: do we charge this to memory delay for PSI * * This is called by the IO controller when we know there's delay accumulated * for the blkg for this task. We do not pass the blkg because there are places @@ -1769,8 +1769,9 @@ void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay) /** * blkcg_add_delay - add delay to this blkg - * @now - the current time in nanoseconds - * @delta - how many nanoseconds of delay to add + * @blkg: blkg of interest + * @now: the current time in nanoseconds + * @delta: how many nanoseconds of delay to add * * Charge @delta to the blkg's current delay accumulation. This is used to * throttle tasks if an IO controller thinks we need more throttling. diff --git a/block/blk-core.c b/block/blk-core.c index 4673ebe42255..a55389ba8779 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1245,8 +1245,6 @@ static int blk_cloned_rq_check_limits(struct request_queue *q, */ blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq) { - blk_qc_t unused; - if (blk_cloned_rq_check_limits(q, rq)) return BLK_STS_IOERR; @@ -1262,7 +1260,7 @@ blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request * * bypass a potential scheduler on the bottom device for * insert. */ - return blk_mq_try_issue_directly(rq->mq_hctx, rq, &unused, true, true); + return blk_mq_request_issue_directly(rq, true); } EXPORT_SYMBOL_GPL(blk_insert_cloned_request); diff --git a/block/blk-flush.c b/block/blk-flush.c index 6e0f2d97fc6d..d95f94892015 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -220,7 +220,7 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error) blk_mq_tag_set_rq(hctx, flush_rq->tag, fq->orig_rq); flush_rq->tag = -1; } else { - blk_mq_put_driver_tag_hctx(hctx, flush_rq); + blk_mq_put_driver_tag(flush_rq); flush_rq->internal_tag = -1; } @@ -324,7 +324,7 @@ static void mq_flush_data_end_io(struct request *rq, blk_status_t error) if (q->elevator) { WARN_ON(rq->tag < 0); - blk_mq_put_driver_tag_hctx(hctx, rq); + blk_mq_put_driver_tag(rq); } /* diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index 2620baa1f699..507212d75ee2 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -75,6 +75,7 @@ #include <linux/blk-mq.h> #include "blk-rq-qos.h" #include "blk-stat.h" +#include "blk.h" #define DEFAULT_SCALE_COOKIE 1000000U diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 40905539afed..aa6bc5c02643 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -423,10 +423,12 @@ void blk_mq_sched_insert_requests(struct blk_mq_hw_ctx *hctx, * busy in case of 'none' scheduler, and this way may save * us one extra enqueue & dequeue to sw queue. */ - if (!hctx->dispatch_busy && !e && !run_queue_async) + if (!hctx->dispatch_busy && !e && !run_queue_async) { blk_mq_try_issue_list_directly(hctx, list); - else - blk_mq_insert_requests(hctx, ctx, list); + if (list_empty(list)) + return; + } + blk_mq_insert_requests(hctx, ctx, list); } blk_mq_run_hw_queue(hctx, run_queue_async); diff --git a/block/blk-mq.c b/block/blk-mq.c index a9c181603cbd..a9354835cf51 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -59,7 +59,8 @@ static int blk_mq_poll_stats_bkt(const struct request *rq) } /* - * Check if any of the ctx's have pending work in this hardware queue + * Check if any of the ctx, dispatch list or elevator + * have pending work in this hardware queue. */ static bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx) { @@ -782,7 +783,6 @@ void blk_mq_add_to_requeue_list(struct request *rq, bool at_head, if (kick_requeue_list) blk_mq_kick_requeue_list(q); } -EXPORT_SYMBOL(blk_mq_add_to_requeue_list); void blk_mq_kick_requeue_list(struct request_queue *q) { @@ -1072,7 +1072,13 @@ static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode, hctx = container_of(wait, struct blk_mq_hw_ctx, dispatch_wait); spin_lock(&hctx->dispatch_wait_lock); - list_del_init(&wait->entry); + if (!list_empty(&wait->entry)) { + struct sbitmap_queue *sbq; + + list_del_init(&wait->entry); + sbq = &hctx->tags->bitmap_tags; + atomic_dec(&sbq->ws_active); + } spin_unlock(&hctx->dispatch_wait_lock); blk_mq_run_hw_queue(hctx, true); @@ -1088,13 +1094,13 @@ static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode, static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx, struct request *rq) { + struct sbitmap_queue *sbq = &hctx->tags->bitmap_tags; struct wait_queue_head *wq; wait_queue_entry_t *wait; bool ret; if (!(hctx->flags & BLK_MQ_F_TAG_SHARED)) { - if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) - set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); + blk_mq_sched_mark_restart_hctx(hctx); /* * It's possible that a tag was freed in the window between the @@ -1111,7 +1117,7 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx, if (!list_empty_careful(&wait->entry)) return false; - wq = &bt_wait_ptr(&hctx->tags->bitmap_tags, hctx)->wait; + wq = &bt_wait_ptr(sbq, hctx)->wait; spin_lock_irq(&wq->lock); spin_lock(&hctx->dispatch_wait_lock); @@ -1121,6 +1127,7 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx, return false; } + atomic_inc(&sbq->ws_active); wait->flags &= ~WQ_FLAG_EXCLUSIVE; __add_wait_queue(wq, wait); @@ -1141,6 +1148,7 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx, * someone else gets the wakeup. */ list_del_init(&wait->entry); + atomic_dec(&sbq->ws_active); spin_unlock(&hctx->dispatch_wait_lock); spin_unlock_irq(&wq->lock); @@ -1703,11 +1711,12 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule) unsigned int depth; list_splice_init(&plug->mq_list, &list); - plug->rq_count = 0; if (plug->rq_count > 2 && plug->multiple_queues) list_sort(NULL, &list, plug_rq_cmp); + plug->rq_count = 0; + this_q = NULL; this_hctx = NULL; this_ctx = NULL; @@ -1792,74 +1801,76 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx, return ret; } -blk_status_t blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, +static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, struct request *rq, blk_qc_t *cookie, - bool bypass, bool last) + bool bypass_insert, bool last) { struct request_queue *q = rq->q; bool run_queue = true; - blk_status_t ret = BLK_STS_RESOURCE; - int srcu_idx; - bool force = false; - hctx_lock(hctx, &srcu_idx); /* - * hctx_lock is needed before checking quiesced flag. + * RCU or SRCU read lock is needed before checking quiesced flag. * - * When queue is stopped or quiesced, ignore 'bypass', insert - * and return BLK_STS_OK to caller, and avoid driver to try to - * dispatch again. + * When queue is stopped or quiesced, ignore 'bypass_insert' from + * blk_mq_request_issue_directly(), and return BLK_STS_OK to caller, + * and avoid driver to try to dispatch again. */ - if (unlikely(blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q))) { + if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)) { run_queue = false; - bypass = false; - goto out_unlock; + bypass_insert = false; + goto insert; } - if (unlikely(q->elevator && !bypass)) - goto out_unlock; + if (q->elevator && !bypass_insert) + goto insert; if (!blk_mq_get_dispatch_budget(hctx)) - goto out_unlock; + goto insert; if (!blk_mq_get_driver_tag(rq)) { blk_mq_put_dispatch_budget(hctx); - goto out_unlock; + goto insert; } - /* - * Always add a request that has been through - *.queue_rq() to the hardware dispatch list. - */ - force = true; - ret = __blk_mq_issue_directly(hctx, rq, cookie, last); -out_unlock: + return __blk_mq_issue_directly(hctx, rq, cookie, last); +insert: + if (bypass_insert) + return BLK_STS_RESOURCE; + + blk_mq_request_bypass_insert(rq, run_queue); + return BLK_STS_OK; +} + +static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, + struct request *rq, blk_qc_t *cookie) +{ + blk_status_t ret; + int srcu_idx; + + might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING); + + hctx_lock(hctx, &srcu_idx); + + ret = __blk_mq_try_issue_directly(hctx, rq, cookie, false, true); + if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) + blk_mq_request_bypass_insert(rq, true); + else if (ret != BLK_STS_OK) + blk_mq_end_request(rq, ret); + + hctx_unlock(hctx, srcu_idx); +} + +blk_status_t blk_mq_request_issue_directly(struct request *rq, bool last) +{ + blk_status_t ret; + int srcu_idx; + blk_qc_t unused_cookie; + struct blk_mq_hw_ctx *hctx = rq->mq_hctx; + + hctx_lock(hctx, &srcu_idx); + ret = __blk_mq_try_issue_directly(hctx, rq, &unused_cookie, true, last); hctx_unlock(hctx, srcu_idx); - switch (ret) { - case BLK_STS_OK: - break; - case BLK_STS_DEV_RESOURCE: - case BLK_STS_RESOURCE: - if (force) { - blk_mq_request_bypass_insert(rq, run_queue); - /* - * We have to return BLK_STS_OK for the DM - * to avoid livelock. Otherwise, we return - * the real result to indicate whether the - * request is direct-issued successfully. - */ - ret = bypass ? BLK_STS_OK : ret; - } else if (!bypass) { - blk_mq_sched_insert_request(rq, false, - run_queue, false); - } - break; - default: - if (!bypass) - blk_mq_end_request(rq, ret); - break; - } return ret; } @@ -1867,20 +1878,22 @@ out_unlock: void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, struct list_head *list) { - blk_qc_t unused; - blk_status_t ret = BLK_STS_OK; - while (!list_empty(list)) { + blk_status_t ret; struct request *rq = list_first_entry(list, struct request, queuelist); list_del_init(&rq->queuelist); - if (ret == BLK_STS_OK) - ret = blk_mq_try_issue_directly(hctx, rq, &unused, - false, + ret = blk_mq_request_issue_directly(rq, list_empty(list)); + if (ret != BLK_STS_OK) { + if (ret == BLK_STS_RESOURCE || + ret == BLK_STS_DEV_RESOURCE) { + blk_mq_request_bypass_insert(rq, list_empty(list)); - else - blk_mq_sched_insert_request(rq, false, true, false); + break; + } + blk_mq_end_request(rq, ret); + } } /* @@ -1888,7 +1901,7 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, * the driver there was more coming, but that turned out to * be a lie. */ - if (ret != BLK_STS_OK && hctx->queue->mq_ops->commit_rqs) + if (!list_empty(list) && hctx->queue->mq_ops->commit_rqs) hctx->queue->mq_ops->commit_rqs(hctx); } @@ -1995,19 +2008,21 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) plug->rq_count--; } blk_add_rq_to_plug(plug, rq); + trace_block_plug(q); blk_mq_put_ctx(data.ctx); if (same_queue_rq) { data.hctx = same_queue_rq->mq_hctx; + trace_block_unplug(q, 1, true); blk_mq_try_issue_directly(data.hctx, same_queue_rq, - &cookie, false, true); + &cookie); } } else if ((q->nr_hw_queues > 1 && is_sync) || (!q->elevator && !data.hctx->dispatch_busy)) { blk_mq_put_ctx(data.ctx); blk_mq_bio_to_request(rq, bio); - blk_mq_try_issue_directly(data.hctx, rq, &cookie, false, true); + blk_mq_try_issue_directly(data.hctx, rq, &cookie); } else { blk_mq_put_ctx(data.ctx); blk_mq_bio_to_request(rq, bio); @@ -2324,7 +2339,7 @@ static int blk_mq_init_hctx(struct request_queue *q, return 0; free_fq: - kfree(hctx->fq); + blk_free_flush_queue(hctx->fq); exit_hctx: if (set->ops->exit_hctx) set->ops->exit_hctx(hctx, hctx_idx); @@ -2857,7 +2872,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, /* * Default to classic polling */ - q->poll_nsec = -1; + q->poll_nsec = BLK_MQ_POLL_CLASSIC; blk_mq_init_cpu_queues(q, set->nr_hw_queues); blk_mq_add_queue_tag_set(set, q); @@ -3392,7 +3407,7 @@ static bool blk_mq_poll_hybrid(struct request_queue *q, { struct request *rq; - if (q->poll_nsec == -1) + if (q->poll_nsec == BLK_MQ_POLL_CLASSIC) return false; if (!blk_qc_t_is_internal(cookie)) diff --git a/block/blk-mq.h b/block/blk-mq.h index c11353a3749d..423ea88ab6fb 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -41,6 +41,8 @@ void blk_mq_free_queue(struct request_queue *q); int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr); void blk_mq_wake_waiters(struct request_queue *q); bool blk_mq_dispatch_rq_list(struct request_queue *, struct list_head *, bool); +void blk_mq_add_to_requeue_list(struct request *rq, bool at_head, + bool kick_requeue_list); void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list); bool blk_mq_get_driver_tag(struct request *rq); struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx, @@ -68,10 +70,8 @@ void blk_mq_request_bypass_insert(struct request *rq, bool run_queue); void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, struct list_head *list); -blk_status_t blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, - struct request *rq, - blk_qc_t *cookie, - bool bypass, bool last); +/* Used by blk_insert_cloned_request() to issue request directly */ +blk_status_t blk_mq_request_issue_directly(struct request *rq, bool last); void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, struct list_head *list); @@ -222,15 +222,6 @@ static inline void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx, } } -static inline void blk_mq_put_driver_tag_hctx(struct blk_mq_hw_ctx *hctx, - struct request *rq) -{ - if (rq->tag == -1 || rq->internal_tag == -1) - return; - - __blk_mq_put_driver_tag(hctx, rq); -} - static inline void blk_mq_put_driver_tag(struct request *rq) { if (rq->tag == -1 || rq->internal_tag == -1) diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 59685918167e..422327089e0f 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -360,8 +360,8 @@ static ssize_t queue_poll_delay_show(struct request_queue *q, char *page) { int val; - if (q->poll_nsec == -1) - val = -1; + if (q->poll_nsec == BLK_MQ_POLL_CLASSIC) + val = BLK_MQ_POLL_CLASSIC; else val = q->poll_nsec / 1000; @@ -380,10 +380,12 @@ static ssize_t queue_poll_delay_store(struct request_queue *q, const char *page, if (err < 0) return err; - if (val == -1) - q->poll_nsec = -1; - else + if (val == BLK_MQ_POLL_CLASSIC) + q->poll_nsec = BLK_MQ_POLL_CLASSIC; + else if (val >= 0) q->poll_nsec = val * 1000; + else + return -EINVAL; return count; } |