summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--block/blk-core.c369
-rw-r--r--block/blk-flush.c3
-rw-r--r--block/elevator.c6
-rw-r--r--include/linux/blk_types.h2
-rw-r--r--include/linux/blkdev.h42
-rw-r--r--include/linux/elevator.h1
-rw-r--r--include/linux/sched.h6
-rw-r--r--kernel/exit.c1
-rw-r--r--kernel/fork.c3
-rw-r--r--kernel/sched.c12
10 files changed, 344 insertions, 101 deletions
diff --git a/block/blk-core.c b/block/blk-core.c
index e958c7a1e46..6efb55cc5af 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -27,6 +27,7 @@
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/fault-inject.h>
+#include <linux/list_sort.h>
#define CREATE_TRACE_POINTS
#include <trace/events/block.h>
@@ -203,7 +204,7 @@ static void blk_delay_work(struct work_struct *work)
q = container_of(work, struct request_queue, delay_work.work);
spin_lock_irq(q->queue_lock);
- q->request_fn(q);
+ __blk_run_queue(q);
spin_unlock_irq(q->queue_lock);
}
@@ -686,6 +687,8 @@ int blk_get_queue(struct request_queue *q)
static inline void blk_free_request(struct request_queue *q, struct request *rq)
{
+ BUG_ON(rq->cmd_flags & REQ_ON_PLUG);
+
if (rq->cmd_flags & REQ_ELVPRIV)
elv_put_request(q, rq);
mempool_free(rq, q->rq.rq_pool);
@@ -1051,6 +1054,13 @@ void blk_requeue_request(struct request_queue *q, struct request *rq)
}
EXPORT_SYMBOL(blk_requeue_request);
+static void add_acct_request(struct request_queue *q, struct request *rq,
+ int where)
+{
+ drive_stat_acct(rq, 1);
+ __elv_add_request(q, rq, where, 0);
+}
+
/**
* blk_insert_request - insert a special request into a request queue
* @q: request queue where request should be inserted
@@ -1093,8 +1103,7 @@ void blk_insert_request(struct request_queue *q, struct request *rq,
if (blk_rq_tagged(rq))
blk_queue_end_tag(q, rq);
- drive_stat_acct(rq, 1);
- __elv_add_request(q, rq, where, 0);
+ add_acct_request(q, rq, where);
__blk_run_queue(q);
spin_unlock_irqrestore(q->queue_lock, flags);
}
@@ -1215,6 +1224,113 @@ void blk_add_request_payload(struct request *rq, struct page *page,
}
EXPORT_SYMBOL_GPL(blk_add_request_payload);
+static bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
+ struct bio *bio)
+{
+ const int ff = bio->bi_rw & REQ_FAILFAST_MASK;
+
+ /*
+ * Debug stuff, kill later
+ */
+ if (!rq_mergeable(req)) {
+ blk_dump_rq_flags(req, "back");
+ return false;
+ }
+
+ if (!ll_back_merge_fn(q, req, bio))
+ return false;
+
+ trace_block_bio_backmerge(q, bio);
+
+ if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
+ blk_rq_set_mixed_merge(req);
+
+ req->biotail->bi_next = bio;
+ req->biotail = bio;
+ req->__data_len += bio->bi_size;
+ req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
+
+ drive_stat_acct(req, 0);
+ return true;
+}
+
+static bool bio_attempt_front_merge(struct request_queue *q,
+ struct request *req, struct bio *bio)
+{
+ const int ff = bio->bi_rw & REQ_FAILFAST_MASK;
+ sector_t sector;
+
+ /*
+ * Debug stuff, kill later
+ */
+ if (!rq_mergeable(req)) {
+ blk_dump_rq_flags(req, "front");
+ return false;
+ }
+
+ if (!ll_front_merge_fn(q, req, bio))
+ return false;
+
+ trace_block_bio_frontmerge(q, bio);
+
+ if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
+ blk_rq_set_mixed_merge(req);
+
+ sector = bio->bi_sector;
+
+ bio->bi_next = req->bio;
+ req->bio = bio;
+
+ /*
+ * may not be valid. if the low level driver said
+ * it didn't need a bounce buffer then it better
+ * not touch req->buffer either...
+ */
+ req->buffer = bio_data(bio);
+ req->__sector = bio->bi_sector;
+ req->__data_len += bio->bi_size;
+ req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
+
+ drive_stat_acct(req, 0);
+ return true;
+}
+
+/*
+ * Attempts to merge with the plugged list in the current process. Returns
+ * true if merge was succesful, otherwise false.
+ */
+static bool attempt_plug_merge(struct task_struct *tsk, struct request_queue *q,
+ struct bio *bio)
+{
+ struct blk_plug *plug;
+ struct request *rq;
+ bool ret = false;
+
+ plug = tsk->plug;
+ if (!plug)
+ goto out;
+
+ list_for_each_entry_reverse(rq, &plug->list, queuelist) {
+ int el_ret;
+
+ if (rq->q != q)
+ continue;
+
+ el_ret = elv_try_merge(rq, bio);
+ if (el_ret == ELEVATOR_BACK_MERGE) {
+ ret = bio_attempt_back_merge(q, rq, bio);
+ if (ret)
+ break;
+ } else if (el_ret == ELEVATOR_FRONT_MERGE) {
+ ret = bio_attempt_front_merge(q, rq, bio);
+ if (ret)
+ break;
+ }
+ }
+out:
+ return ret;
+}
+
void init_request_from_bio(struct request *req, struct bio *bio)
{
req->cpu = bio->bi_comp_cpu;
@@ -1230,26 +1346,12 @@ void init_request_from_bio(struct request *req, struct bio *bio)
blk_rq_bio_prep(req->q, req, bio);
}
-/*
- * Only disabling plugging for non-rotational devices if it does tagging
- * as well, otherwise we do need the proper merging
- */
-static inline bool queue_should_plug(struct request_queue *q)
-{
- return !(blk_queue_nonrot(q) && blk_queue_tagged(q));
-}
-
static int __make_request(struct request_queue *q, struct bio *bio)
{
- struct request *req;
- int el_ret;
- unsigned int bytes = bio->bi_size;
- const unsigned short prio = bio_prio(bio);
const bool sync = !!(bio->bi_rw & REQ_SYNC);
- const bool unplug = !!(bio->bi_rw & REQ_UNPLUG);
- const unsigned long ff = bio->bi_rw & REQ_FAILFAST_MASK;
- int where = ELEVATOR_INSERT_SORT;
- int rw_flags;
+ struct blk_plug *plug;
+ int el_ret, rw_flags, where = ELEVATOR_INSERT_SORT;
+ struct request *req;
/*
* low level driver can indicate that it wants pages above a
@@ -1258,78 +1360,36 @@ static int __make_request(struct request_queue *q, struct bio *bio)
*/
blk_queue_bounce(q, &bio);
- spin_lock_irq(q->queue_lock);
-
if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) {
+ spin_lock_irq(q->queue_lock);
where = ELEVATOR_INSERT_FLUSH;
goto get_rq;
}
- if (elv_queue_empty(q))
- goto get_rq;
-
- el_ret = elv_merge(q, &req, bio);
- switch (el_ret) {
- case ELEVATOR_BACK_MERGE:
- BUG_ON(!rq_mergeable(req));
-
- if (!ll_back_merge_fn(q, req, bio))
- break;
-
- trace_block_bio_backmerge(q, bio);
-
- if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
- blk_rq_set_mixed_merge(req);
-
- req->biotail->bi_next = bio;
- req->biotail = bio;
- req->__data_len += bytes;
- req->ioprio = ioprio_best(req->ioprio, prio);
- if (!blk_rq_cpu_valid(req))
- req->cpu = bio->bi_comp_cpu;
- drive_stat_acct(req, 0);
- elv_bio_merged(q, req, bio);
- if (!attempt_back_merge(q, req))
- elv_merged_request(q, req, el_ret);
+ /*
+ * Check if we can merge with the plugged list before grabbing
+ * any locks.
+ */
+ if (attempt_plug_merge(current, q, bio))
goto out;
- case ELEVATOR_FRONT_MERGE:
- BUG_ON(!rq_mergeable(req));
-
- if (!ll_front_merge_fn(q, req, bio))
- break;
-
- trace_block_bio_frontmerge(q, bio);
+ spin_lock_irq(q->queue_lock);
- if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) {
- blk_rq_set_mixed_merge(req);
- req->cmd_flags &= ~REQ_FAILFAST_MASK;
- req->cmd_flags |= ff;
+ el_ret = elv_merge(q, &req, bio);
+ if (el_ret == ELEVATOR_BACK_MERGE) {
+ BUG_ON(req->cmd_flags & REQ_ON_PLUG);
+ if (bio_attempt_back_merge(q, req, bio)) {
+ if (!attempt_back_merge(q, req))
+ elv_merged_request(q, req, el_ret);
+ goto out_unlock;
+ }
+ } else if (el_ret == ELEVATOR_FRONT_MERGE) {
+ BUG_ON(req->cmd_flags & REQ_ON_PLUG);
+ if (bio_attempt_front_merge(q, req, bio)) {
+ if (!attempt_front_merge(q, req))
+ elv_merged_request(q, req, el_ret);
+ goto out_unlock;
}
-
- bio->bi_next = req->bio;
- req->bio = bio;
-
- /*
- * may not be valid. if the low level driver said
- * it didn't need a bounce buffer then it better
- * not touch req->buffer either...
- */
- req->buffer = bio_data(bio);
- req->__sector = bio->bi_sector;
- req->__data_len += bytes;
- req->ioprio = ioprio_best(req->ioprio, prio);
- if (!blk_rq_cpu_valid(req))
- req->cpu = bio->bi_comp_cpu;
- drive_stat_acct(req, 0);
- elv_bio_merged(q, req, bio);
- if (!attempt_front_merge(q, req))
- elv_merged_request(q, req, el_ret);
- goto out;
-
- /* ELV_NO_MERGE: elevator says don't/can't merge. */
- default:
- ;
}
get_rq:
@@ -1356,20 +1416,35 @@ get_rq:
*/
init_request_from_bio(req, bio);
- spin_lock_irq(q->queue_lock);
if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) ||
- bio_flagged(bio, BIO_CPU_AFFINE))
- req->cpu = blk_cpu_to_group(smp_processor_id());
- if (queue_should_plug(q) && elv_queue_empty(q))
- blk_plug_device(q);
-
- /* insert the request into the elevator */
- drive_stat_acct(req, 1);
- __elv_add_request(q, req, where, 0);
+ bio_flagged(bio, BIO_CPU_AFFINE)) {
+ req->cpu = blk_cpu_to_group(get_cpu());
+ put_cpu();
+ }
+
+ plug = current->plug;
+ if (plug && !sync) {
+ if (!plug->should_sort && !list_empty(&plug->list)) {
+ struct request *__rq;
+
+ __rq = list_entry_rq(plug->list.prev);
+ if (__rq->q != q)
+ plug->should_sort = 1;
+ }
+ /*
+ * Debug flag, kill later
+ */
+ req->cmd_flags |= REQ_ON_PLUG;
+ list_add_tail(&req->queuelist, &plug->list);
+ drive_stat_acct(req, 1);
+ } else {
+ spin_lock_irq(q->queue_lock);
+ add_acct_request(q, req, where);
+ __blk_run_queue(q);
+out_unlock:
+ spin_unlock_irq(q->queue_lock);
+ }
out:
- if (unplug || !queue_should_plug(q))
- __generic_unplug_device(q);
- spin_unlock_irq(q->queue_lock);
return 0;
}
@@ -1772,9 +1847,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
*/
BUG_ON(blk_queued_rq(rq));
- drive_stat_acct(rq, 1);
- __elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0);
-
+ add_acct_request(q, rq, ELEVATOR_INSERT_BACK);
spin_unlock_irqrestore(q->queue_lock, flags);
return 0;
@@ -2659,6 +2732,106 @@ int kblockd_schedule_delayed_work(struct request_queue *q,
}
EXPORT_SYMBOL(kblockd_schedule_delayed_work);
+#define PLUG_MAGIC 0x91827364
+
+void blk_start_plug(struct blk_plug *plug)
+{
+ struct task_struct *tsk = current;
+
+ plug->magic = PLUG_MAGIC;
+ INIT_LIST_HEAD(&plug->list);
+ plug->should_sort = 0;
+
+ /*
+ * If this is a nested plug, don't actually assign it. It will be
+ * flushed on its own.
+ */
+ if (!tsk->plug) {
+ /*
+ * Store ordering should not be needed here, since a potential
+ * preempt will imply a full memory barrier
+ */
+ tsk->plug = plug;
+ }
+}
+EXPORT_SYMBOL(blk_start_plug);
+
+static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b)
+{
+ struct request *rqa = container_of(a, struct request, queuelist);
+ struct request *rqb = container_of(b, struct request, queuelist);
+
+ return !(rqa->q == rqb->q);
+}
+
+static void flush_plug_list(struct blk_plug *plug)
+{
+ struct request_queue *q;
+ unsigned long flags;
+ struct request *rq;
+
+ BUG_ON(plug->magic != PLUG_MAGIC);
+
+ if (list_empty(&plug->list))
+ return;
+
+ if (plug->should_sort)
+ list_sort(NULL, &plug->list, plug_rq_cmp);
+
+ q = NULL;
+ local_irq_save(flags);
+ while (!list_empty(&plug->list)) {
+ rq = list_entry_rq(plug->list.next);
+ list_del_init(&rq->queuelist);
+ BUG_ON(!(rq->cmd_flags & REQ_ON_PLUG));
+ BUG_ON(!rq->q);
+ if (rq->q != q) {
+ if (q) {
+ __blk_run_queue(q);
+ spin_unlock(q->queue_lock);
+ }
+ q = rq->q;
+ spin_lock(q->queue_lock);
+ }
+ rq->cmd_flags &= ~REQ_ON_PLUG;
+
+ /*
+ * rq is already accounted, so use raw insert
+ */
+ __elv_add_request(q, rq, ELEVATOR_INSERT_SORT, 0);
+ }
+
+ if (q) {
+ __blk_run_queue(q);
+ spin_unlock(q->queue_lock);
+ }
+
+ BUG_ON(!list_empty(&plug->list));
+ local_irq_restore(flags);
+}
+
+static void __blk_finish_plug(struct task_struct *tsk, struct blk_plug *plug)
+{
+ flush_plug_list(plug);
+
+ if (plug == tsk->plug)
+ tsk->plug = NULL;
+}
+
+void blk_finish_plug(struct blk_plug *plug)
+{
+ if (plug)
+ __blk_finish_plug(current, plug);
+}
+EXPORT_SYMBOL(blk_finish_plug);
+
+void __blk_flush_plug(struct task_struct *tsk, struct blk_plug *plug)
+{
+ __blk_finish_plug(tsk, plug);
+ tsk->plug = plug;
+}
+EXPORT_SYMBOL(__blk_flush_plug);
+
int __init blk_dev_init(void)
{
BUILD_BUG_ON(__REQ_NR_BITS > 8 *
diff --git a/block/blk-flush.c b/block/blk-flush.c
index a867e3f524f..1e2aa8a8908 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -264,10 +264,9 @@ static bool blk_kick_flush(struct request_queue *q)
static void flush_data_end_io(struct request *rq, int error)
{
struct request_queue *q = rq->q;
- bool was_empty = elv_queue_empty(q);
/* after populating an empty queue, kick it to avoid stall */
- if (blk_flush_complete_seq(rq, REQ_FSEQ_DATA, error) && was_empty)
+ if (blk_flush_complete_seq(rq, REQ_FSEQ_DATA, error))
__blk_run_queue(q);
}
diff --git a/block/elevator.c b/block/elevator.c
index f98e92edc93..25713927c0d 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -113,7 +113,7 @@ int elv_rq_merge_ok(struct request *rq, struct bio *bio)
}
EXPORT_SYMBOL(elv_rq_merge_ok);
-static inline int elv_try_merge(struct request *__rq, struct bio *bio)
+int elv_try_merge(struct request *__rq, struct bio *bio)
{
int ret = ELEVATOR_NO_MERGE;
@@ -421,6 +421,8 @@ void elv_dispatch_sort(struct request_queue *q, struct request *rq)
struct list_head *entry;
int stop_flags;
+ BUG_ON(rq->cmd_flags & REQ_ON_PLUG);
+
if (q->last_merge == rq)
q->last_merge = NULL;
@@ -696,6 +698,8 @@ void elv_insert(struct request_queue *q, struct request *rq, int where)
void __elv_add_request(struct request_queue *q, struct request *rq, int where,
int plug)
{
+ BUG_ON(rq->cmd_flags & REQ_ON_PLUG);
+
if (rq->cmd_flags & REQ_SOFTBARRIER) {
/* barriers are scheduling boundary, update end_sector */
if (rq->cmd_type == REQ_TYPE_FS ||
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index dddedfc0af8..16b28647304 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -152,6 +152,7 @@ enum rq_flag_bits {
__REQ_IO_STAT, /* account I/O stat */
__REQ_MIXED_MERGE, /* merge of different types, fail separately */
__REQ_SECURE, /* secure discard (used with __REQ_DISCARD) */
+ __REQ_ON_PLUG, /* on plug list */
__REQ_NR_BITS, /* stops here */
};
@@ -193,5 +194,6 @@ enum rq_flag_bits {
#define REQ_IO_STAT (1 << __REQ_IO_STAT)
#define REQ_MIXED_MERGE (1 << __REQ_MIXED_MERGE)
#define REQ_SECURE (1 << __REQ_SECURE)
+#define REQ_ON_PLUG (1 << __REQ_ON_PLUG)
#endif /* __LINUX_BLK_TYPES_H */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index f55b2a8b661..5873037eeb9 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -871,6 +871,31 @@ struct request_queue *blk_alloc_queue(gfp_t);
struct request_queue *blk_alloc_queue_node(gfp_t, int);
extern void blk_put_queue(struct request_queue *);
+struct blk_plug {
+ unsigned long magic;
+ struct list_head list;
+ unsigned int should_sort;
+};
+
+extern void blk_start_plug(struct blk_plug *);
+extern void blk_finish_plug(struct blk_plug *);
+extern void __blk_flush_plug(struct task_struct *, struct blk_plug *);
+
+static inline void blk_flush_plug(struct task_struct *tsk)
+{
+ struct blk_plug *plug = tsk->plug;
+
+ if (unlikely(plug))
+ __blk_flush_plug(tsk, plug);
+}
+
+static inline bool blk_needs_flush_plug(struct task_struct *tsk)
+{
+ struct blk_plug *plug = tsk->plug;
+
+ return plug && !list_empty(&plug->list);
+}
+
/*
* tag stuff
*/
@@ -1294,6 +1319,23 @@ static inline long nr_blockdev_pages(void)
return 0;
}
+static inline void blk_start_plug(struct list_head *list)
+{
+}
+
+static inline void blk_finish_plug(struct list_head *list)
+{
+}
+
+static inline void blk_flush_plug(struct task_struct *tsk)
+{
+}
+
+static inline bool blk_needs_flush_plug(struct task_struct *tsk)
+{
+ return false;
+}
+
#endif /* CONFIG_BLOCK */
#endif
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 39b68edb388..8857cf9adbb 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -105,6 +105,7 @@ extern void elv_add_request(struct request_queue *, struct request *, int, int);
extern void __elv_add_request(struct request_queue *, struct request *, int, int);
extern void elv_insert(struct request_queue *, struct request *, int);
extern int elv_merge(struct request_queue *, struct request **, struct bio *);
+extern int elv_try_merge(struct request *, struct bio *);
extern void elv_merge_requests(struct request_queue *, struct request *,
struct request *);
extern void elv_merged_request(struct request_queue *, struct request *, int);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 777d8a5ed06..96ac2264374 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -99,6 +99,7 @@ struct robust_list_head;
struct bio_list;
struct fs_struct;
struct perf_event_context;
+struct blk_plug;
/*
* List of flags we want to share for kernel threads,
@@ -1429,6 +1430,11 @@ struct task_struct {
/* stacked block device info */
struct bio_list *bio_list;
+#ifdef CONFIG_BLOCK
+/* stack plugging */
+ struct blk_plug *plug;
+#endif
+
/* VM state */
struct reclaim_state *reclaim_state;
diff --git a/kernel/exit.c b/kernel/exit.c
index f9a45ebcc7b..6a488ad2dce 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -908,6 +908,7 @@ NORET_TYPE void do_exit(long code)
profile_task_exit(tsk);
WARN_ON(atomic_read(&tsk->fs_excl));
+ WARN_ON(blk_needs_flush_plug(tsk));
if (unlikely(in_interrupt()))
panic("Aiee, killing interrupt handler!");
diff --git a/kernel/fork.c b/kernel/fork.c
index 25e429152dd..027c80e5162 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1204,6 +1204,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
* Clear TID on mm_release()?
*/
p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL;
+#ifdef CONFIG_BLOCK
+ p->plug = NULL;
+#endif
#ifdef CONFIG_FUTEX
p->robust_list = NULL;
#ifdef CONFIG_COMPAT
diff --git a/kernel/sched.c b/kernel/sched.c
index 18d38e4ec7b..ca098bf4cc6 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3978,6 +3978,16 @@ need_resched_nonpreemptible:
switch_count = &prev->nvcsw;
}
+ /*
+ * If we are going to sleep and we have plugged IO queued, make
+ * sure to submit it to avoid deadlocks.
+ */
+ if (prev->state != TASK_RUNNING && blk_needs_flush_plug(prev)) {
+ raw_spin_unlock(&rq->lock);
+ blk_flush_plug(prev);
+ raw_spin_lock(&rq->lock);
+ }
+
pre_schedule(rq, prev);
if (unlikely(!rq->nr_running))
@@ -5333,6 +5343,7 @@ void __sched io_schedule(void)
delayacct_blkio_start();
atomic_inc(&rq->nr_iowait);
+ blk_flush_plug(current);
current->in_iowait = 1;
schedule();
current->in_iowait = 0;
@@ -5348,6 +5359,7 @@ long __sched io_schedule_timeout(long timeout)
delayacct_blkio_start();
atomic_inc(&rq->nr_iowait);
+ blk_flush_plug(current);
current->in_iowait = 1;
ret = schedule_timeout(timeout);
current->in_iowait = 0;