From 00380a404fc4235e9b8b39598138bd3223a27b8a Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Fri, 23 Mar 2012 09:58:54 +0100
Subject: block: blk_alloc_queue_node(): use caller's GFP flags instead of
 GFP_KERNEL

We should use the GFP flags that the caller specified instead of picking
our own.  All the callers specify GFP_KERNEL so this doesn't make a
difference to how the kernel runs, it's just a cleanup.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 3a78b00edd7..414e8224588 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -483,7 +483,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	if (!q)
 		return NULL;
 
-	q->id = ida_simple_get(&blk_queue_ida, 0, 0, GFP_KERNEL);
+	q->id = ida_simple_get(&blk_queue_ida, 0, 0, gfp_mask);
 	if (q->id < 0)
 		goto fail_q;
 
-- 
cgit v1.2.3


From 8bcb6c7d48eb341b1f49f814cdcbe05eb6f15680 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Fri, 30 Mar 2012 12:33:28 +0200
Subject: block: use lockdep_assert_held for queue locking

Instead of an ugly open coded variant.

Cc: axboe@kernel.dk
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-throttle.c   |  2 +-
 include/linux/blkdev.h | 18 +++++++-----------
 2 files changed, 8 insertions(+), 12 deletions(-)

diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 5eed6a76721..f2ddb94626b 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -1218,7 +1218,7 @@ void blk_throtl_drain(struct request_queue *q)
 	struct bio_list bl;
 	struct bio *bio;
 
-	WARN_ON_ONCE(!queue_is_locked(q));
+	queue_lockdep_assert_held(q);
 
 	bio_list_init(&bl);
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 606cf339bb5..2aa24664a5b 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -426,14 +426,10 @@ struct request_queue {
 				 (1 << QUEUE_FLAG_SAME_COMP)	|	\
 				 (1 << QUEUE_FLAG_ADD_RANDOM))
 
-static inline int queue_is_locked(struct request_queue *q)
+static inline void queue_lockdep_assert_held(struct request_queue *q)
 {
-#ifdef CONFIG_SMP
-	spinlock_t *lock = q->queue_lock;
-	return lock && spin_is_locked(lock);
-#else
-	return 1;
-#endif
+	if (q->queue_lock)
+		lockdep_assert_held(q->queue_lock);
 }
 
 static inline void queue_flag_set_unlocked(unsigned int flag,
@@ -445,7 +441,7 @@ static inline void queue_flag_set_unlocked(unsigned int flag,
 static inline int queue_flag_test_and_clear(unsigned int flag,
 					    struct request_queue *q)
 {
-	WARN_ON_ONCE(!queue_is_locked(q));
+	queue_lockdep_assert_held(q);
 
 	if (test_bit(flag, &q->queue_flags)) {
 		__clear_bit(flag, &q->queue_flags);
@@ -458,7 +454,7 @@ static inline int queue_flag_test_and_clear(unsigned int flag,
 static inline int queue_flag_test_and_set(unsigned int flag,
 					  struct request_queue *q)
 {
-	WARN_ON_ONCE(!queue_is_locked(q));
+	queue_lockdep_assert_held(q);
 
 	if (!test_bit(flag, &q->queue_flags)) {
 		__set_bit(flag, &q->queue_flags);
@@ -470,7 +466,7 @@ static inline int queue_flag_test_and_set(unsigned int flag,
 
 static inline void queue_flag_set(unsigned int flag, struct request_queue *q)
 {
-	WARN_ON_ONCE(!queue_is_locked(q));
+	queue_lockdep_assert_held(q);
 	__set_bit(flag, &q->queue_flags);
 }
 
@@ -487,7 +483,7 @@ static inline int queue_in_flight(struct request_queue *q)
 
 static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
 {
-	WARN_ON_ONCE(!queue_is_locked(q));
+	queue_lockdep_assert_held(q);
 	__clear_bit(flag, &q->queue_flags);
 }
 
-- 
cgit v1.2.3


From 5bf14c0727a07ded1bd9fa6d77923d7e6dc32833 Mon Sep 17 00:00:00 2001
From: Tao Ma <boyu.mt@taobao.com>
Date: Sun, 1 Apr 2012 14:33:39 -0700
Subject: block: Make cfq_target_latency tunable through sysfs.

In cfq, when we calculate a time slice for a process(or a cfqq to
be precise), we have to consider the cfq_target_latency so that all the
sync request have an estimated latency(300ms) and it is controlled by
cfq_target_latency. But in some hadoop test, we have found that if
there are many processes doing sequential read(24 for example), the
throughput is bad because every process can only work for about 25ms
and the cfqq is switched. That leads to a higher disk seek. We can
achive the good throughput by setting low_latency=0, but then some
read's latency is too much for the application.

So this patch makes cfq_target_latency tunable through sysfs so that
we can tune it and find some magic number which is not bad for both
the throughput and the read latency.

Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Tao Ma <boyu.mt@taobao.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/cfq-iosched.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 45729525356..3c38536bd52 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -295,6 +295,7 @@ struct cfq_data {
 	unsigned int cfq_slice_idle;
 	unsigned int cfq_group_idle;
 	unsigned int cfq_latency;
+	unsigned int cfq_target_latency;
 
 	/*
 	 * Fallback dummy cfqq for extreme OOM conditions
@@ -604,7 +605,7 @@ cfq_group_slice(struct cfq_data *cfqd, struct cfq_group *cfqg)
 {
 	struct cfq_rb_root *st = &cfqd->grp_service_tree;
 
-	return cfq_target_latency * cfqg->weight / st->total_weight;
+	return cfqd->cfq_target_latency * cfqg->weight / st->total_weight;
 }
 
 static inline unsigned
@@ -2271,7 +2272,8 @@ new_workload:
 		 * to have higher weight. A more accurate thing would be to
 		 * calculate system wide asnc/sync ratio.
 		 */
-		tmp = cfq_target_latency * cfqg_busy_async_queues(cfqd, cfqg);
+		tmp = cfqd->cfq_target_latency *
+			cfqg_busy_async_queues(cfqd, cfqg);
 		tmp = tmp/cfqd->busy_queues;
 		slice = min_t(unsigned, slice, tmp);
 
@@ -3737,6 +3739,7 @@ static void *cfq_init_queue(struct request_queue *q)
 	cfqd->cfq_back_penalty = cfq_back_penalty;
 	cfqd->cfq_slice[0] = cfq_slice_async;
 	cfqd->cfq_slice[1] = cfq_slice_sync;
+	cfqd->cfq_target_latency = cfq_target_latency;
 	cfqd->cfq_slice_async_rq = cfq_slice_async_rq;
 	cfqd->cfq_slice_idle = cfq_slice_idle;
 	cfqd->cfq_group_idle = cfq_group_idle;
@@ -3788,6 +3791,7 @@ SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1);
 SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1);
 SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0);
 SHOW_FUNCTION(cfq_low_latency_show, cfqd->cfq_latency, 0);
+SHOW_FUNCTION(cfq_target_latency_show, cfqd->cfq_target_latency, 1);
 #undef SHOW_FUNCTION
 
 #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV)			\
@@ -3821,6 +3825,7 @@ STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1);
 STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1,
 		UINT_MAX, 0);
 STORE_FUNCTION(cfq_low_latency_store, &cfqd->cfq_latency, 0, 1, 0);
+STORE_FUNCTION(cfq_target_latency_store, &cfqd->cfq_target_latency, 1, UINT_MAX, 1);
 #undef STORE_FUNCTION
 
 #define CFQ_ATTR(name) \
@@ -3838,6 +3843,7 @@ static struct elv_fs_entry cfq_attrs[] = {
 	CFQ_ATTR(slice_idle),
 	CFQ_ATTR(group_idle),
 	CFQ_ATTR(low_latency),
+	CFQ_ATTR(target_latency),
 	__ATTR_NULL
 };
 
-- 
cgit v1.2.3


From 407ac95e2271a310016ced97f407676e79c53c06 Mon Sep 17 00:00:00 2001
From: Tao Ma <boyu.mt@taobao.com>
Date: Sun, 1 Apr 2012 14:33:40 -0700
Subject: Documentation: Add sysfs ABI change for cfq's target latency.

Cc: Jens Axboe <axboe@kernel.dk>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Tao Ma <boyu.mt@taobao.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 Documentation/ABI/testing/sysfs-cfq-target-latency | 8 ++++++++
 1 file changed, 8 insertions(+)
 create mode 100644 Documentation/ABI/testing/sysfs-cfq-target-latency

diff --git a/Documentation/ABI/testing/sysfs-cfq-target-latency b/Documentation/ABI/testing/sysfs-cfq-target-latency
new file mode 100644
index 00000000000..df0f7828c5e
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-cfq-target-latency
@@ -0,0 +1,8 @@
+What:		/sys/block/<device>/iosched/target_latency
+Date:		March 2012
+contact:	Tao Ma <boyu.mt@taobao.com>
+Description:
+		The /sys/block/<device>/iosched/target_latency only exists
+		when the user sets cfq to /sys/block/<device>/scheduler.
+		It contains an estimated latency time for the cfq. cfq will
+		use it to calculate the time slice used for every task.
-- 
cgit v1.2.3


From 1b2e19f17ed327af6add02978efdf354e4f8e4df Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@fusionio.com>
Date: Fri, 6 Apr 2012 11:37:47 -0600
Subject: block: make auto block plug flush threshold per-disk based

We do auto block plug flush to reduce latency, the threshold is 16
requests. This works well if the task is accessing one or two drives.
The problem is if the task is accessing a raid 0 device and the raid
disk number is big, say 8 or 16, 16/8 = 2 or 16/16=1, we will have
heavy lock contention.

This patch makes the threshold per-disk based. The latency should be
still ok accessing one or two drives. The setup with application
accessing a lot of drives in the meantime uaually is big machine,
avoiding lock contention is more important, because any contention
will actually increase latency.

Signed-off-by: Shaohua Li <shli@fusionio.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 414e8224588..1f61b74867e 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1277,7 +1277,8 @@ static bool attempt_plug_merge(struct request_queue *q, struct bio *bio,
 	list_for_each_entry_reverse(rq, &plug->list, queuelist) {
 		int el_ret;
 
-		(*request_count)++;
+		if (rq->q == q)
+			(*request_count)++;
 
 		if (rq->q != q || !blk_rq_merge_ok(rq, bio))
 			continue;
-- 
cgit v1.2.3