From 00380a404fc4235e9b8b39598138bd3223a27b8a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 23 Mar 2012 09:58:54 +0100 Subject: block: blk_alloc_queue_node(): use caller's GFP flags instead of GFP_KERNEL We should use the GFP flags that the caller specified instead of picking our own. All the callers specify GFP_KERNEL so this doesn't make a difference to how the kernel runs, it's just a cleanup. Signed-off-by: Dan Carpenter Signed-off-by: Jens Axboe --- block/blk-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-core.c b/block/blk-core.c index 3a78b00edd7..414e8224588 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -483,7 +483,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) if (!q) return NULL; - q->id = ida_simple_get(&blk_queue_ida, 0, 0, GFP_KERNEL); + q->id = ida_simple_get(&blk_queue_ida, 0, 0, gfp_mask); if (q->id < 0) goto fail_q; -- cgit v1.2.3 From 8bcb6c7d48eb341b1f49f814cdcbe05eb6f15680 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 30 Mar 2012 12:33:28 +0200 Subject: block: use lockdep_assert_held for queue locking Instead of an ugly open coded variant. Cc: axboe@kernel.dk Signed-off-by: Andi Kleen Signed-off-by: Jens Axboe --- block/blk-throttle.c | 2 +- include/linux/blkdev.h | 18 +++++++----------- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 5eed6a76721..f2ddb94626b 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -1218,7 +1218,7 @@ void blk_throtl_drain(struct request_queue *q) struct bio_list bl; struct bio *bio; - WARN_ON_ONCE(!queue_is_locked(q)); + queue_lockdep_assert_held(q); bio_list_init(&bl); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 606cf339bb5..2aa24664a5b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -426,14 +426,10 @@ struct request_queue { (1 << QUEUE_FLAG_SAME_COMP) | \ (1 << QUEUE_FLAG_ADD_RANDOM)) -static inline int queue_is_locked(struct request_queue *q) +static inline void queue_lockdep_assert_held(struct request_queue *q) { -#ifdef CONFIG_SMP - spinlock_t *lock = q->queue_lock; - return lock && spin_is_locked(lock); -#else - return 1; -#endif + if (q->queue_lock) + lockdep_assert_held(q->queue_lock); } static inline void queue_flag_set_unlocked(unsigned int flag, @@ -445,7 +441,7 @@ static inline void queue_flag_set_unlocked(unsigned int flag, static inline int queue_flag_test_and_clear(unsigned int flag, struct request_queue *q) { - WARN_ON_ONCE(!queue_is_locked(q)); + queue_lockdep_assert_held(q); if (test_bit(flag, &q->queue_flags)) { __clear_bit(flag, &q->queue_flags); @@ -458,7 +454,7 @@ static inline int queue_flag_test_and_clear(unsigned int flag, static inline int queue_flag_test_and_set(unsigned int flag, struct request_queue *q) { - WARN_ON_ONCE(!queue_is_locked(q)); + queue_lockdep_assert_held(q); if (!test_bit(flag, &q->queue_flags)) { __set_bit(flag, &q->queue_flags); @@ -470,7 +466,7 @@ static inline int queue_flag_test_and_set(unsigned int flag, static inline void queue_flag_set(unsigned int flag, struct request_queue *q) { - WARN_ON_ONCE(!queue_is_locked(q)); + queue_lockdep_assert_held(q); __set_bit(flag, &q->queue_flags); } @@ -487,7 +483,7 @@ static inline int queue_in_flight(struct request_queue *q) static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) { - WARN_ON_ONCE(!queue_is_locked(q)); + queue_lockdep_assert_held(q); __clear_bit(flag, &q->queue_flags); } -- cgit v1.2.3 From 5bf14c0727a07ded1bd9fa6d77923d7e6dc32833 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Sun, 1 Apr 2012 14:33:39 -0700 Subject: block: Make cfq_target_latency tunable through sysfs. In cfq, when we calculate a time slice for a process(or a cfqq to be precise), we have to consider the cfq_target_latency so that all the sync request have an estimated latency(300ms) and it is controlled by cfq_target_latency. But in some hadoop test, we have found that if there are many processes doing sequential read(24 for example), the throughput is bad because every process can only work for about 25ms and the cfqq is switched. That leads to a higher disk seek. We can achive the good throughput by setting low_latency=0, but then some read's latency is too much for the application. So this patch makes cfq_target_latency tunable through sysfs so that we can tune it and find some magic number which is not bad for both the throughput and the read latency. Cc: Jens Axboe Signed-off-by: Tao Ma Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 45729525356..3c38536bd52 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -295,6 +295,7 @@ struct cfq_data { unsigned int cfq_slice_idle; unsigned int cfq_group_idle; unsigned int cfq_latency; + unsigned int cfq_target_latency; /* * Fallback dummy cfqq for extreme OOM conditions @@ -604,7 +605,7 @@ cfq_group_slice(struct cfq_data *cfqd, struct cfq_group *cfqg) { struct cfq_rb_root *st = &cfqd->grp_service_tree; - return cfq_target_latency * cfqg->weight / st->total_weight; + return cfqd->cfq_target_latency * cfqg->weight / st->total_weight; } static inline unsigned @@ -2271,7 +2272,8 @@ new_workload: * to have higher weight. A more accurate thing would be to * calculate system wide asnc/sync ratio. */ - tmp = cfq_target_latency * cfqg_busy_async_queues(cfqd, cfqg); + tmp = cfqd->cfq_target_latency * + cfqg_busy_async_queues(cfqd, cfqg); tmp = tmp/cfqd->busy_queues; slice = min_t(unsigned, slice, tmp); @@ -3737,6 +3739,7 @@ static void *cfq_init_queue(struct request_queue *q) cfqd->cfq_back_penalty = cfq_back_penalty; cfqd->cfq_slice[0] = cfq_slice_async; cfqd->cfq_slice[1] = cfq_slice_sync; + cfqd->cfq_target_latency = cfq_target_latency; cfqd->cfq_slice_async_rq = cfq_slice_async_rq; cfqd->cfq_slice_idle = cfq_slice_idle; cfqd->cfq_group_idle = cfq_group_idle; @@ -3788,6 +3791,7 @@ SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1); SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1); SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0); SHOW_FUNCTION(cfq_low_latency_show, cfqd->cfq_latency, 0); +SHOW_FUNCTION(cfq_target_latency_show, cfqd->cfq_target_latency, 1); #undef SHOW_FUNCTION #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ @@ -3821,6 +3825,7 @@ STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1); STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1, UINT_MAX, 0); STORE_FUNCTION(cfq_low_latency_store, &cfqd->cfq_latency, 0, 1, 0); +STORE_FUNCTION(cfq_target_latency_store, &cfqd->cfq_target_latency, 1, UINT_MAX, 1); #undef STORE_FUNCTION #define CFQ_ATTR(name) \ @@ -3838,6 +3843,7 @@ static struct elv_fs_entry cfq_attrs[] = { CFQ_ATTR(slice_idle), CFQ_ATTR(group_idle), CFQ_ATTR(low_latency), + CFQ_ATTR(target_latency), __ATTR_NULL }; -- cgit v1.2.3 From 407ac95e2271a310016ced97f407676e79c53c06 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Sun, 1 Apr 2012 14:33:40 -0700 Subject: Documentation: Add sysfs ABI change for cfq's target latency. Cc: Jens Axboe Cc: Greg Kroah-Hartman Signed-off-by: Tao Ma Signed-off-by: Jens Axboe --- Documentation/ABI/testing/sysfs-cfq-target-latency | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-cfq-target-latency diff --git a/Documentation/ABI/testing/sysfs-cfq-target-latency b/Documentation/ABI/testing/sysfs-cfq-target-latency new file mode 100644 index 00000000000..df0f7828c5e --- /dev/null +++ b/Documentation/ABI/testing/sysfs-cfq-target-latency @@ -0,0 +1,8 @@ +What: /sys/block//iosched/target_latency +Date: March 2012 +contact: Tao Ma +Description: + The /sys/block//iosched/target_latency only exists + when the user sets cfq to /sys/block//scheduler. + It contains an estimated latency time for the cfq. cfq will + use it to calculate the time slice used for every task. -- cgit v1.2.3 From 1b2e19f17ed327af6add02978efdf354e4f8e4df Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Fri, 6 Apr 2012 11:37:47 -0600 Subject: block: make auto block plug flush threshold per-disk based We do auto block plug flush to reduce latency, the threshold is 16 requests. This works well if the task is accessing one or two drives. The problem is if the task is accessing a raid 0 device and the raid disk number is big, say 8 or 16, 16/8 = 2 or 16/16=1, we will have heavy lock contention. This patch makes the threshold per-disk based. The latency should be still ok accessing one or two drives. The setup with application accessing a lot of drives in the meantime uaually is big machine, avoiding lock contention is more important, because any contention will actually increase latency. Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- block/blk-core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/block/blk-core.c b/block/blk-core.c index 414e8224588..1f61b74867e 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1277,7 +1277,8 @@ static bool attempt_plug_merge(struct request_queue *q, struct bio *bio, list_for_each_entry_reverse(rq, &plug->list, queuelist) { int el_ret; - (*request_count)++; + if (rq->q == q) + (*request_count)++; if (rq->q != q || !blk_rq_merge_ok(rq, bio)) continue; -- cgit v1.2.3