From 1faa16d22877f4839bd433547d770c676d1d964c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 6 Apr 2009 14:48:01 +0200 Subject: block: change the request allocation/congestion logic to be sync/async based This makes sure that we never wait on async IO for sync requests, instead of doing the split on writes vs reads. Signed-off-by: Jens Axboe Signed-off-by: Linus Torvalds --- include/linux/blkdev.h | 52 +++++++++++++++++++++++++++++++++----------------- 1 file changed, 35 insertions(+), 17 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 465d6babc84..67dae3bd881 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -38,6 +38,10 @@ struct request; typedef void (rq_end_io_fn)(struct request *, int); struct request_list { + /* + * count[], starved[], and wait[] are indexed by + * BLK_RW_SYNC/BLK_RW_ASYNC + */ int count[2]; int starved[2]; int elvpriv; @@ -66,6 +70,11 @@ enum rq_cmd_type_bits { REQ_TYPE_ATA_PC, }; +enum { + BLK_RW_ASYNC = 0, + BLK_RW_SYNC = 1, +}; + /* * For request of type REQ_TYPE_LINUX_BLOCK, rq->cmd[0] is the opcode being * sent down (similar to how REQ_TYPE_BLOCK_PC means that ->cmd[] holds a @@ -103,7 +112,7 @@ enum rq_flag_bits { __REQ_QUIET, /* don't worry about errors */ __REQ_PREEMPT, /* set for "ide_preempt" requests */ __REQ_ORDERED_COLOR, /* is before or after barrier */ - __REQ_RW_SYNC, /* request is sync (O_DIRECT) */ + __REQ_RW_SYNC, /* request is sync (sync write or read) */ __REQ_ALLOCED, /* request came from our alloc pool */ __REQ_RW_META, /* metadata io request */ __REQ_COPY_USER, /* contains copies of user pages */ @@ -438,8 +447,8 @@ struct request_queue #define QUEUE_FLAG_CLUSTER 0 /* cluster several segments into 1 */ #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ #define QUEUE_FLAG_STOPPED 2 /* queue is stopped */ -#define QUEUE_FLAG_READFULL 3 /* read queue has been filled */ -#define QUEUE_FLAG_WRITEFULL 4 /* write queue has been filled */ +#define QUEUE_FLAG_SYNCFULL 3 /* read queue has been filled */ +#define QUEUE_FLAG_ASYNCFULL 4 /* write queue has been filled */ #define QUEUE_FLAG_DEAD 5 /* queue being torn down */ #define QUEUE_FLAG_REENTER 6 /* Re-entrancy avoidance */ #define QUEUE_FLAG_PLUGGED 7 /* queue is plugged */ @@ -611,32 +620,41 @@ enum { #define rq_data_dir(rq) ((rq)->cmd_flags & 1) /* - * We regard a request as sync, if it's a READ or a SYNC write. + * We regard a request as sync, if either a read or a sync write */ -#define rq_is_sync(rq) (rq_data_dir((rq)) == READ || (rq)->cmd_flags & REQ_RW_SYNC) +static inline bool rw_is_sync(unsigned int rw_flags) +{ + return !(rw_flags & REQ_RW) || (rw_flags & REQ_RW_SYNC); +} + +static inline bool rq_is_sync(struct request *rq) +{ + return rw_is_sync(rq->cmd_flags); +} + #define rq_is_meta(rq) ((rq)->cmd_flags & REQ_RW_META) -static inline int blk_queue_full(struct request_queue *q, int rw) +static inline int blk_queue_full(struct request_queue *q, int sync) { - if (rw == READ) - return test_bit(QUEUE_FLAG_READFULL, &q->queue_flags); - return test_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags); + if (sync) + return test_bit(QUEUE_FLAG_SYNCFULL, &q->queue_flags); + return test_bit(QUEUE_FLAG_ASYNCFULL, &q->queue_flags); } -static inline void blk_set_queue_full(struct request_queue *q, int rw) +static inline void blk_set_queue_full(struct request_queue *q, int sync) { - if (rw == READ) - queue_flag_set(QUEUE_FLAG_READFULL, q); + if (sync) + queue_flag_set(QUEUE_FLAG_SYNCFULL, q); else - queue_flag_set(QUEUE_FLAG_WRITEFULL, q); + queue_flag_set(QUEUE_FLAG_ASYNCFULL, q); } -static inline void blk_clear_queue_full(struct request_queue *q, int rw) +static inline void blk_clear_queue_full(struct request_queue *q, int sync) { - if (rw == READ) - queue_flag_clear(QUEUE_FLAG_READFULL, q); + if (sync) + queue_flag_clear(QUEUE_FLAG_SYNCFULL, q); else - queue_flag_clear(QUEUE_FLAG_WRITEFULL, q); + queue_flag_clear(QUEUE_FLAG_ASYNCFULL, q); } -- cgit v1.2.3 From aeb6fafb8fa53266d70ca7474fcda2bdaf96524a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 6 Apr 2009 14:48:07 +0200 Subject: block: Add flag for telling the IO schedulers NOT to anticipate more IO By default, CFQ will anticipate more IO from a given io context if the previously completed IO was sync. This used to be fine, since the only sync IO was reads and O_DIRECT writes. But with more "normal" sync writes being used now, we don't want to anticipate for those. Add a bio/request flag that informs the IO scheduler that this is a sync request that we should not idle for. Introduce WRITE_ODIRECT specifically for O_DIRECT writes, and make sure that the other sync writes set this flag. Signed-off-by: Jens Axboe Signed-off-by: Linus Torvalds --- block/blk-core.c | 2 ++ block/cfq-iosched.c | 4 +++- fs/direct-io.c | 2 +- include/linux/bio.h | 19 +++++++++++-------- include/linux/blkdev.h | 3 +++ include/linux/fs.h | 9 +++++---- 6 files changed, 25 insertions(+), 14 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/block/blk-core.c b/block/blk-core.c index c4198f083e5..25572802dac 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1128,6 +1128,8 @@ void init_request_from_bio(struct request *req, struct bio *bio) req->cmd_flags |= REQ_UNPLUG; if (bio_rw_meta(bio)) req->cmd_flags |= REQ_RW_META; + if (bio_noidle(bio)) + req->cmd_flags |= REQ_NOIDLE; req->errors = 0; req->hard_sector = req->sector = bio->bi_sector; diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 664ebfd092e..9e809345f71 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1992,8 +1992,10 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) } if (cfq_slice_used(cfqq) || cfq_class_idle(cfqq)) cfq_slice_expired(cfqd, 1); - else if (sync && RB_EMPTY_ROOT(&cfqq->sort_list)) + else if (sync && !rq_noidle(rq) && + RB_EMPTY_ROOT(&cfqq->sort_list)) { cfq_arm_slice_timer(cfqd); + } } if (!cfqd->rq_in_driver) diff --git a/fs/direct-io.c b/fs/direct-io.c index b6d43908ff7..da258e7249c 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -1126,7 +1126,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, int acquire_i_mutex = 0; if (rw & WRITE) - rw = WRITE_SYNC; + rw = WRITE_ODIRECT; if (bdev) bdev_blkbits = blksize_bits(bdev_hardsect_size(bdev)); diff --git a/include/linux/bio.h b/include/linux/bio.h index b05b1d4d17d..b900d2c67d2 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -145,20 +145,21 @@ struct bio { * bit 2 -- barrier * Insert a serialization point in the IO queue, forcing previously * submitted IO to be completed before this one is issued. - * bit 3 -- synchronous I/O hint: the block layer will unplug immediately - * Note that this does NOT indicate that the IO itself is sync, just - * that the block layer will not postpone issue of this IO by plugging. - * bit 4 -- metadata request + * bit 3 -- synchronous I/O hint. + * bit 4 -- Unplug the device immediately after submitting this bio. + * bit 5 -- metadata request * Used for tracing to differentiate metadata and data IO. May also * get some preferential treatment in the IO scheduler - * bit 5 -- discard sectors + * bit 6 -- discard sectors * Informs the lower level device that this range of sectors is no longer * used by the file system and may thus be freed by the device. Used * for flash based storage. - * bit 6 -- fail fast device errors - * bit 7 -- fail fast transport errors - * bit 8 -- fail fast driver errors + * bit 7 -- fail fast device errors + * bit 8 -- fail fast transport errors + * bit 9 -- fail fast driver errors * Don't want driver retries for any fast fail whatever the reason. + * bit 10 -- Tell the IO scheduler not to wait for more requests after this + one has been submitted, even if it is a SYNC request. */ #define BIO_RW 0 /* Must match RW in req flags (blkdev.h) */ #define BIO_RW_AHEAD 1 /* Must match FAILFAST in req flags */ @@ -170,6 +171,7 @@ struct bio { #define BIO_RW_FAILFAST_DEV 7 #define BIO_RW_FAILFAST_TRANSPORT 8 #define BIO_RW_FAILFAST_DRIVER 9 +#define BIO_RW_NOIDLE 10 #define bio_rw_flagged(bio, flag) ((bio)->bi_rw & (1 << (flag))) @@ -188,6 +190,7 @@ struct bio { #define bio_rw_ahead(bio) bio_rw_flagged(bio, BIO_RW_AHEAD) #define bio_rw_meta(bio) bio_rw_flagged(bio, BIO_RW_META) #define bio_discard(bio) bio_rw_flagged(bio, BIO_RW_DISCARD) +#define bio_noidle(bio) bio_rw_flagged(bio, BIO_RW_NOIDLE) /* * upper 16 bits of bi_rw define the io priority of this bio diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 67dae3bd881..e03660964e0 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -118,6 +118,7 @@ enum rq_flag_bits { __REQ_COPY_USER, /* contains copies of user pages */ __REQ_INTEGRITY, /* integrity metadata has been remapped */ __REQ_UNPLUG, /* unplug queue on submission */ + __REQ_NOIDLE, /* Don't anticipate more IO after this one */ __REQ_NR_BITS, /* stops here */ }; @@ -145,6 +146,7 @@ enum rq_flag_bits { #define REQ_COPY_USER (1 << __REQ_COPY_USER) #define REQ_INTEGRITY (1 << __REQ_INTEGRITY) #define REQ_UNPLUG (1 << __REQ_UNPLUG) +#define REQ_NOIDLE (1 << __REQ_NOIDLE) #define BLK_MAX_CDB 16 @@ -633,6 +635,7 @@ static inline bool rq_is_sync(struct request *rq) } #define rq_is_meta(rq) ((rq)->cmd_flags & REQ_RW_META) +#define rq_noidle(rq) ((rq)->cmd_flags & REQ_NOIDLE) static inline int blk_queue_full(struct request_queue *q, int sync) { diff --git a/include/linux/fs.h b/include/linux/fs.h index ea0510978f7..cae5720f431 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -95,11 +95,12 @@ struct inodes_stat_t { #define SWRITE 3 /* for ll_rw_block() - wait for buffer lock */ #define READ_SYNC (READ | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG)) #define READ_META (READ | (1 << BIO_RW_META)) -#define WRITE_SYNC (WRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG)) -#define WRITE_SYNC_PLUG (WRITE | (1 << BIO_RW_SYNCIO)) -#define SWRITE_SYNC (SWRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG)) +#define WRITE_SYNC_PLUG (WRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE)) +#define WRITE_SYNC (WRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG)) +#define WRITE_ODIRECT (WRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG)) #define SWRITE_SYNC_PLUG \ - (SWRITE | (1 << BIO_RW_SYNCIO)) + (SWRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE)) +#define SWRITE_SYNC (SWRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG)) #define WRITE_BARRIER (WRITE | (1 << BIO_RW_BARRIER)) #define DISCARD_NOBARRIER (1 << BIO_RW_DISCARD) #define DISCARD_BARRIER ((1 << BIO_RW_DISCARD) | (1 << BIO_RW_BARRIER)) -- cgit v1.2.3