diff options
Diffstat (limited to 'fs/direct-io.c')
| -rw-r--r-- | fs/direct-io.c | 103 |
1 files changed, 69 insertions, 34 deletions
diff --git a/fs/direct-io.c b/fs/direct-io.c index e82adc2debb..51f270b479b 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -82,6 +82,8 @@ struct dio { int reap_counter; /* rate limit reaping */ get_block_t *get_block; /* block mapping function */ dio_iodone_t *end_io; /* IO completion function */ + dio_submit_t *submit_io; /* IO submission function */ + loff_t logical_offset_in_bio; /* current first logical block in bio */ sector_t final_block_in_bio; /* current final block in bio + 1 */ sector_t next_block_for_io; /* next block to be put under IO, in dio_blocks units */ @@ -96,6 +98,7 @@ struct dio { unsigned cur_page_offset; /* Offset into it, in bytes */ unsigned cur_page_len; /* Nr of bytes at cur_page_offset */ sector_t cur_page_block; /* Where it starts */ + loff_t cur_page_fs_offset; /* Offset in file */ /* BIO completion state */ spinlock_t bio_lock; /* protects BIO fields below */ @@ -215,7 +218,7 @@ static struct page *dio_get_page(struct dio *dio) * filesystems can use it to hold additional state between get_block calls and * dio_complete. 
*/ -static int dio_complete(struct dio *dio, loff_t offset, int ret) +static int dio_complete(struct dio *dio, loff_t offset, int ret, bool is_async) { ssize_t transferred = 0; @@ -236,14 +239,6 @@ static int dio_complete(struct dio *dio, loff_t offset, int ret) transferred = dio->i_size - offset; } - if (dio->end_io && dio->result) - dio->end_io(dio->iocb, offset, transferred, - dio->map_bh.b_private); - - if (dio->flags & DIO_LOCKING) - /* lockdep: non-owner release */ - up_read_non_owner(&dio->inode->i_alloc_sem); - if (ret == 0) ret = dio->page_errors; if (ret == 0) @@ -251,6 +246,17 @@ static int dio_complete(struct dio *dio, loff_t offset, int ret) if (ret == 0) ret = transferred; + if (dio->end_io && dio->result) { + dio->end_io(dio->iocb, offset, transferred, + dio->map_bh.b_private, ret, is_async); + } else if (is_async) { + aio_complete(dio->iocb, ret, 0); + } + + if (dio->flags & DIO_LOCKING) + /* lockdep: non-owner release */ + up_read_non_owner(&dio->inode->i_alloc_sem); + return ret; } @@ -274,8 +280,7 @@ static void dio_bio_end_aio(struct bio *bio, int error) spin_unlock_irqrestore(&dio->bio_lock, flags); if (remaining == 0) { - int ret = dio_complete(dio, dio->iocb->ki_pos, 0); - aio_complete(dio->iocb, ret, 0); + dio_complete(dio, dio->iocb->ki_pos, 0, true); kfree(dio); } } @@ -300,6 +305,26 @@ static void dio_bio_end_io(struct bio *bio, int error) spin_unlock_irqrestore(&dio->bio_lock, flags); } +/** + * dio_end_io - handle the end io action for the given bio + * @bio: The direct io bio that's being completed + * @error: Error if there was one + * + * This is meant to be called by any filesystem that uses their own dio_submit_t + * so that the DIO specific endio actions are dealt with after the filesystem + * has done its completion work. 
+ */ +void dio_end_io(struct bio *bio, int error) +{ + struct dio *dio = bio->bi_private; + + if (dio->is_async) + dio_bio_end_aio(bio, error); + else + dio_bio_end_io(bio, error); +} +EXPORT_SYMBOL_GPL(dio_end_io); + static int dio_bio_alloc(struct dio *dio, struct block_device *bdev, sector_t first_sector, int nr_vecs) @@ -316,6 +341,7 @@ dio_bio_alloc(struct dio *dio, struct block_device *bdev, bio->bi_end_io = dio_bio_end_io; dio->bio = bio; + dio->logical_offset_in_bio = dio->cur_page_fs_offset; return 0; } @@ -340,10 +366,15 @@ static void dio_bio_submit(struct dio *dio) if (dio->is_async && dio->rw == READ) bio_set_pages_dirty(bio); - submit_bio(dio->rw, bio); + if (dio->submit_io) + dio->submit_io(dio->rw, bio, dio->inode, + dio->logical_offset_in_bio); + else + submit_bio(dio->rw, bio); dio->bio = NULL; dio->boundary = 0; + dio->logical_offset_in_bio = 0; } /* @@ -603,10 +634,26 @@ static int dio_send_cur_page(struct dio *dio) int ret = 0; if (dio->bio) { + loff_t cur_offset = dio->block_in_file << dio->blkbits; + loff_t bio_next_offset = dio->logical_offset_in_bio + + dio->bio->bi_size; + /* - * See whether this new request is contiguous with the old + * See whether this new request is contiguous with the old. + * + * Btrfs cannot handle having logically non-contiguous requests + * submitted. For example if you have + * + * Logical: [0-4095][HOLE][8192-12287] + * Physical: [0-4095] [4096-8191] + * + * We cannot submit those pages together as one BIO. So if our + * current logical offset in the file does not equal what would + * be the next logical offset in the bio, submit the bio we + * have. 
*/ - if (dio->final_block_in_bio != dio->cur_page_block) + if (dio->final_block_in_bio != dio->cur_page_block || + cur_offset != bio_next_offset) dio_bio_submit(dio); /* * Submit now if the underlying fs is about to perform a @@ -701,6 +748,7 @@ submit_page_section(struct dio *dio, struct page *page, dio->cur_page_offset = offset; dio->cur_page_len = len; dio->cur_page_block = blocknr; + dio->cur_page_fs_offset = dio->block_in_file << dio->blkbits; out: return ret; } @@ -935,7 +983,7 @@ static ssize_t direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, const struct iovec *iov, loff_t offset, unsigned long nr_segs, unsigned blkbits, get_block_t get_block, dio_iodone_t end_io, - struct dio *dio) + dio_submit_t submit_io, struct dio *dio) { unsigned long user_addr; unsigned long flags; @@ -952,6 +1000,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, dio->get_block = get_block; dio->end_io = end_io; + dio->submit_io = submit_io; dio->final_block_in_bio = -1; dio->next_block_for_io = -1; @@ -1008,7 +1057,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, } } /* end iovec loop */ - if (ret == -ENOTBLK && (rw & WRITE)) { + if (ret == -ENOTBLK) { /* * The remaining part of the request will be * be handled by buffered I/O when we return @@ -1079,7 +1128,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, spin_unlock_irqrestore(&dio->bio_lock, flags); if (ret2 == 0) { - ret = dio_complete(dio, offset, ret); + ret = dio_complete(dio, offset, ret, false); kfree(dio); } else BUG_ON(ret != -EIOCBQUEUED); @@ -1110,7 +1159,7 @@ ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, struct block_device *bdev, const struct iovec *iov, loff_t offset, unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, - int flags) + dio_submit_t submit_io, int flags) { int seg; size_t size; @@ -1197,22 +1246,8 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, (end > 
i_size_read(inode))); retval = direct_io_worker(rw, iocb, inode, iov, offset, - nr_segs, blkbits, get_block, end_io, dio); - - /* - * In case of error extending write may have instantiated a few - * blocks outside i_size. Trim these off again for DIO_LOCKING. - * - * NOTE: filesystems with their own locking have to handle this - * on their own. - */ - if (flags & DIO_LOCKING) { - if (unlikely((rw & WRITE) && retval < 0)) { - loff_t isize = i_size_read(inode); - if (end > isize) - vmtruncate(inode, isize); - } - } + nr_segs, blkbits, get_block, end_io, + submit_io, dio); out: return retval; |
