From 2fe17c1075836b66678ed2a305fd09b6773883aa Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 14 Jan 2011 13:07:43 +0100 Subject: fallocate should be a file operation Currently all filesystems except XFS implement fallocate asynchronously, while XFS forced a commit. Both of these are suboptimal - in case of O_SYNC I/O we really want our allocation on disk, especially for the !KEEP_SIZE case where we actually grow the file with user-visible zeroes. On the other hand always commiting the transaction is a bad idea for fast-path uses of fallocate like for example in recent Samba versions. Given that block allocation is a data plane operation anyway change it from an inode operation to a file operation so that we have the file structure available that lets us check for O_SYNC. This also includes moving the code around for a few of the filesystems, and remove the already unnedded S_ISDIR checks given that we only wire up fallocate for regular files. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/gfs2/file.c | 258 ++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/gfs2/ops_inode.c | 258 ---------------------------------------------------- 2 files changed, 258 insertions(+), 258 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index fca6689e12e..7cfdcb91336 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -19,6 +19,8 @@ #include #include #include +#include +#include #include #include #include @@ -610,6 +612,260 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov, return generic_file_aio_write(iocb, iov, nr_segs, pos); } +static void empty_write_end(struct page *page, unsigned from, + unsigned to) +{ + struct gfs2_inode *ip = GFS2_I(page->mapping->host); + + page_zero_new_buffers(page, from, to); + flush_dcache_page(page); + mark_page_accessed(page); + + if (!gfs2_is_writeback(ip)) + gfs2_page_add_databufs(ip, page, from, to); + + block_commit_write(page, from, to); +} + +static int write_empty_blocks(struct page *page, unsigned from, unsigned to) +{ + unsigned start, end, next; + struct buffer_head *bh, *head; + int error; + + if (!page_has_buffers(page)) { + error = __block_write_begin(page, from, to - from, gfs2_block_map); + if (unlikely(error)) + return error; + + empty_write_end(page, from, to); + return 0; + } + + bh = head = page_buffers(page); + next = end = 0; + while (next < from) { + next += bh->b_size; + bh = bh->b_this_page; + } + start = next; + do { + next += bh->b_size; + if (buffer_mapped(bh)) { + if (end) { + error = __block_write_begin(page, start, end - start, + gfs2_block_map); + if (unlikely(error)) + return error; + empty_write_end(page, start, end); + end = 0; + } + start = next; + } + else + end = next; + bh = bh->b_this_page; + } while (next < to); + + if (end) { + error = __block_write_begin(page, start, end - start, gfs2_block_map); + if (unlikely(error)) + return error; + empty_write_end(page, start, end); + } + + return 0; +} + +static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len, + int mode) +{ + struct gfs2_inode *ip = GFS2_I(inode); + struct buffer_head *dibh; + int error; + u64 start = offset >> PAGE_CACHE_SHIFT; + unsigned int start_offset = offset & ~PAGE_CACHE_MASK; + u64 end = (offset + len - 1) >> PAGE_CACHE_SHIFT; + pgoff_t curr; + struct page *page; + unsigned int end_offset = (offset + len) & ~PAGE_CACHE_MASK; + unsigned int from, to; + + if (!end_offset) + end_offset = PAGE_CACHE_SIZE; + + error = gfs2_meta_inode_buffer(ip, &dibh); + if (unlikely(error)) + goto out; + + gfs2_trans_add_bh(ip->i_gl, dibh, 1); + + if (gfs2_is_stuffed(ip)) { + error = gfs2_unstuff_dinode(ip, NULL); + if (unlikely(error)) + goto out; + } + + curr = start; + offset = start << PAGE_CACHE_SHIFT; + from = start_offset; + to = PAGE_CACHE_SIZE; + while (curr <= end) { + page = grab_cache_page_write_begin(inode->i_mapping, curr, + AOP_FLAG_NOFS); + if (unlikely(!page)) { + error = -ENOMEM; + goto out; + } + + if (curr == end) + to = end_offset; + error = write_empty_blocks(page, from, to); + if (!error && offset + to > inode->i_size && + !(mode & FALLOC_FL_KEEP_SIZE)) { + i_size_write(inode, offset + to); + } + unlock_page(page); + page_cache_release(page); + if (error) + goto out; + curr++; + offset += PAGE_CACHE_SIZE; + from = 0; + } + + gfs2_dinode_out(ip, dibh->b_data); + mark_inode_dirty(inode); + + brelse(dibh); + +out: + return error; +} + +static void calc_max_reserv(struct gfs2_inode *ip, loff_t max, loff_t *len, + unsigned int *data_blocks, unsigned int *ind_blocks) +{ + const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); + unsigned int max_blocks = ip->i_alloc->al_rgd->rd_free_clone; + unsigned int tmp, max_data = max_blocks - 3 * (sdp->sd_max_height - 1); + + for (tmp = max_data; tmp > sdp->sd_diptrs;) { + tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs); + max_data -= tmp; + } + /* This calculation isn't the exact reverse of gfs2_write_calc_reserve, + so it might end up with fewer data blocks */ + if (max_data <= *data_blocks) + return; + *data_blocks = max_data; + *ind_blocks = max_blocks - max_data; + *len = ((loff_t)max_data - 3) << sdp->sd_sb.sb_bsize_shift; + if (*len > max) { + *len = max; + gfs2_write_calc_reserv(ip, max, data_blocks, ind_blocks); + } +} + +static long gfs2_fallocate(struct file *file, int mode, loff_t offset, + loff_t len) +{ + struct inode *inode = file->f_path.dentry->d_inode; + struct gfs2_sbd *sdp = GFS2_SB(inode); + struct gfs2_inode *ip = GFS2_I(inode); + unsigned int data_blocks = 0, ind_blocks = 0, rblocks; + loff_t bytes, max_bytes; + struct gfs2_alloc *al; + int error; + loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift; + next = (next + 1) << sdp->sd_sb.sb_bsize_shift; + + /* We only support the FALLOC_FL_KEEP_SIZE mode */ + if (mode & ~FALLOC_FL_KEEP_SIZE) + return -EOPNOTSUPP; + + offset = (offset >> sdp->sd_sb.sb_bsize_shift) << + sdp->sd_sb.sb_bsize_shift; + + len = next - offset; + bytes = sdp->sd_max_rg_data * sdp->sd_sb.sb_bsize / 2; + if (!bytes) + bytes = UINT_MAX; + + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); + error = gfs2_glock_nq(&ip->i_gh); + if (unlikely(error)) + goto out_uninit; + + if (!gfs2_write_alloc_required(ip, offset, len)) + goto out_unlock; + + while (len > 0) { + if (len < bytes) + bytes = len; + al = gfs2_alloc_get(ip); + if (!al) { + error = -ENOMEM; + goto out_unlock; + } + + error = gfs2_quota_lock_check(ip); + if (error) + goto out_alloc_put; + +retry: + gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks); + + al->al_requested = data_blocks + ind_blocks; + error = gfs2_inplace_reserve(ip); + if (error) { + if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) { + bytes >>= 1; + goto retry; + } + goto out_qunlock; + } + max_bytes = bytes; + calc_max_reserv(ip, len, &max_bytes, &data_blocks, &ind_blocks); + al->al_requested = data_blocks + ind_blocks; + + rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA + + RES_RG_HDR + gfs2_rg_blocks(al); + if (gfs2_is_jdata(ip)) + rblocks += data_blocks ? data_blocks : 1; + + error = gfs2_trans_begin(sdp, rblocks, + PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize); + if (error) + goto out_trans_fail; + + error = fallocate_chunk(inode, offset, max_bytes, mode); + gfs2_trans_end(sdp); + + if (error) + goto out_trans_fail; + + len -= max_bytes; + offset += max_bytes; + gfs2_inplace_release(ip); + gfs2_quota_unlock(ip); + gfs2_alloc_put(ip); + } + goto out_unlock; + +out_trans_fail: + gfs2_inplace_release(ip); +out_qunlock: + gfs2_quota_unlock(ip); +out_alloc_put: + gfs2_alloc_put(ip); +out_unlock: + gfs2_glock_dq(&ip->i_gh); +out_uninit: + gfs2_holder_uninit(&ip->i_gh); + return error; +} + #ifdef CONFIG_GFS2_FS_LOCKING_DLM /** @@ -765,6 +1021,7 @@ const struct file_operations gfs2_file_fops = { .splice_read = generic_file_splice_read, .splice_write = generic_file_splice_write, .setlease = gfs2_setlease, + .fallocate = gfs2_fallocate, }; const struct file_operations gfs2_dir_fops = { @@ -794,6 +1051,7 @@ const struct file_operations gfs2_file_fops_nolock = { .splice_read = generic_file_splice_read, .splice_write = generic_file_splice_write, .setlease = generic_setlease, + .fallocate = gfs2_fallocate, }; const struct file_operations gfs2_dir_fops_nolock = { diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index c09528c07f3..d8b26ac2e20 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -18,8 +18,6 @@ #include #include #include -#include -#include #include #include "gfs2.h" @@ -1257,261 +1255,6 @@ static int gfs2_removexattr(struct dentry *dentry, const char *name) return ret; } -static void empty_write_end(struct page *page, unsigned from, - unsigned to) -{ - struct gfs2_inode *ip = GFS2_I(page->mapping->host); - - page_zero_new_buffers(page, from, to); - flush_dcache_page(page); - mark_page_accessed(page); - - if (!gfs2_is_writeback(ip)) - gfs2_page_add_databufs(ip, page, from, to); - - block_commit_write(page, from, to); -} - - -static int write_empty_blocks(struct page *page, unsigned from, unsigned to) -{ - unsigned start, end, next; - struct buffer_head *bh, *head; - int error; - - if (!page_has_buffers(page)) { - error = __block_write_begin(page, from, to - from, gfs2_block_map); - if (unlikely(error)) - return error; - - empty_write_end(page, from, to); - return 0; - } - - bh = head = page_buffers(page); - next = end = 0; - while (next < from) { - next += bh->b_size; - bh = bh->b_this_page; - } - start = next; - do { - next += bh->b_size; - if (buffer_mapped(bh)) { - if (end) { - error = __block_write_begin(page, start, end - start, - gfs2_block_map); - if (unlikely(error)) - return error; - empty_write_end(page, start, end); - end = 0; - } - start = next; - } - else - end = next; - bh = bh->b_this_page; - } while (next < to); - - if (end) { - error = __block_write_begin(page, start, end - start, gfs2_block_map); - if (unlikely(error)) - return error; - empty_write_end(page, start, end); - } - - return 0; -} - -static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len, - int mode) -{ - struct gfs2_inode *ip = GFS2_I(inode); - struct buffer_head *dibh; - int error; - u64 start = offset >> PAGE_CACHE_SHIFT; - unsigned int start_offset = offset & ~PAGE_CACHE_MASK; - u64 end = (offset + len - 1) >> PAGE_CACHE_SHIFT; - pgoff_t curr; - struct page *page; - unsigned int end_offset = (offset + len) & ~PAGE_CACHE_MASK; - unsigned int from, to; - - if (!end_offset) - end_offset = PAGE_CACHE_SIZE; - - error = gfs2_meta_inode_buffer(ip, &dibh); - if (unlikely(error)) - goto out; - - gfs2_trans_add_bh(ip->i_gl, dibh, 1); - - if (gfs2_is_stuffed(ip)) { - error = gfs2_unstuff_dinode(ip, NULL); - if (unlikely(error)) - goto out; - } - - curr = start; - offset = start << PAGE_CACHE_SHIFT; - from = start_offset; - to = PAGE_CACHE_SIZE; - while (curr <= end) { - page = grab_cache_page_write_begin(inode->i_mapping, curr, - AOP_FLAG_NOFS); - if (unlikely(!page)) { - error = -ENOMEM; - goto out; - } - - if (curr == end) - to = end_offset; - error = write_empty_blocks(page, from, to); - if (!error && offset + to > inode->i_size && - !(mode & FALLOC_FL_KEEP_SIZE)) { - i_size_write(inode, offset + to); - } - unlock_page(page); - page_cache_release(page); - if (error) - goto out; - curr++; - offset += PAGE_CACHE_SIZE; - from = 0; - } - - gfs2_dinode_out(ip, dibh->b_data); - mark_inode_dirty(inode); - - brelse(dibh); - -out: - return error; -} - -static void calc_max_reserv(struct gfs2_inode *ip, loff_t max, loff_t *len, - unsigned int *data_blocks, unsigned int *ind_blocks) -{ - const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - unsigned int max_blocks = ip->i_alloc->al_rgd->rd_free_clone; - unsigned int tmp, max_data = max_blocks - 3 * (sdp->sd_max_height - 1); - - for (tmp = max_data; tmp > sdp->sd_diptrs;) { - tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs); - max_data -= tmp; - } - /* This calculation isn't the exact reverse of gfs2_write_calc_reserve, - so it might end up with fewer data blocks */ - if (max_data <= *data_blocks) - return; - *data_blocks = max_data; - *ind_blocks = max_blocks - max_data; - *len = ((loff_t)max_data - 3) << sdp->sd_sb.sb_bsize_shift; - if (*len > max) { - *len = max; - gfs2_write_calc_reserv(ip, max, data_blocks, ind_blocks); - } -} - -static long gfs2_fallocate(struct inode *inode, int mode, loff_t offset, - loff_t len) -{ - struct gfs2_sbd *sdp = GFS2_SB(inode); - struct gfs2_inode *ip = GFS2_I(inode); - unsigned int data_blocks = 0, ind_blocks = 0, rblocks; - loff_t bytes, max_bytes; - struct gfs2_alloc *al; - int error; - loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift; - next = (next + 1) << sdp->sd_sb.sb_bsize_shift; - - /* We only support the FALLOC_FL_KEEP_SIZE mode */ - if (mode & ~FALLOC_FL_KEEP_SIZE) - return -EOPNOTSUPP; - - offset = (offset >> sdp->sd_sb.sb_bsize_shift) << - sdp->sd_sb.sb_bsize_shift; - - len = next - offset; - bytes = sdp->sd_max_rg_data * sdp->sd_sb.sb_bsize / 2; - if (!bytes) - bytes = UINT_MAX; - - gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); - error = gfs2_glock_nq(&ip->i_gh); - if (unlikely(error)) - goto out_uninit; - - if (!gfs2_write_alloc_required(ip, offset, len)) - goto out_unlock; - - while (len > 0) { - if (len < bytes) - bytes = len; - al = gfs2_alloc_get(ip); - if (!al) { - error = -ENOMEM; - goto out_unlock; - } - - error = gfs2_quota_lock_check(ip); - if (error) - goto out_alloc_put; - -retry: - gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks); - - al->al_requested = data_blocks + ind_blocks; - error = gfs2_inplace_reserve(ip); - if (error) { - if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) { - bytes >>= 1; - goto retry; - } - goto out_qunlock; - } - max_bytes = bytes; - calc_max_reserv(ip, len, &max_bytes, &data_blocks, &ind_blocks); - al->al_requested = data_blocks + ind_blocks; - - rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA + - RES_RG_HDR + gfs2_rg_blocks(al); - if (gfs2_is_jdata(ip)) - rblocks += data_blocks ? data_blocks : 1; - - error = gfs2_trans_begin(sdp, rblocks, - PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize); - if (error) - goto out_trans_fail; - - error = fallocate_chunk(inode, offset, max_bytes, mode); - gfs2_trans_end(sdp); - - if (error) - goto out_trans_fail; - - len -= max_bytes; - offset += max_bytes; - gfs2_inplace_release(ip); - gfs2_quota_unlock(ip); - gfs2_alloc_put(ip); - } - goto out_unlock; - -out_trans_fail: - gfs2_inplace_release(ip); -out_qunlock: - gfs2_quota_unlock(ip); -out_alloc_put: - gfs2_alloc_put(ip); -out_unlock: - gfs2_glock_dq(&ip->i_gh); -out_uninit: - gfs2_holder_uninit(&ip->i_gh); - return error; -} - - static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len) { @@ -1562,7 +1305,6 @@ const struct inode_operations gfs2_file_iops = { .getxattr = gfs2_getxattr, .listxattr = gfs2_listxattr, .removexattr = gfs2_removexattr, - .fallocate = gfs2_fallocate, .fiemap = gfs2_fiemap, }; -- cgit v1.2.3