From 66d7e7f09f77456fe68683247d77721032a00ee5 Mon Sep 17 00:00:00 2001 From: Arne Jansen Date: Mon, 12 Sep 2011 15:26:38 +0200 Subject: Btrfs: mark delayed refs as for cow Add a for_cow parameter to add_delayed_*_ref and pass the appropriate value from every call site. The for_cow parameter will later on be used to determine if a ref will change anything with respect to qgroups. Delayed refs coming from relocation are always counted as for_cow, as they don't change subvol quota. Also pass in the fs_info for later use. btrfs_find_all_roots() will use this as an optimization, as changes that are for_cow will not change anything with respect to which root points to a certain leaf. Thus, we don't need to add the current sequence number to those delayed refs. Signed-off-by: Arne Jansen Signed-off-by: Jan Schmidt --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c5ccec23984..ea819386b86 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3139,7 +3139,7 @@ delete: ret = btrfs_free_extent(trans, root, extent_start, extent_num_bytes, 0, btrfs_header_owner(leaf), - ino, extent_offset); + ino, extent_offset, 0); BUG_ON(ret); } -- cgit v1.2.3 From 6bf7e080d5bcb0d399ee38ce3dabbfad64448192 Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Thu, 1 Dec 2011 14:35:19 +0100 Subject: Btrfs: make sure we're not using obsolete code in btrfs_get_extent There's code in btrfs_get_extent that should never be used. This patch turns a WARN_ON(1) into a BUG(), hoping we can remove the transaction code from btrfs_get_extent soon. Signed-off-by: Jan Schmidt --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ea819386b86..603d740f0f1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5022,7 +5022,7 @@ again: } flush_dcache_page(page); } else if (create && PageUptodate(page)) { - WARN_ON(1); + BUG(); if (!trans) { kunmap(page); free_extent_map(em); -- cgit v1.2.3 From 7ad85bb76a61801362701b77c5cee5aa09f35369 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 12 Jan 2012 19:10:12 -0500 Subject: Btrfs: do not use btrfs_end_transaction_throttle everywhere A user reported a problem where things like open with O_CREAT would take up to 30 seconds when he had nfs activity on the same mount. This is because all of our quick metadata operations, like create, symlink etc all do btrfs_end_transaction_throttle, which if the transaction is blocked will wait for the commit to complete before it returns. This adds a ridiculous amount of latency and isn't really needed. The normal btrfs_end_transaction will mark the transaction as blocked and wake the transaction kthread up if it thinks the transaction needs to end (this being in the running out of global reserve space scenario), and this is all that is really needed since we've already done everything we're going to do, we just need to return. This should help people with the latency they were seeing when using synchronous heavy workloads. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 18 +++++++++--------- fs/btrfs/xattr.c | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index acc4ff39ca4..5f8ba210c0a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2845,7 +2845,7 @@ static void __unlink_end_trans(struct btrfs_trans_handle *trans, BUG_ON(!root->fs_info->enospc_unlink); root->fs_info->enospc_unlink = 0; } - btrfs_end_transaction_throttle(trans, root); + btrfs_end_transaction(trans, root); } static int btrfs_unlink(struct inode *dir, struct dentry *dentry) @@ -3434,7 +3434,7 @@ static int btrfs_setsize(struct inode *inode, loff_t newsize) i_size_write(inode, newsize); btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL); ret = btrfs_update_inode(trans, root, inode); - btrfs_end_transaction_throttle(trans, root); + btrfs_end_transaction(trans, root); } else { /* @@ -4655,7 +4655,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, } out_unlock: nr = trans->blocks_used; - btrfs_end_transaction_throttle(trans, root); + btrfs_end_transaction(trans, root); btrfs_btree_balance_dirty(root, nr); if (drop_inode) { inode_dec_link_count(inode); @@ -4723,7 +4723,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, } out_unlock: nr = trans->blocks_used; - btrfs_end_transaction_throttle(trans, root); + btrfs_end_transaction(trans, root); if (drop_inode) { inode_dec_link_count(inode); iput(inode); @@ -4782,7 +4782,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, } nr = trans->blocks_used; - btrfs_end_transaction_throttle(trans, root); + btrfs_end_transaction(trans, root); fail: if (drop_inode) { inode_dec_link_count(inode); @@ -4848,7 +4848,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) out_fail: nr = trans->blocks_used; - btrfs_end_transaction_throttle(trans, root); + btrfs_end_transaction(trans, root); if (drop_on_err) iput(inode); btrfs_btree_balance_dirty(root, nr); @@ -6668,7 +6668,7 @@ end_trans: err = ret; nr = trans->blocks_used; - ret = btrfs_end_transaction_throttle(trans, root); + ret = btrfs_end_transaction(trans, root); btrfs_btree_balance_dirty(root, nr); } @@ -7075,7 +7075,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, btrfs_end_log_trans(root); } out_fail: - btrfs_end_transaction_throttle(trans, root); + btrfs_end_transaction(trans, root); out_notrans: if (old_ino == BTRFS_FIRST_FREE_OBJECTID) up_read(&root->fs_info->subvol_sem); @@ -7247,7 +7247,7 @@ out_unlock: if (!err) d_instantiate(dentry, inode); nr = trans->blocks_used; - btrfs_end_transaction_throttle(trans, root); + btrfs_end_transaction(trans, root); if (drop_inode) { inode_dec_link_count(inode); iput(inode); diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 3848b04e310..e7a5659087e 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -200,7 +200,7 @@ int __btrfs_setxattr(struct btrfs_trans_handle *trans, ret = btrfs_update_inode(trans, root, inode); BUG_ON(ret); out: - btrfs_end_transaction_throttle(trans, root); + btrfs_end_transaction(trans, root); return ret; } -- cgit v1.2.3 From f70a9a6b94af86fca069a7552ab672c31b457786 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Thu, 12 Jan 2012 19:10:12 -0500 Subject: Btrfs: fix btrfsck error 400 when truncating a compressed Reproduce steps: # mkfs.btrfs /dev/sdb5 # mount /dev/sdb5 -o compress=lzo /mnt # dd if=/dev/zero of=/mnt/tmpfile bs=128K count=1 # sync # truncate -s 64K /mnt/tmpfile root 5 inode 257 errors 400 This is because of the wrong if condition, which is used to check if we should subtract the bytes of the dropped range from i_blocks/i_bytes of i-node or not. When we truncate a compressed extent, btrfs substracts the bytes of the whole extent, it's wrong. We should substract the real size that we truncate, no matter it is a compressed extent or not. Fix it. Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5f8ba210c0a..946a7f1b329 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3009,7 +3009,6 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, int pending_del_nr = 0; int pending_del_slot = 0; int extent_type = -1; - int encoding; int ret; int err = 0; u64 ino = btrfs_ino(inode); @@ -3059,7 +3058,6 @@ search_again: leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); found_type = btrfs_key_type(&found_key); - encoding = 0; if (found_key.objectid != ino) break; @@ -3072,10 +3070,6 @@ search_again: fi = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); extent_type = btrfs_file_extent_type(leaf, fi); - encoding = btrfs_file_extent_compression(leaf, fi); - encoding |= btrfs_file_extent_encryption(leaf, fi); - encoding |= btrfs_file_extent_other_encoding(leaf, fi); - if (extent_type != BTRFS_FILE_EXTENT_INLINE) { item_end += btrfs_file_extent_num_bytes(leaf, fi); @@ -3103,7 +3097,7 @@ search_again: if (extent_type != BTRFS_FILE_EXTENT_INLINE) { u64 num_dec; extent_start = btrfs_file_extent_disk_bytenr(leaf, fi); - if (!del_item && !encoding) { + if (!del_item) { u64 orig_num_bytes = btrfs_file_extent_num_bytes(leaf, fi); extent_num_bytes = new_size - -- cgit v1.2.3 From ec39e180fd3188c983c94603634bfcd019f42ae7 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 12 Jan 2012 19:10:12 -0500 Subject: Btrfs: release space on error in page_mkwrite If updating the inode gave us an ENOSPC we were just returning in page_mkwrite, which is a problem since we make our reservation right before trying to update the inode, so fix the out label so that we actually free our reservation. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 946a7f1b329..85fd86ea983 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6488,8 +6488,8 @@ out_unlock: if (!ret) return VM_FAULT_LOCKED; unlock_page(page); - btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); out: + btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); return ret; } -- cgit v1.2.3 From 90290e19820e3323ce6b9c2888eeb68bf29c278b Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 2 Dec 2011 15:44:12 -0500 Subject: Btrfs: protect orphan block rsv with spin_lock We've been seeing warnings coming out of the orphan commit stuff forever from ceph. Turns out it's because we're racing with checking if the orphan block reserve is set, because we clear it outside of the spin_lock. So leave the normal fastpath checks where they are, but take the spin_lock and _recheck_ to make sure we haven't had an orphan block rsv added in the meantime. Then clear the root's orphan block rsv and release the lock. With this patch a user said the warnings went away and they usually showed up pretty soon after he started ceph. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 85fd86ea983..619742d3716 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1951,12 +1951,28 @@ enum btrfs_orphan_cleanup_state { void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans, struct btrfs_root *root) { + struct btrfs_block_rsv *block_rsv; int ret; if (!list_empty(&root->orphan_list) || root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE) return; + spin_lock(&root->orphan_lock); + if (!list_empty(&root->orphan_list)) { + spin_unlock(&root->orphan_lock); + return; + } + + if (root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE) { + spin_unlock(&root->orphan_lock); + return; + } + + block_rsv = root->orphan_block_rsv; + root->orphan_block_rsv = NULL; + spin_unlock(&root->orphan_lock); + if (root->orphan_item_inserted && btrfs_root_refs(&root->root_item) > 0) { ret = btrfs_del_orphan_item(trans, root->fs_info->tree_root, @@ -1965,10 +1981,9 @@ void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans, root->orphan_item_inserted = 0; } - if (root->orphan_block_rsv) { - WARN_ON(root->orphan_block_rsv->size > 0); - btrfs_free_block_rsv(root, root->orphan_block_rsv); - root->orphan_block_rsv = NULL; + if (block_rsv) { + WARN_ON(block_rsv->size > 0); + btrfs_free_block_rsv(root, block_rsv); } } -- cgit v1.2.3 From f248679e86fead40cc78e724c7181d6bec1a2046 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 13 Jan 2012 12:09:22 -0500 Subject: Btrfs: add a delalloc mutex to inodes for delalloc reservations I was using i_mutex for this, but we're getting bogus lockdep warnings by doing that and theres no real way to get rid of those, so just stop using i_mutex to protect delalloc metadata reservations and use a delalloc mutex instead. This shouldn't be contended often at all, only if you are writing and mmap writing to the file at the same time. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/btrfs_inode.h | 3 +++ fs/btrfs/extent-tree.c | 5 +++-- fs/btrfs/inode.c | 11 +---------- fs/btrfs/ioctl.c | 2 -- fs/btrfs/relocation.c | 2 -- 5 files changed, 7 insertions(+), 16 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 634608d2a6d..9b9b15fd520 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -51,6 +51,9 @@ struct btrfs_inode { /* held while logging the inode in tree-log.c */ struct mutex log_mutex; + /* held while doing delalloc reservations */ + struct mutex delalloc_mutex; + /* used to order data wrt metadata */ struct btrfs_ordered_inode_tree ordered_tree; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 556f9aa25bb..e0ad5f0f895 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4345,12 +4345,11 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) /* Need to be holding the i_mutex here if we aren't free space cache */ if (btrfs_is_free_space_inode(root, inode)) flush = 0; - else - WARN_ON(!mutex_is_locked(&inode->i_mutex)); if (flush && btrfs_transaction_in_commit(root->fs_info)) schedule_timeout(1); + mutex_lock(&BTRFS_I(inode)->delalloc_mutex); num_bytes = ALIGN(num_bytes, root->sectorsize); spin_lock(&BTRFS_I(inode)->lock); @@ -4405,6 +4404,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) btrfs_ino(inode), to_free, 0); } + mutex_unlock(&BTRFS_I(inode)->delalloc_mutex); return ret; } @@ -4415,6 +4415,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) } BTRFS_I(inode)->reserved_extents += nr_extents; spin_unlock(&BTRFS_I(inode)->lock); + mutex_unlock(&BTRFS_I(inode)->delalloc_mutex); if (to_reserve) trace_btrfs_space_reservation(root->fs_info,"delalloc", diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 619742d3716..5977987abdb 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2239,14 +2239,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) continue; } nr_truncate++; - /* - * Need to hold the imutex for reservation purposes, not - * a huge deal here but I have a WARN_ON in - * btrfs_delalloc_reserve_space to catch offenders. - */ - mutex_lock(&inode->i_mutex); ret = btrfs_truncate(inode); - mutex_unlock(&inode->i_mutex); } else { nr_unlink++; } @@ -6411,10 +6404,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) u64 page_start; u64 page_end; - /* Need this to keep space reservations serialized */ - mutex_lock(&inode->i_mutex); ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); - mutex_unlock(&inode->i_mutex); if (!ret) ret = btrfs_update_time(vma->vm_file); if (ret) { @@ -6758,6 +6748,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) extent_io_tree_init(&ei->io_tree, &inode->i_data); extent_io_tree_init(&ei->io_failure_tree, &inode->i_data); mutex_init(&ei->log_mutex); + mutex_init(&ei->delalloc_mutex); btrfs_ordered_inode_tree_init(&ei->ordered_tree); INIT_LIST_HEAD(&ei->i_orphan); INIT_LIST_HEAD(&ei->delalloc_inodes); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 7fdf22c2dc0..6834be4c870 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -868,10 +868,8 @@ static int cluster_pages_for_defrag(struct inode *inode, return 0; file_end = (isize - 1) >> PAGE_CACHE_SHIFT; - mutex_lock(&inode->i_mutex); ret = btrfs_delalloc_reserve_space(inode, num_pages << PAGE_CACHE_SHIFT); - mutex_unlock(&inode->i_mutex); if (ret) return ret; again: diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index efe9f792544..8c1aae2c845 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -2949,9 +2949,7 @@ static int relocate_file_extent_cluster(struct inode *inode, index = (cluster->start - offset) >> PAGE_CACHE_SHIFT; last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT; while (index <= last_index) { - mutex_lock(&inode->i_mutex); ret = btrfs_delalloc_reserve_metadata(inode, PAGE_CACHE_SIZE); - mutex_unlock(&inode->i_mutex); if (ret) goto out; -- cgit v1.2.3