diff options
Diffstat (limited to 'fs/btrfs/extent-tree.c')
| -rw-r--r-- | fs/btrfs/extent-tree.c | 359 |
1 files changed, 349 insertions, 10 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 33785333f29..372fd224a11 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -242,6 +242,12 @@ get_caching_control(struct btrfs_block_group_cache *cache) return NULL; } + /* We're loading it the fast way, so we don't have a caching_ctl. */ + if (!cache->caching_ctl) { + spin_unlock(&cache->lock); + return NULL; + } + ctl = cache->caching_ctl; atomic_inc(&ctl->count); spin_unlock(&cache->lock); @@ -421,7 +427,9 @@ err: return 0; } -static int cache_block_group(struct btrfs_block_group_cache *cache) +static int cache_block_group(struct btrfs_block_group_cache *cache, + struct btrfs_trans_handle *trans, + int load_cache_only) { struct btrfs_fs_info *fs_info = cache->fs_info; struct btrfs_caching_control *caching_ctl; @@ -432,6 +440,36 @@ static int cache_block_group(struct btrfs_block_group_cache *cache) if (cache->cached != BTRFS_CACHE_NO) return 0; + /* + * We can't do the read from on-disk cache during a commit since we need + * to have the normal tree locking. + */ + if (!trans->transaction->in_commit) { + spin_lock(&cache->lock); + if (cache->cached != BTRFS_CACHE_NO) { + spin_unlock(&cache->lock); + return 0; + } + cache->cached = BTRFS_CACHE_STARTED; + spin_unlock(&cache->lock); + + ret = load_free_space_cache(fs_info, cache); + + spin_lock(&cache->lock); + if (ret == 1) { + cache->cached = BTRFS_CACHE_FINISHED; + cache->last_byte_to_unpin = (u64)-1; + } else { + cache->cached = BTRFS_CACHE_NO; + } + spin_unlock(&cache->lock); + if (ret == 1) + return 0; + } + + if (load_cache_only) + return 0; + caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL); BUG_ON(!caching_ctl); @@ -509,7 +547,7 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, rcu_read_lock(); list_for_each_entry_rcu(found, head, list) { - if (found->flags == flags) { + if (found->flags & flags) { rcu_read_unlock(); return found; } @@ -2688,6 +2726,109 @@ next_block_group(struct btrfs_root *root, return cache; } +static int cache_save_setup(struct btrfs_block_group_cache *block_group, + struct btrfs_trans_handle *trans, + struct btrfs_path *path) +{ + struct btrfs_root *root = block_group->fs_info->tree_root; + struct inode *inode = NULL; + u64 alloc_hint = 0; + int num_pages = 0; + int retries = 0; + int ret = 0; + + /* + * If this block group is smaller than 100 megs don't bother caching the + * block group. + */ + if (block_group->key.offset < (100 * 1024 * 1024)) { + spin_lock(&block_group->lock); + block_group->disk_cache_state = BTRFS_DC_WRITTEN; + spin_unlock(&block_group->lock); + return 0; + } + +again: + inode = lookup_free_space_inode(root, block_group, path); + if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) { + ret = PTR_ERR(inode); + btrfs_release_path(root, path); + goto out; + } + + if (IS_ERR(inode)) { + BUG_ON(retries); + retries++; + + if (block_group->ro) + goto out_free; + + ret = create_free_space_inode(root, trans, block_group, path); + if (ret) + goto out_free; + goto again; + } + + /* + * We want to set the generation to 0, that way if anything goes wrong + * from here on out we know not to trust this cache when we load up next + * time. + */ + BTRFS_I(inode)->generation = 0; + ret = btrfs_update_inode(trans, root, inode); + WARN_ON(ret); + + if (i_size_read(inode) > 0) { + ret = btrfs_truncate_free_space_cache(root, trans, path, + inode); + if (ret) + goto out_put; + } + + spin_lock(&block_group->lock); + if (block_group->cached != BTRFS_CACHE_FINISHED) { + spin_unlock(&block_group->lock); + goto out_put; + } + spin_unlock(&block_group->lock); + + num_pages = (int)div64_u64(block_group->key.offset, 1024 * 1024 * 1024); + if (!num_pages) + num_pages = 1; + + /* + * Just to make absolutely sure we have enough space, we're going to + * preallocate 12 pages worth of space for each block group. In + * practice we ought to use at most 8, but we need extra space so we can + * add our header and have a terminator between the extents and the + * bitmaps. + */ + num_pages *= 16; + num_pages *= PAGE_CACHE_SIZE; + + ret = btrfs_check_data_free_space(inode, num_pages); + if (ret) + goto out_put; + + ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages, + num_pages, num_pages, + &alloc_hint); + btrfs_free_reserved_data_space(inode, num_pages); +out_put: + iput(inode); +out_free: + btrfs_release_path(root, path); +out: + spin_lock(&block_group->lock); + if (ret) + block_group->disk_cache_state = BTRFS_DC_ERROR; + else + block_group->disk_cache_state = BTRFS_DC_SETUP; + spin_unlock(&block_group->lock); + + return ret; +} + int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, struct btrfs_root *root) { @@ -2700,6 +2841,25 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, if (!path) return -ENOMEM; +again: + while (1) { + cache = btrfs_lookup_first_block_group(root->fs_info, last); + while (cache) { + if (cache->disk_cache_state == BTRFS_DC_CLEAR) + break; + cache = next_block_group(root, cache); + } + if (!cache) { + if (last == 0) + break; + last = 0; + continue; + } + err = cache_save_setup(cache, trans, path); + last = cache->key.objectid + cache->key.offset; + btrfs_put_block_group(cache); + } + while (1) { if (last == 0) { err = btrfs_run_delayed_refs(trans, root, @@ -2709,6 +2869,11 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, cache = btrfs_lookup_first_block_group(root->fs_info, last); while (cache) { + if (cache->disk_cache_state == BTRFS_DC_CLEAR) { + btrfs_put_block_group(cache); + goto again; + } + if (cache->dirty) break; cache = next_block_group(root, cache); @@ -2720,6 +2885,8 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, continue; } + if (cache->disk_cache_state == BTRFS_DC_SETUP) + cache->disk_cache_state = BTRFS_DC_NEED_WRITE; cache->dirty = 0; last = cache->key.objectid + cache->key.offset; @@ -2728,6 +2895,52 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, btrfs_put_block_group(cache); } + while (1) { + /* + * I don't think this is needed since we're just marking our + * preallocated extent as written, but just in case it can't + * hurt. + */ + if (last == 0) { + err = btrfs_run_delayed_refs(trans, root, + (unsigned long)-1); + BUG_ON(err); + } + + cache = btrfs_lookup_first_block_group(root->fs_info, last); + while (cache) { + /* + * Really this shouldn't happen, but it could if we + * couldn't write the entire preallocated extent and + * splitting the extent resulted in a new block. + */ + if (cache->dirty) { + btrfs_put_block_group(cache); + goto again; + } + if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE) + break; + cache = next_block_group(root, cache); + } + if (!cache) { + if (last == 0) + break; + last = 0; + continue; + } + + btrfs_write_out_cache(root, trans, cache, path); + + /* + * If we didn't have an error then the cache state is still + * NEED_WRITE, so we can set it to WRITTEN. + */ + if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE) + cache->disk_cache_state = BTRFS_DC_WRITTEN; + last = cache->key.objectid + cache->key.offset; + btrfs_put_block_group(cache); + } + btrfs_free_path(path); return 0; } @@ -2885,11 +3098,16 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes) struct btrfs_space_info *data_sinfo; struct btrfs_root *root = BTRFS_I(inode)->root; u64 used; - int ret = 0, committed = 0; + int ret = 0, committed = 0, alloc_chunk = 1; /* make sure bytes are sectorsize aligned */ bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); + if (root == root->fs_info->tree_root) { + alloc_chunk = 0; + committed = 1; + } + data_sinfo = BTRFS_I(inode)->space_info; if (!data_sinfo) goto alloc; @@ -2908,7 +3126,7 @@ again: * if we don't have enough free bytes in this space then we need * to alloc a new chunk. */ - if (!data_sinfo->full) { + if (!data_sinfo->full && alloc_chunk) { u64 alloc_target; data_sinfo->force_alloc = 1; @@ -3054,6 +3272,13 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, spin_unlock(&space_info->lock); /* + * If we have mixed data/metadata chunks we want to make sure we keep + * allocating mixed chunks instead of individual chunks. + */ + if (btrfs_mixed_space_info(space_info)) + flags |= (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA); + + /* * if we're doing a data chunk, go ahead and make sure that * we keep a reasonable number of metadata chunks allocated in the * FS as well. @@ -3815,12 +4040,12 @@ static int update_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, int alloc) { - struct btrfs_block_group_cache *cache; + struct btrfs_block_group_cache *cache = NULL; struct btrfs_fs_info *info = root->fs_info; - int factor; u64 total = num_bytes; u64 old_val; u64 byte_in_group; + int factor; /* block accounting for super block */ spin_lock(&info->delalloc_lock); @@ -3842,11 +4067,25 @@ static int update_block_group(struct btrfs_trans_handle *trans, factor = 2; else factor = 1; + /* + * If this block group has free space cache written out, we + * need to make sure to load it if we are removing space. This + * is because we need the unpinning stage to actually add the + * space back to the block group, otherwise we will leak space. + */ + if (!alloc && cache->cached == BTRFS_CACHE_NO) + cache_block_group(cache, trans, 1); + byte_in_group = bytenr - cache->key.objectid; WARN_ON(byte_in_group > cache->key.offset); spin_lock(&cache->space_info->lock); spin_lock(&cache->lock); + + if (btrfs_super_cache_generation(&info->super_copy) != 0 && + cache->disk_cache_state < BTRFS_DC_CLEAR) + cache->disk_cache_state = BTRFS_DC_CLEAR; + cache->dirty = 1; old_val = btrfs_block_group_used(&cache->item); num_bytes = min(total, cache->key.offset - byte_in_group); @@ -4593,6 +4832,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, bool found_uncached_bg = false; bool failed_cluster_refill = false; bool failed_alloc = false; + bool use_cluster = true; u64 ideal_cache_percent = 0; u64 ideal_cache_offset = 0; @@ -4607,16 +4847,24 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, return -ENOSPC; } + /* + * If the space info is for both data and metadata it means we have a + * small filesystem and we can't use the clustering stuff. + */ + if (btrfs_mixed_space_info(space_info)) + use_cluster = false; + if (orig_root->ref_cows || empty_size) allowed_chunk_alloc = 1; - if (data & BTRFS_BLOCK_GROUP_METADATA) { + if (data & BTRFS_BLOCK_GROUP_METADATA && use_cluster) { last_ptr = &root->fs_info->meta_alloc_cluster; if (!btrfs_test_opt(root, SSD)) empty_cluster = 64 * 1024; } - if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) { + if ((data & BTRFS_BLOCK_GROUP_DATA) && use_cluster && + btrfs_test_opt(root, SSD)) { last_ptr = &root->fs_info->data_alloc_cluster; } @@ -4680,6 +4928,10 @@ have_block_group: if (unlikely(block_group->cached == BTRFS_CACHE_NO)) { u64 free_percent; + ret = cache_block_group(block_group, trans, 1); + if (block_group->cached == BTRFS_CACHE_FINISHED) + goto have_block_group; + free_percent = btrfs_block_group_used(&block_group->item); free_percent *= 100; free_percent = div64_u64(free_percent, @@ -4700,7 +4952,7 @@ have_block_group: if (loop > LOOP_CACHING_NOWAIT || (loop > LOOP_FIND_IDEAL && atomic_read(&space_info->caching_threads) < 2)) { - ret = cache_block_group(block_group); + ret = cache_block_group(block_group, trans, 0); BUG_ON(ret); } found_uncached_bg = true; @@ -5257,7 +5509,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, u64 num_bytes = ins->offset; block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); - cache_block_group(block_group); + cache_block_group(block_group, trans, 0); caching_ctl = get_caching_control(block_group); if (!caching_ctl) { @@ -7848,6 +8100,40 @@ out: return ret; } +void btrfs_put_block_group_cache(struct btrfs_fs_info *info) +{ + struct btrfs_block_group_cache *block_group; + u64 last = 0; + + while (1) { + struct inode *inode; + + block_group = btrfs_lookup_first_block_group(info, last); + while (block_group) { + spin_lock(&block_group->lock); + if (block_group->iref) + break; + spin_unlock(&block_group->lock); + block_group = next_block_group(info->tree_root, + block_group); + } + if (!block_group) { + if (last == 0) + break; + last = 0; + continue; + } + + inode = block_group->inode; + block_group->iref = 0; + block_group->inode = NULL; + spin_unlock(&block_group->lock); + iput(inode); + last = block_group->key.objectid + block_group->key.offset; + btrfs_put_block_group(block_group); + } +} + int btrfs_free_block_groups(struct btrfs_fs_info *info) { struct btrfs_block_group_cache *block_group; @@ -7931,6 +8217,8 @@ int btrfs_read_block_groups(struct btrfs_root *root) struct btrfs_key key; struct btrfs_key found_key; struct extent_buffer *leaf; + int need_clear = 0; + u64 cache_gen; root = info->extent_root; key.objectid = 0; @@ -7940,6 +8228,15 @@ int btrfs_read_block_groups(struct btrfs_root *root) if (!path) return -ENOMEM; + cache_gen = btrfs_super_cache_generation(&root->fs_info->super_copy); + if (cache_gen != 0 && + btrfs_super_generation(&root->fs_info->super_copy) != cache_gen) + need_clear = 1; + if (btrfs_test_opt(root, CLEAR_CACHE)) + need_clear = 1; + if (!btrfs_test_opt(root, SPACE_CACHE) && cache_gen) + printk(KERN_INFO "btrfs: disk space caching is enabled\n"); + while (1) { ret = find_first_block_group(root, path, &key); if (ret > 0) @@ -7962,6 +8259,9 @@ int btrfs_read_block_groups(struct btrfs_root *root) INIT_LIST_HEAD(&cache->list); INIT_LIST_HEAD(&cache->cluster_list); + if (need_clear) + cache->disk_cache_state = BTRFS_DC_CLEAR; + /* * we only want to have 32k of ram per block group for keeping * track of free space, and if we pass 1/2 of that we want to @@ -8066,6 +8366,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, cache->key.offset = size; cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; cache->sectorsize = root->sectorsize; + cache->fs_info = root->fs_info; /* * we only want to have 32k of ram per block group for keeping track @@ -8122,7 +8423,9 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, struct btrfs_path *path; struct btrfs_block_group_cache *block_group; struct btrfs_free_cluster *cluster; + struct btrfs_root *tree_root = root->fs_info->tree_root; struct btrfs_key key; + struct inode *inode; int ret; int factor; @@ -8158,6 +8461,40 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); BUG_ON(!path); + inode = lookup_free_space_inode(root, block_group, path); + if (!IS_ERR(inode)) { + btrfs_orphan_add(trans, inode); + clear_nlink(inode); + /* One for the block groups ref */ + spin_lock(&block_group->lock); + if (block_group->iref) { + block_group->iref = 0; + block_group->inode = NULL; + spin_unlock(&block_group->lock); + iput(inode); + } else { + spin_unlock(&block_group->lock); + } + /* One for our lookup ref */ + iput(inode); + } + + key.objectid = BTRFS_FREE_SPACE_OBJECTID; + key.offset = block_group->key.objectid; + key.type = 0; + + ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1); + if (ret < 0) + goto out; + if (ret > 0) + btrfs_release_path(tree_root, path); + if (ret == 0) { + ret = btrfs_del_item(trans, tree_root, path); + if (ret) + goto out; + btrfs_release_path(tree_root, path); + } + spin_lock(&root->fs_info->block_group_cache_lock); rb_erase(&block_group->cache_node, &root->fs_info->block_group_cache_tree); @@ -8182,6 +8519,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, block_group->space_info->disk_total -= block_group->key.offset * factor; spin_unlock(&block_group->space_info->lock); + memcpy(&key, &block_group->key, sizeof(key)); + btrfs_clear_space_info_full(root->fs_info); btrfs_put_block_group(block_group); |
