From 3271d7eb00f1df82d9ea26b359ed065129639f7c Mon Sep 17 00:00:00 2001 From: Fengnan Chang Date: Wed, 10 Nov 2021 10:37:13 +0800 Subject: f2fs: compress: reduce one page array alloc and free when write compressed page Don't alloc new page pointers array to replace old, just use old, introduce valid_nr_cpages to indicate valid number of page pointers in array, try to reduce one page array alloc and free when write compress page. Signed-off-by: Fengnan Chang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 32 ++++++++++---------------------- 1 file changed, 10 insertions(+), 22 deletions(-) (limited to 'fs/f2fs/compress.c') diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 49121a21f749..fb9e5149af5d 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -154,6 +154,7 @@ void f2fs_destroy_compress_ctx(struct compress_ctx *cc, bool reuse) cc->rpages = NULL; cc->nr_rpages = 0; cc->nr_cpages = 0; + cc->valid_nr_cpages = 0; if (!reuse) cc->cluster_idx = NULL_CLUSTER; } @@ -620,7 +621,6 @@ static int f2fs_compress_pages(struct compress_ctx *cc) const struct f2fs_compress_ops *cops = f2fs_cops[fi->i_compress_algorithm]; unsigned int max_len, new_nr_cpages; - struct page **new_cpages; u32 chksum = 0; int i, ret; @@ -635,6 +635,7 @@ static int f2fs_compress_pages(struct compress_ctx *cc) max_len = COMPRESS_HEADER_SIZE + cc->clen; cc->nr_cpages = DIV_ROUND_UP(max_len, PAGE_SIZE); + cc->valid_nr_cpages = cc->nr_cpages; cc->cpages = page_array_alloc(cc->inode, cc->nr_cpages); if (!cc->cpages) { @@ -685,13 +686,6 @@ static int f2fs_compress_pages(struct compress_ctx *cc) new_nr_cpages = DIV_ROUND_UP(cc->clen + COMPRESS_HEADER_SIZE, PAGE_SIZE); - /* Now we're going to cut unnecessary tail pages */ - new_cpages = page_array_alloc(cc->inode, new_nr_cpages); - if (!new_cpages) { - ret = -ENOMEM; - goto out_vunmap_cbuf; - } - /* zero out any unused part of the last page */ memset(&cc->cbuf->cdata[cc->clen], 0, (new_nr_cpages * PAGE_SIZE) - @@ -701,10 +695,8 @@ static int f2fs_compress_pages(struct compress_ctx *cc) vm_unmap_ram(cc->rbuf, cc->cluster_size); for (i = 0; i < cc->nr_cpages; i++) { - if (i < new_nr_cpages) { - new_cpages[i] = cc->cpages[i]; + if (i < new_nr_cpages) continue; - } f2fs_compress_free_page(cc->cpages[i]); cc->cpages[i] = NULL; } @@ -712,9 +704,7 @@ static int f2fs_compress_pages(struct compress_ctx *cc) if (cops->destroy_compress_ctx) cops->destroy_compress_ctx(cc); - page_array_free(cc->inode, cc->cpages, cc->nr_cpages); - cc->cpages = new_cpages; - cc->nr_cpages = new_nr_cpages; + cc->valid_nr_cpages = new_nr_cpages; trace_f2fs_compress_pages_end(cc->inode, cc->cluster_idx, cc->clen, ret); @@ -1308,14 +1298,14 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, cic->magic = F2FS_COMPRESSED_PAGE_MAGIC; cic->inode = inode; - atomic_set(&cic->pending_pages, cc->nr_cpages); + atomic_set(&cic->pending_pages, cc->valid_nr_cpages); cic->rpages = page_array_alloc(cc->inode, cc->cluster_size); if (!cic->rpages) goto out_put_cic; cic->nr_rpages = cc->cluster_size; - for (i = 0; i < cc->nr_cpages; i++) { + for (i = 0; i < cc->valid_nr_cpages; i++) { f2fs_set_compressed_page(cc->cpages[i], inode, cc->rpages[i + 1]->index, cic); fio.compressed_page = cc->cpages[i]; @@ -1360,7 +1350,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, if (fio.compr_blocks && __is_valid_data_blkaddr(blkaddr)) fio.compr_blocks++; - if (i > cc->nr_cpages) { + if (i > cc->valid_nr_cpages) { if (__is_valid_data_blkaddr(blkaddr)) { f2fs_invalidate_blocks(sbi, blkaddr); f2fs_update_data_blkaddr(&dn, NEW_ADDR); @@ -1385,8 +1375,8 @@ unlock_continue: if (fio.compr_blocks) f2fs_i_compr_blocks_update(inode, fio.compr_blocks - 1, false); - f2fs_i_compr_blocks_update(inode, cc->nr_cpages, true); - add_compr_block_stat(inode, cc->nr_cpages); + f2fs_i_compr_blocks_update(inode, cc->valid_nr_cpages, true); + add_compr_block_stat(inode, cc->valid_nr_cpages); set_inode_flag(cc->inode, FI_APPEND_WRITE); if (cc->cluster_idx == 0) @@ -1424,9 +1414,7 @@ out_unlock_op: else f2fs_unlock_op(sbi); out_free: - for (i = 0; i < cc->nr_cpages; i++) { - if (!cc->cpages[i]) - continue; + for (i = 0; i < cc->valid_nr_cpages; i++) { f2fs_compress_free_page(cc->cpages[i]); cc->cpages[i] = NULL; } -- cgit v1.2.3 From 7377e853967ba45bf409e3b5536624d2cbc99f21 Mon Sep 17 00:00:00 2001 From: Hyeong-Jun Kim Date: Fri, 10 Dec 2021 13:30:12 +0900 Subject: f2fs: compress: fix potential deadlock of compress file There is a potential deadlock between writeback process and a process performing write_begin() or write_cache_pages() while trying to write same compress file, but not compressable, as below: [Process A] - doing checkpoint [Process B] [Process C] f2fs_write_cache_pages() - lock_page() [all pages in cluster, 0-31] - f2fs_write_multi_pages() - f2fs_write_raw_pages() - f2fs_write_single_data_page() - f2fs_do_write_data_page() - return -EAGAIN [f2fs_trylock_op() failed] - unlock_page(page) [e.g., page 0] - generic_perform_write() - f2fs_write_begin() - f2fs_prepare_compress_overwrite() - prepare_compress_overwrite() - lock_page() [e.g., page 0] - lock_page() [e.g., page 1] - lock_page(page) [e.g., page 0] Since there is no compress process, it is no longer necessary to hold locks on every pages in cluster within f2fs_write_raw_pages(). This patch changes f2fs_write_raw_pages() to release all locks first and then perform write same as the non-compress file in f2fs_write_cache_pages(). Fixes: 4c8ff7095bef ("f2fs: support data compression") Signed-off-by: Hyeong-Jun Kim Signed-off-by: Sungjong Seo Signed-off-by: Youngjin Gil Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 50 ++++++++++++++++++++++---------------------------- 1 file changed, 22 insertions(+), 28 deletions(-) (limited to 'fs/f2fs/compress.c') diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index fb9e5149af5d..4b49038d150d 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1456,25 +1456,38 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc, enum iostat_type io_type) { struct address_space *mapping = cc->inode->i_mapping; - int _submitted, compr_blocks, ret; - int i = -1, err = 0; + int _submitted, compr_blocks, ret, i; compr_blocks = f2fs_compressed_blocks(cc); - if (compr_blocks < 0) { - err = compr_blocks; - goto out_err; + + for (i = 0; i < cc->cluster_size; i++) { + if (!cc->rpages[i]) + continue; + + redirty_page_for_writepage(wbc, cc->rpages[i]); + unlock_page(cc->rpages[i]); } + if (compr_blocks < 0) + return compr_blocks; + for (i = 0; i < cc->cluster_size; i++) { if (!cc->rpages[i]) continue; retry_write: + lock_page(cc->rpages[i]); + if (cc->rpages[i]->mapping != mapping) { +continue_unlock: unlock_page(cc->rpages[i]); continue; } - BUG_ON(!PageLocked(cc->rpages[i])); + if (!PageDirty(cc->rpages[i])) + goto continue_unlock; + + if (!clear_page_dirty_for_io(cc->rpages[i])) + goto continue_unlock; ret = f2fs_write_single_data_page(cc->rpages[i], &_submitted, NULL, NULL, wbc, io_type, @@ -1489,26 +1502,15 @@ retry_write: * avoid deadlock caused by cluster update race * from foreground operation. */ - if (IS_NOQUOTA(cc->inode)) { - err = 0; - goto out_err; - } + if (IS_NOQUOTA(cc->inode)) + return 0; ret = 0; cond_resched(); congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT); - lock_page(cc->rpages[i]); - - if (!PageDirty(cc->rpages[i])) { - unlock_page(cc->rpages[i]); - continue; - } - - clear_page_dirty_for_io(cc->rpages[i]); goto retry_write; } - err = ret; - goto out_err; + return ret; } *submitted += _submitted; @@ -1517,14 +1519,6 @@ retry_write: f2fs_balance_fs(F2FS_M_SB(mapping), true); return 0; -out_err: - for (++i; i < cc->cluster_size; i++) { - if (!cc->rpages[i]) - continue; - redirty_page_for_writepage(wbc, cc->rpages[i]); - unlock_page(cc->rpages[i]); - } - return err; } int f2fs_write_multi_pages(struct compress_ctx *cc, -- cgit v1.2.3 From a9419b63bf414775e8aeee95d8c4a5e0df690748 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 13 Dec 2021 14:16:32 -0800 Subject: f2fs: do not bother checkpoint by f2fs_get_node_info This patch tries to mitigate lock contention between f2fs_write_checkpoint and f2fs_get_node_info along with nat_tree_lock. The idea is, if checkpoint is currently running, other threads that try to grab nat_tree_lock would be better to wait for checkpoint. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 2 +- fs/f2fs/compress.c | 2 +- fs/f2fs/data.c | 8 ++++---- fs/f2fs/f2fs.h | 2 +- fs/f2fs/file.c | 2 +- fs/f2fs/gc.c | 6 +++--- fs/f2fs/inline.c | 4 ++-- fs/f2fs/inode.c | 2 +- fs/f2fs/node.c | 19 ++++++++++--------- fs/f2fs/recovery.c | 2 +- fs/f2fs/segment.c | 2 +- 11 files changed, 26 insertions(+), 25 deletions(-) (limited to 'fs/f2fs/compress.c') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index f1693d45bb78..55e3c54d99c1 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -664,7 +664,7 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) /* truncate all the data during iput */ iput(inode); - err = f2fs_get_node_info(sbi, ino, &ni); + err = f2fs_get_node_info(sbi, ino, &ni, false); if (err) goto err_out; diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 4b49038d150d..d0c3aeba5945 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1286,7 +1286,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, psize = (loff_t)(cc->rpages[last_index]->index + 1) << PAGE_SHIFT; - err = f2fs_get_node_info(fio.sbi, dn.nid, &ni); + err = f2fs_get_node_info(fio.sbi, dn.nid, &ni, false); if (err) goto out_put_dnode; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 40c0d1426a11..2737fcc0dbcf 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1355,7 +1355,7 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type) if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC))) return -EPERM; - err = f2fs_get_node_info(sbi, dn->nid, &ni); + err = f2fs_get_node_info(sbi, dn->nid, &ni, false); if (err) return err; @@ -1757,7 +1757,7 @@ static int f2fs_xattr_fiemap(struct inode *inode, if (!page) return -ENOMEM; - err = f2fs_get_node_info(sbi, inode->i_ino, &ni); + err = f2fs_get_node_info(sbi, inode->i_ino, &ni, false); if (err) { f2fs_put_page(page, 1); return err; @@ -1789,7 +1789,7 @@ static int f2fs_xattr_fiemap(struct inode *inode, if (!page) return -ENOMEM; - err = f2fs_get_node_info(sbi, xnid, &ni); + err = f2fs_get_node_info(sbi, xnid, &ni, false); if (err) { f2fs_put_page(page, 1); return err; @@ -2649,7 +2649,7 @@ got_it: fio->need_lock = LOCK_REQ; } - err = f2fs_get_node_info(fio->sbi, dn.nid, &ni); + err = f2fs_get_node_info(fio->sbi, dn.nid, &ni, false); if (err) goto out_writepage; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 5da592286721..a9ed2fd3fffb 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3412,7 +3412,7 @@ int f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid); bool f2fs_is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid); bool f2fs_need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino); int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid, - struct node_info *ni); + struct node_info *ni, bool checkpoint_context); pgoff_t f2fs_get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs); int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode); int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 5ec6bef3937f..f540c1cbddca 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1233,7 +1233,7 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode, if (ret) return ret; - ret = f2fs_get_node_info(sbi, dn.nid, &ni); + ret = f2fs_get_node_info(sbi, dn.nid, &ni, false); if (ret) { f2fs_put_dnode(&dn); return ret; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 7fbe46477a5a..a6accec60d04 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -959,7 +959,7 @@ next_step: continue; } - if (f2fs_get_node_info(sbi, nid, &ni)) { + if (f2fs_get_node_info(sbi, nid, &ni, false)) { f2fs_put_page(node_page, 1); continue; } @@ -1027,7 +1027,7 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, if (IS_ERR(node_page)) return false; - if (f2fs_get_node_info(sbi, nid, dni)) { + if (f2fs_get_node_info(sbi, nid, dni, false)) { f2fs_put_page(node_page, 1); return false; } @@ -1221,7 +1221,7 @@ static int move_data_block(struct inode *inode, block_t bidx, f2fs_wait_on_block_writeback(inode, dn.data_blkaddr); - err = f2fs_get_node_info(fio.sbi, dn.nid, &ni); + err = f2fs_get_node_info(fio.sbi, dn.nid, &ni, false); if (err) goto put_out; diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index ea08f0dfa1bd..4b5cefa3f90c 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -131,7 +131,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) if (err) return err; - err = f2fs_get_node_info(fio.sbi, dn->nid, &ni); + err = f2fs_get_node_info(fio.sbi, dn->nid, &ni, false); if (err) { f2fs_truncate_data_blocks_range(dn, 1); f2fs_put_dnode(dn); @@ -786,7 +786,7 @@ int f2fs_inline_data_fiemap(struct inode *inode, ilen = start + len; ilen -= start; - err = f2fs_get_node_info(F2FS_I_SB(inode), inode->i_ino, &ni); + err = f2fs_get_node_info(F2FS_I_SB(inode), inode->i_ino, &ni, false); if (err) goto out; diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 935016e56010..2ab3b424735a 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -881,7 +881,7 @@ void f2fs_handle_failed_inode(struct inode *inode) * so we can prevent losing this orphan when encoutering checkpoint * and following suddenly power-off. */ - err = f2fs_get_node_info(sbi, inode->i_ino, &ni); + err = f2fs_get_node_info(sbi, inode->i_ino, &ni, false); if (err) { set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_warn(sbi, "May loss orphan inode, run fsck to fix."); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index b1bc7d76da3b..e0b5eb28d383 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -543,7 +543,7 @@ int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) } int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid, - struct node_info *ni) + struct node_info *ni, bool checkpoint_context) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); @@ -576,9 +576,10 @@ retry: * nat_tree_lock. Therefore, we should retry, if we failed to grab here * while not bothering checkpoint. */ - if (!rwsem_is_locked(&sbi->cp_global_sem)) { + if (!rwsem_is_locked(&sbi->cp_global_sem) || checkpoint_context) { down_read(&curseg->journal_rwsem); - } else if (!down_read_trylock(&curseg->journal_rwsem)) { + } else if (rwsem_is_contended(&nm_i->nat_tree_lock) || + !down_read_trylock(&curseg->journal_rwsem)) { up_read(&nm_i->nat_tree_lock); goto retry; } @@ -891,7 +892,7 @@ static int truncate_node(struct dnode_of_data *dn) int err; pgoff_t index; - err = f2fs_get_node_info(sbi, dn->nid, &ni); + err = f2fs_get_node_info(sbi, dn->nid, &ni, false); if (err) return err; @@ -1290,7 +1291,7 @@ struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs) goto fail; #ifdef CONFIG_F2FS_CHECK_FS - err = f2fs_get_node_info(sbi, dn->nid, &new_ni); + err = f2fs_get_node_info(sbi, dn->nid, &new_ni, false); if (err) { dec_valid_node_count(sbi, dn->inode, !ofs); goto fail; @@ -1352,7 +1353,7 @@ static int read_node_page(struct page *page, int op_flags) return LOCKED_PAGE; } - err = f2fs_get_node_info(sbi, page->index, &ni); + err = f2fs_get_node_info(sbi, page->index, &ni, false); if (err) return err; @@ -1604,7 +1605,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, nid = nid_of_node(page); f2fs_bug_on(sbi, page->index != nid); - if (f2fs_get_node_info(sbi, nid, &ni)) + if (f2fs_get_node_info(sbi, nid, &ni, !do_balance)) goto redirty_out; if (wbc->for_reclaim) { @@ -2705,7 +2706,7 @@ int f2fs_recover_xattr_data(struct inode *inode, struct page *page) goto recover_xnid; /* 1: invalidate the previous xattr nid */ - err = f2fs_get_node_info(sbi, prev_xnid, &ni); + err = f2fs_get_node_info(sbi, prev_xnid, &ni, false); if (err) return err; @@ -2745,7 +2746,7 @@ int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) struct page *ipage; int err; - err = f2fs_get_node_info(sbi, ino, &old_ni); + err = f2fs_get_node_info(sbi, ino, &old_ni, false); if (err) return err; diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 6a1b4668d933..e65c73c4411d 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -595,7 +595,7 @@ retry_dn: f2fs_wait_on_page_writeback(dn.node_page, NODE, true, true); - err = f2fs_get_node_info(sbi, dn.nid, &ni); + err = f2fs_get_node_info(sbi, dn.nid, &ni, false); if (err) goto err; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index df9ed75f0b7a..b4a2f8c36149 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -254,7 +254,7 @@ retry: goto next; } - err = f2fs_get_node_info(sbi, dn.nid, &ni); + err = f2fs_get_node_info(sbi, dn.nid, &ni, false); if (err) { f2fs_put_dnode(&dn); return err; -- cgit v1.2.3