From 0916878da355650d7e77104a7ac0fa1784eca852 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Sat, 16 Mar 2019 09:13:06 +0900 Subject: f2fs: Fix use of number of devices For a single device mount using a zoned block device, the zone information for the device is stored in the sbi->devs single entry array and sbi->s_ndevs is set to 1. This differs from a single device mount using a regular block device which does not allocate sbi->devs and sets sbi->s_ndevs to 0. However, sbi->s_devs == 0 condition is used throughout the code to differentiate a single device mount from a multi-device mount where sbi->s_ndevs is always larger than 1. This results in problems with single zoned block device volumes as these are treated as multi-device mounts but do not have the start_blk and end_blk information set. One of the problem observed is skipping of zone discard issuing resulting in write commands being issued to full zones or unaligned to a zone write pointer. Fix this problem by simply treating the cases sbi->s_ndevs == 0 (single regular block device mount) and sbi->s_ndevs == 1 (single zoned block device mount) in the same manner. This is done by introducing the helper function f2fs_is_multi_device() and using this helper in place of direct tests of sbi->s_ndevs value, improving code readability. Fixes: 7bb3a371d199 ("f2fs: Fix zoned block device support") Cc: Signed-off-by: Damien Le Moal Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'fs/f2fs/data.c') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 9727944139f2..d87dfa5aa112 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -220,12 +220,14 @@ struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi, struct block_device *bdev = sbi->sb->s_bdev; int i; - for (i = 0; i < sbi->s_ndevs; i++) { - if (FDEV(i).start_blk <= blk_addr && - FDEV(i).end_blk >= blk_addr) { - blk_addr -= FDEV(i).start_blk; - bdev = FDEV(i).bdev; - break; + if (f2fs_is_multi_device(sbi)) { + for (i = 0; i < sbi->s_ndevs; i++) { + if (FDEV(i).start_blk <= blk_addr && + FDEV(i).end_blk >= blk_addr) { + blk_addr -= FDEV(i).start_blk; + bdev = FDEV(i).bdev; + break; + } } } if (bio) { @@ -239,6 +241,9 @@ int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr) { int i; + if (!f2fs_is_multi_device(sbi)) + return 0; + for (i = 0; i < sbi->s_ndevs; i++) if (FDEV(i).start_blk <= blkaddr && FDEV(i).end_blk >= blkaddr) return i; -- cgit v1.2.3 From 186857c5a14aee85cace2ae7a36c6e43b9d3c7a5 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 2 Apr 2019 18:52:19 +0800 Subject: f2fs: fix potential recursive call when enabling data_flush As Hagbard Celine reported: Hi, this is a long standing bug that I've hit before on older kernels, but I was not able to get the syslog saved because of the nature of the bug. This time I had booted form a pen-drive, and was able to save the log to it's efi-partition. What i did to trigger it was to create a partition and format it f2fs, then mount it with options: "rw,relatime,lazytime,background_gc=on,disable_ext_identify,discard,heap,user_xattr,inline_xattr,acl,inline_data,inline_dentry,flush_merge,data_flush,extent_cache,mode=adaptive,active_logs=6,whint_mode=fs-based,alloc_mode=default,fsync_mode=strict". Then I unpacked a big .tar.xz to the partition (I used a gentoo-stage3-tarball as I was in process of installing Gentoo). Same options just without data_flush gives no problems. Mar 20 20:54:01 usbgentoo kernel: FAT-fs (nvme0n1p4): Volume was not properly unmounted. Some data may be corrupt. Please run fsck. Mar 20 21:05:23 usbgentoo kernel: kworker/dying (1588) used greatest stack depth: 12064 bytes left Mar 20 21:06:40 usbgentoo kernel: BUG: stack guard page was hit at 00000000a4b0733c (stack is 0000000056016422..0000000096e7463f) Mar 20 21:06:40 usbgentoo kernel: kernel stack overflow ...... Mar 20 21:06:40 usbgentoo kernel: Call Trace: Mar 20 21:06:40 usbgentoo kernel: read_node_page+0x71/0xf0 Mar 20 21:06:40 usbgentoo kernel: ? xas_load+0x8/0x50 Mar 20 21:06:40 usbgentoo kernel: __get_node_page+0x73/0x2a0 Mar 20 21:06:40 usbgentoo kernel: f2fs_get_dnode_of_data+0x34e/0x580 Mar 20 21:06:40 usbgentoo kernel: f2fs_write_inline_data+0x5e/0x2a0 Mar 20 21:06:40 usbgentoo kernel: __write_data_page+0x421/0x690 Mar 20 21:06:40 usbgentoo kernel: f2fs_write_cache_pages+0x1cf/0x460 Mar 20 21:06:40 usbgentoo kernel: f2fs_write_data_pages+0x2b3/0x2e0 Mar 20 21:06:40 usbgentoo kernel: ? f2fs_inode_chksum_verify+0x1d/0xc0 Mar 20 21:06:40 usbgentoo kernel: ? read_node_page+0x71/0xf0 Mar 20 21:06:40 usbgentoo kernel: do_writepages+0x3c/0xd0 Mar 20 21:06:40 usbgentoo kernel: __filemap_fdatawrite_range+0x7c/0xb0 Mar 20 21:06:40 usbgentoo kernel: f2fs_sync_dirty_inodes+0xf2/0x200 Mar 20 21:06:40 usbgentoo kernel: f2fs_balance_fs_bg+0x2a3/0x2c0 Mar 20 21:06:40 usbgentoo kernel: ? f2fs_inode_dirtied+0x21/0xc0 Mar 20 21:06:40 usbgentoo kernel: f2fs_balance_fs+0xd6/0x2b0 Mar 20 21:06:40 usbgentoo kernel: __write_data_page+0x4fb/0x690 ...... Mar 20 21:06:40 usbgentoo kernel: __writeback_single_inode+0x2a1/0x340 Mar 20 21:06:40 usbgentoo kernel: ? soft_cursor+0x1b4/0x220 Mar 20 21:06:40 usbgentoo kernel: writeback_sb_inodes+0x1d5/0x3e0 Mar 20 21:06:40 usbgentoo kernel: __writeback_inodes_wb+0x58/0xa0 Mar 20 21:06:40 usbgentoo kernel: wb_writeback+0x250/0x2e0 Mar 20 21:06:40 usbgentoo kernel: ? 0xffffffff8c000000 Mar 20 21:06:40 usbgentoo kernel: ? cpumask_next+0x16/0x20 Mar 20 21:06:40 usbgentoo kernel: wb_workfn+0x2f6/0x3b0 Mar 20 21:06:40 usbgentoo kernel: ? __switch_to_asm+0x40/0x70 Mar 20 21:06:40 usbgentoo kernel: process_one_work+0x1f5/0x3f0 Mar 20 21:06:40 usbgentoo kernel: worker_thread+0x28/0x3c0 Mar 20 21:06:40 usbgentoo kernel: ? rescuer_thread+0x330/0x330 Mar 20 21:06:40 usbgentoo kernel: kthread+0x10e/0x130 Mar 20 21:06:40 usbgentoo kernel: ? kthread_create_on_node+0x60/0x60 Mar 20 21:06:40 usbgentoo kernel: ret_from_fork+0x35/0x40 The root cause is that we run into an infinite recursive calling in between f2fs_balance_fs_bg and writepage() as described below: - f2fs_write_data_pages --- A - __write_data_page - f2fs_balance_fs - f2fs_balance_fs_bg --- B - f2fs_sync_dirty_inodes - filemap_fdatawrite - f2fs_write_data_pages --- A ... - f2fs_balance_fs_bg --- B ... In order to fix this issue, let's detect such condition in __write_data_page() and just skip calling f2fs_balance_fs() recursively. Reported-by: Hagbard Celine Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 6 ++---- fs/f2fs/data.c | 3 ++- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'fs/f2fs/data.c') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index a98e1b02279e..935ebdb9cf47 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1009,13 +1009,11 @@ retry: if (inode) { unsigned long cur_ino = inode->i_ino; - if (is_dir) - F2FS_I(inode)->cp_task = current; + F2FS_I(inode)->cp_task = current; filemap_fdatawrite(inode->i_mapping); - if (is_dir) - F2FS_I(inode)->cp_task = NULL; + F2FS_I(inode)->cp_task = NULL; iput(inode); /* We need to give cpu to another writers. */ diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index d87dfa5aa112..9d3c11e09a03 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2038,7 +2038,8 @@ out: } unlock_page(page); - if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode)) + if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) && + !F2FS_I(inode)->cp_task) f2fs_balance_fs(sbi, need_balance_fs); if (unlikely(f2fs_cp_error(sbi))) { -- cgit v1.2.3 From adcc00f7dcbf0131070ecc750cf83ee1840f603d Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Sat, 6 Apr 2019 16:29:36 +0530 Subject: f2fs: data: fix warning Using plain integer as NULL pointer changed passing function argument "0 to NULL" to fix below sparse warning fs/f2fs/data.c:426:47: warning: Using plain integer as NULL pointer Signed-off-by: Hariprasad Kelam Reviewed-by: Chao Yu Reviewed-by: Mukesh Ojha Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/f2fs/data.c') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 9d3c11e09a03..0c582b388742 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -428,7 +428,7 @@ static void __submit_merged_write_cond(struct f2fs_sb_info *sbi, void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type) { - __submit_merged_write_cond(sbi, NULL, 0, 0, type, true); + __submit_merged_write_cond(sbi, NULL, NULL, 0, type, true); } void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi, -- cgit v1.2.3 From 6dc3a12663c8a99ef033287f48bbdd61b6b1979b Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 15 Apr 2019 15:26:31 +0800 Subject: f2fs: fix wrong __is_meta_io() macro This patch changes codes as below: - don't use is_read_io() as a condition to judge the meta IO. - use .is_por to replace .is_meta to indicate IO is from recovery explicitly. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 4 ++-- fs/f2fs/data.c | 3 ++- fs/f2fs/f2fs.h | 5 ++--- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'fs/f2fs/data.c') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 935ebdb9cf47..f42b0015724b 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -66,7 +66,7 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index, .old_blkaddr = index, .new_blkaddr = index, .encrypted_page = NULL, - .is_meta = is_meta, + .is_por = !is_meta, }; int err; @@ -189,7 +189,7 @@ int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, .op_flags = sync ? (REQ_META | REQ_PRIO) : REQ_RAHEAD, .encrypted_page = NULL, .in_list = false, - .is_meta = (type != META_POR), + .is_por = (type == META_POR), }; struct blk_plug plug; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 0c582b388742..da932eeecf30 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -456,7 +456,8 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) fio->encrypted_page : fio->page; if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr, - __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC)) + fio->is_por ? META_POR : + (__is_meta_io(fio) ? META_GENERIC : DATA_GENERIC))) return -EFAULT; trace_f2fs_submit_page_bio(page, fio); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 43475794e232..4c6fe137962d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1041,7 +1041,7 @@ struct f2fs_io_info { bool submitted; /* indicate IO submission */ int need_lock; /* indicate we need to lock cp_rwsem */ bool in_list; /* indicate fio is in io_list */ - bool is_meta; /* indicate borrow meta inode mapping or not */ + bool is_por; /* indicate IO is from recovery or not */ bool retry; /* need to reallocate block address */ enum iostat_type io_type; /* io type */ struct writeback_control *io_wbc; /* writeback control */ @@ -2831,8 +2831,7 @@ static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, #define __is_large_section(sbi) ((sbi)->segs_per_sec > 1) -#define __is_meta_io(fio) (PAGE_TYPE_OF_BIO((fio)->type) == META && \ - (!is_read_io((fio)->op) || (fio)->is_meta)) +#define __is_meta_io(fio) (PAGE_TYPE_OF_BIO((fio)->type) == META) bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type); -- cgit v1.2.3 From cd23ffa9fcba071c6a6129c46bf41acca77fab4a Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 15 Apr 2019 15:30:53 +0800 Subject: f2fs: fix to set FI_UPDATE_WRITE correctly This patch fixes to set FI_UPDATE_WRITE only if in-place IO was issued. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/f2fs/data.c') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index da932eeecf30..7a67d6161b84 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1875,9 +1875,10 @@ got_it: true); if (PageWriteback(page)) end_page_writeback(page); + } else { + set_inode_flag(inode, FI_UPDATE_WRITE); } trace_f2fs_do_write_data_page(fio->page, IPU); - set_inode_flag(inode, FI_UPDATE_WRITE); return err; } -- cgit v1.2.3 From 2df0ab045784a1ca904437601a5086f570e8cf16 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 25 Mar 2019 21:07:30 +0800 Subject: f2fs: introduce f2fs_read_single_page() for cleanup This patch introduces f2fs_read_single_page() to wrap core operations of reading one page in f2fs_mpage_readpages(). In addition, if we failed in f2fs_mpage_readpages(), propagate error number to f2fs_read_data_page(), for f2fs_read_data_pages() path, always return success. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 214 +++++++++++++++++++++++++++++++-------------------------- 1 file changed, 118 insertions(+), 96 deletions(-) (limited to 'fs/f2fs/data.c') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 7a67d6161b84..cf89e36577bf 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1508,6 +1508,118 @@ out: return ret; } +static int f2fs_read_single_page(struct inode *inode, struct page *page, + unsigned nr_pages, + struct f2fs_map_blocks *map, + struct bio **bio_ret, + sector_t *last_block_in_bio, + bool is_readahead) +{ + struct bio *bio = *bio_ret; + const unsigned blkbits = inode->i_blkbits; + const unsigned blocksize = 1 << blkbits; + sector_t block_in_file; + sector_t last_block; + sector_t last_block_in_file; + sector_t block_nr; + int ret = 0; + + block_in_file = (sector_t)page->index; + last_block = block_in_file + nr_pages; + last_block_in_file = (i_size_read(inode) + blocksize - 1) >> + blkbits; + if (last_block > last_block_in_file) + last_block = last_block_in_file; + + /* just zeroing out page which is beyond EOF */ + if (block_in_file >= last_block) + goto zero_out; + /* + * Map blocks using the previous result first. + */ + if ((map->m_flags & F2FS_MAP_MAPPED) && + block_in_file > map->m_lblk && + block_in_file < (map->m_lblk + map->m_len)) + goto got_it; + + /* + * Then do more f2fs_map_blocks() calls until we are + * done with this page. + */ + map->m_lblk = block_in_file; + map->m_len = last_block - block_in_file; + + ret = f2fs_map_blocks(inode, map, 0, F2FS_GET_BLOCK_DEFAULT); + if (ret) + goto out; +got_it: + if ((map->m_flags & F2FS_MAP_MAPPED)) { + block_nr = map->m_pblk + block_in_file - map->m_lblk; + SetPageMappedToDisk(page); + + if (!PageUptodate(page) && !cleancache_get_page(page)) { + SetPageUptodate(page); + goto confused; + } + + if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr, + DATA_GENERIC)) { + ret = -EFAULT; + goto out; + } + } else { +zero_out: + zero_user_segment(page, 0, PAGE_SIZE); + if (!PageUptodate(page)) + SetPageUptodate(page); + unlock_page(page); + goto out; + } + + /* + * This page will go to BIO. Do we need to send this + * BIO off first? + */ + if (bio && (*last_block_in_bio != block_nr - 1 || + !__same_bdev(F2FS_I_SB(inode), block_nr, bio))) { +submit_and_realloc: + __submit_bio(F2FS_I_SB(inode), bio, DATA); + bio = NULL; + } + if (bio == NULL) { + bio = f2fs_grab_read_bio(inode, block_nr, nr_pages, + is_readahead ? REQ_RAHEAD : 0); + if (IS_ERR(bio)) { + ret = PTR_ERR(bio); + bio = NULL; + goto out; + } + } + + /* + * If the page is under writeback, we need to wait for + * its completion to see the correct decrypted data. + */ + f2fs_wait_on_block_writeback(inode, block_nr); + + if (bio_add_page(bio, page, blocksize, 0) < blocksize) + goto submit_and_realloc; + + inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA); + ClearPageError(page); + *last_block_in_bio = block_nr; + goto out; +confused: + if (bio) { + __submit_bio(F2FS_I_SB(inode), bio, DATA); + bio = NULL; + } + unlock_page(page); +out: + *bio_ret = bio; + return ret; +} + /* * This function was originally taken from fs/mpage.c, and customized for f2fs. * Major change was from block_size == page_size in f2fs by default. @@ -1524,13 +1636,8 @@ static int f2fs_mpage_readpages(struct address_space *mapping, struct bio *bio = NULL; sector_t last_block_in_bio = 0; struct inode *inode = mapping->host; - const unsigned blkbits = inode->i_blkbits; - const unsigned blocksize = 1 << blkbits; - sector_t block_in_file; - sector_t last_block; - sector_t last_block_in_file; - sector_t block_nr; struct f2fs_map_blocks map; + int ret = 0; map.m_pblk = 0; map.m_lblk = 0; @@ -1553,98 +1660,13 @@ static int f2fs_mpage_readpages(struct address_space *mapping, goto next_page; } - block_in_file = (sector_t)page->index; - last_block = block_in_file + nr_pages; - last_block_in_file = (i_size_read(inode) + blocksize - 1) >> - blkbits; - if (last_block > last_block_in_file) - last_block = last_block_in_file; - - /* just zeroing out page which is beyond EOF */ - if (block_in_file >= last_block) - goto zero_out; - /* - * Map blocks using the previous result first. - */ - if ((map.m_flags & F2FS_MAP_MAPPED) && - block_in_file > map.m_lblk && - block_in_file < (map.m_lblk + map.m_len)) - goto got_it; - - /* - * Then do more f2fs_map_blocks() calls until we are - * done with this page. - */ - map.m_lblk = block_in_file; - map.m_len = last_block - block_in_file; - - if (f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_DEFAULT)) - goto set_error_page; -got_it: - if ((map.m_flags & F2FS_MAP_MAPPED)) { - block_nr = map.m_pblk + block_in_file - map.m_lblk; - SetPageMappedToDisk(page); - - if (!PageUptodate(page) && !cleancache_get_page(page)) { - SetPageUptodate(page); - goto confused; - } - - if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr, - DATA_GENERIC)) - goto set_error_page; - } else { -zero_out: + ret = f2fs_read_single_page(inode, page, nr_pages, &map, &bio, + &last_block_in_bio, is_readahead); + if (ret) { + SetPageError(page); zero_user_segment(page, 0, PAGE_SIZE); - if (!PageUptodate(page)) - SetPageUptodate(page); unlock_page(page); - goto next_page; } - - /* - * This page will go to BIO. Do we need to send this - * BIO off first? - */ - if (bio && (last_block_in_bio != block_nr - 1 || - !__same_bdev(F2FS_I_SB(inode), block_nr, bio))) { -submit_and_realloc: - __submit_bio(F2FS_I_SB(inode), bio, DATA); - bio = NULL; - } - if (bio == NULL) { - bio = f2fs_grab_read_bio(inode, block_nr, nr_pages, - is_readahead ? REQ_RAHEAD : 0); - if (IS_ERR(bio)) { - bio = NULL; - goto set_error_page; - } - } - - /* - * If the page is under writeback, we need to wait for - * its completion to see the correct decrypted data. - */ - f2fs_wait_on_block_writeback(inode, block_nr); - - if (bio_add_page(bio, page, blocksize, 0) < blocksize) - goto submit_and_realloc; - - inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA); - ClearPageError(page); - last_block_in_bio = block_nr; - goto next_page; -set_error_page: - SetPageError(page); - zero_user_segment(page, 0, PAGE_SIZE); - unlock_page(page); - goto next_page; -confused: - if (bio) { - __submit_bio(F2FS_I_SB(inode), bio, DATA); - bio = NULL; - } - unlock_page(page); next_page: if (pages) put_page(page); @@ -1652,7 +1674,7 @@ next_page: BUG_ON(pages && !list_empty(pages)); if (bio) __submit_bio(F2FS_I_SB(inode), bio, DATA); - return 0; + return pages ? 0 : ret; } static int f2fs_read_data_page(struct file *file, struct page *page) -- cgit v1.2.3 From 93770ab7a6e963147a5dbca25278b69ba6c8f8c5 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 15 Apr 2019 15:26:32 +0800 Subject: f2fs: introduce DATA_GENERIC_ENHANCE Previously, f2fs_is_valid_blkaddr(, blkaddr, DATA_GENERIC) will check whether @blkaddr locates in main area or not. That check is weak, since the block address in range of main area can point to the address which is not valid in segment info table, and we can not detect such condition, we may suffer worse corruption as system continues running. So this patch introduce DATA_GENERIC_ENHANCE to enhance the sanity check which trigger SIT bitmap check rather than only range check. This patch did below changes as wel: - set SBI_NEED_FSCK in f2fs_is_valid_blkaddr(). - get rid of is_valid_data_blkaddr() to avoid panic if blkaddr is invalid. - introduce verify_fio_blkaddr() to wrap fio {new,old}_blkaddr validation check. - spread blkaddr check in: * f2fs_get_node_info() * __read_out_blkaddrs() * f2fs_submit_page_read() * ra_data_block() * do_recover_data() This patch can fix bug reported from bugzilla below: https://bugzilla.kernel.org/show_bug.cgi?id=203215 https://bugzilla.kernel.org/show_bug.cgi?id=203223 https://bugzilla.kernel.org/show_bug.cgi?id=203231 https://bugzilla.kernel.org/show_bug.cgi?id=203235 https://bugzilla.kernel.org/show_bug.cgi?id=203241 = Update by Jaegeuk Kim = DATA_GENERIC_ENHANCE enhanced to validate block addresses on read/write paths. But, xfstest/generic/446 compalins some generated kernel messages saying invalid bitmap was detected when reading a block. The reaons is, when we get the block addresses from extent_cache, there is no lock to synchronize it from truncating the blocks in parallel. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 43 +++++++++++++++++++++++++++++++++++++------ fs/f2fs/data.c | 49 ++++++++++++++++++++++++++++++++----------------- fs/f2fs/f2fs.h | 18 ++++++++---------- fs/f2fs/file.c | 15 ++++++++++++--- fs/f2fs/gc.c | 11 ++++++++++- fs/f2fs/inode.c | 7 ++++--- fs/f2fs/node.c | 13 ++++++++++--- fs/f2fs/recovery.c | 12 ++++++++++++ fs/f2fs/segment.c | 4 ++-- fs/f2fs/segment.h | 13 +++++++------ 10 files changed, 134 insertions(+), 51 deletions(-) (limited to 'fs/f2fs/data.c') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index b8614cf77cdd..805a33088e82 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -130,6 +130,30 @@ struct page *f2fs_get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index) return __get_meta_page(sbi, index, false); } +static bool __is_bitmap_valid(struct f2fs_sb_info *sbi, block_t blkaddr, + int type) +{ + struct seg_entry *se; + unsigned int segno, offset; + bool exist; + + if (type != DATA_GENERIC_ENHANCE && type != DATA_GENERIC_ENHANCE_READ) + return true; + + segno = GET_SEGNO(sbi, blkaddr); + offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr); + se = get_seg_entry(sbi, segno); + + exist = f2fs_test_bit(offset, se->cur_valid_map); + if (!exist && type == DATA_GENERIC_ENHANCE) { + f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent error " + "blkaddr:%u, sit bitmap:%d", blkaddr, exist); + set_sbi_flag(sbi, SBI_NEED_FSCK); + WARN_ON(1); + } + return exist; +} + bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type) { @@ -151,15 +175,22 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, return false; break; case META_POR: + if (unlikely(blkaddr >= MAX_BLKADDR(sbi) || + blkaddr < MAIN_BLKADDR(sbi))) + return false; + break; case DATA_GENERIC: + case DATA_GENERIC_ENHANCE: + case DATA_GENERIC_ENHANCE_READ: if (unlikely(blkaddr >= MAX_BLKADDR(sbi) || - blkaddr < MAIN_BLKADDR(sbi))) { - if (type == DATA_GENERIC) { - f2fs_msg(sbi->sb, KERN_WARNING, - "access invalid blkaddr:%u", blkaddr); - WARN_ON(1); - } + blkaddr < MAIN_BLKADDR(sbi))) { + f2fs_msg(sbi->sb, KERN_WARNING, + "access invalid blkaddr:%u", blkaddr); + set_sbi_flag(sbi, SBI_NEED_FSCK); + WARN_ON(1); return false; + } else { + return __is_bitmap_valid(sbi, blkaddr, type); } break; case META_GENERIC: diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index cf89e36577bf..d32a82f25f5a 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -456,8 +456,8 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) fio->encrypted_page : fio->page; if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr, - fio->is_por ? META_POR : - (__is_meta_io(fio) ? META_GENERIC : DATA_GENERIC))) + fio->is_por ? META_POR : (__is_meta_io(fio) ? + META_GENERIC : DATA_GENERIC_ENHANCE))) return -EFAULT; trace_f2fs_submit_page_bio(page, fio); @@ -507,9 +507,7 @@ next: spin_unlock(&io->io_lock); } - if (__is_valid_data_blkaddr(fio->old_blkaddr)) - verify_block_addr(fio, fio->old_blkaddr); - verify_block_addr(fio, fio->new_blkaddr); + verify_fio_blkaddr(fio); bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page; @@ -566,9 +564,6 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, struct bio_post_read_ctx *ctx; unsigned int post_read_steps = 0; - if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) - return ERR_PTR(-EFAULT); - bio = f2fs_bio_alloc(sbi, min_t(int, nr_pages, BIO_MAX_PAGES), false); if (!bio) return ERR_PTR(-ENOMEM); @@ -596,8 +591,10 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, static int f2fs_submit_page_read(struct inode *inode, struct page *page, block_t blkaddr) { - struct bio *bio = f2fs_grab_read_bio(inode, blkaddr, 1, 0); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct bio *bio; + bio = f2fs_grab_read_bio(inode, blkaddr, 1, 0); if (IS_ERR(bio)) return PTR_ERR(bio); @@ -609,8 +606,8 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page, return -EFAULT; } ClearPageError(page); - inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA); - __submit_bio(F2FS_I_SB(inode), bio, DATA); + inc_page_count(sbi, F2FS_RD_DATA); + __submit_bio(sbi, bio, DATA); return 0; } @@ -738,6 +735,11 @@ struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index, if (f2fs_lookup_extent_cache(inode, index, &ei)) { dn.data_blkaddr = ei.blk + index - ei.fofs; + if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), dn.data_blkaddr, + DATA_GENERIC_ENHANCE_READ)) { + err = -EFAULT; + goto put_err; + } goto got_it; } @@ -751,6 +753,13 @@ struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index, err = -ENOENT; goto put_err; } + if (dn.data_blkaddr != NEW_ADDR && + !f2fs_is_valid_blkaddr(F2FS_I_SB(inode), + dn.data_blkaddr, + DATA_GENERIC_ENHANCE)) { + err = -EFAULT; + goto put_err; + } got_it: if (PageUptodate(page)) { unlock_page(page); @@ -1093,12 +1102,12 @@ next_block: blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node); if (__is_valid_data_blkaddr(blkaddr) && - !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) { + !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) { err = -EFAULT; goto sync_out; } - if (is_valid_data_blkaddr(sbi, blkaddr)) { + if (__is_valid_data_blkaddr(blkaddr)) { /* use out-place-update for driect IO under LFS mode */ if (test_opt(sbi, LFS) && flag == F2FS_GET_BLOCK_DIO && map->m_may_create) { @@ -1563,7 +1572,7 @@ got_it: } if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr, - DATA_GENERIC)) { + DATA_GENERIC_ENHANCE_READ)) { ret = -EFAULT; goto out; } @@ -1844,7 +1853,7 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio) fio->old_blkaddr = ei.blk + page->index - ei.fofs; if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr, - DATA_GENERIC)) + DATA_GENERIC_ENHANCE)) return -EFAULT; ipu_force = true; @@ -1871,7 +1880,7 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio) got_it: if (__is_valid_data_blkaddr(fio->old_blkaddr) && !f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr, - DATA_GENERIC)) { + DATA_GENERIC_ENHANCE)) { err = -EFAULT; goto out_writepage; } @@ -1879,7 +1888,8 @@ got_it: * If current allocation needs SSR, * it had better in-place writes for updated data. */ - if (ipu_force || (is_valid_data_blkaddr(fio->sbi, fio->old_blkaddr) && + if (ipu_force || + (__is_valid_data_blkaddr(fio->old_blkaddr) && need_inplace_update(fio))) { err = encrypt_one_page(fio); if (err) @@ -2524,6 +2534,11 @@ repeat: zero_user_segment(page, 0, PAGE_SIZE); SetPageUptodate(page); } else { + if (!f2fs_is_valid_blkaddr(sbi, blkaddr, + DATA_GENERIC_ENHANCE_READ)) { + err = -EFAULT; + goto fail; + } err = f2fs_submit_page_read(inode, page, blkaddr); if (err) goto fail; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 1c814a309748..533fafca68f4 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -210,7 +210,14 @@ enum { META_SSA, META_MAX, META_POR, - DATA_GENERIC, + DATA_GENERIC, /* check range only */ + DATA_GENERIC_ENHANCE, /* strong check on range and segment bitmap */ + DATA_GENERIC_ENHANCE_READ, /* + * strong check on range and segment + * bitmap but no warning due to race + * condition of read on truncated area + * by extent_cache + */ META_GENERIC, }; @@ -2864,15 +2871,6 @@ static inline bool __is_valid_data_blkaddr(block_t blkaddr) return true; } -static inline bool is_valid_data_blkaddr(struct f2fs_sb_info *sbi, - block_t blkaddr) -{ - if (!__is_valid_data_blkaddr(blkaddr)) - return false; - verify_blkaddr(sbi, blkaddr, DATA_GENERIC); - return true; -} - static inline void f2fs_set_page_private(struct page *page, unsigned long data) { diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index e5fa1940ed3c..3baa39e3e1fd 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -356,7 +356,7 @@ static bool __found_offset(struct f2fs_sb_info *sbi, block_t blkaddr, switch (whence) { case SEEK_DATA: if ((blkaddr == NEW_ADDR && dirty == pgofs) || - is_valid_data_blkaddr(sbi, blkaddr)) + __is_valid_data_blkaddr(blkaddr)) return true; break; case SEEK_HOLE: @@ -422,7 +422,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) if (__is_valid_data_blkaddr(blkaddr) && !f2fs_is_valid_blkaddr(F2FS_I_SB(inode), - blkaddr, DATA_GENERIC)) { + blkaddr, DATA_GENERIC_ENHANCE)) { f2fs_put_dnode(&dn); goto fail; } @@ -523,7 +523,8 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) f2fs_set_data_blkaddr(dn); if (__is_valid_data_blkaddr(blkaddr) && - !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) + !f2fs_is_valid_blkaddr(sbi, blkaddr, + DATA_GENERIC_ENHANCE)) continue; f2fs_invalidate_blocks(sbi, blkaddr); @@ -1018,6 +1019,14 @@ next_dnode: for (i = 0; i < done; i++, blkaddr++, do_replace++, dn.ofs_in_node++) { *blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node); + + if (__is_valid_data_blkaddr(*blkaddr) && + !f2fs_is_valid_blkaddr(sbi, *blkaddr, + DATA_GENERIC_ENHANCE)) { + f2fs_put_dnode(&dn); + return -EFAULT; + } + if (!f2fs_is_checkpointed_data(sbi, *blkaddr)) { if (test_opt(sbi, LFS)) { diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index ba30ec90952f..963fb4571fd9 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -656,6 +656,11 @@ static int ra_data_block(struct inode *inode, pgoff_t index) if (f2fs_lookup_extent_cache(inode, index, &ei)) { dn.data_blkaddr = ei.blk + index - ei.fofs; + if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr, + DATA_GENERIC_ENHANCE_READ))) { + err = -EFAULT; + goto put_page; + } goto got_it; } @@ -665,8 +670,12 @@ static int ra_data_block(struct inode *inode, pgoff_t index) goto put_page; f2fs_put_dnode(&dn); + if (!__is_valid_data_blkaddr(dn.data_blkaddr)) { + err = -ENOENT; + goto put_page; + } if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr, - DATA_GENERIC))) { + DATA_GENERIC_ENHANCE))) { err = -EFAULT; goto put_page; } diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index b53952a15ffa..ccb02226dd2c 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -73,7 +73,7 @@ static int __written_first_block(struct f2fs_sb_info *sbi, if (!__is_valid_data_blkaddr(addr)) return 1; - if (!f2fs_is_valid_blkaddr(sbi, addr, DATA_GENERIC)) + if (!f2fs_is_valid_blkaddr(sbi, addr, DATA_GENERIC_ENHANCE)) return -EFAULT; return 0; } @@ -267,9 +267,10 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) struct extent_info *ei = &F2FS_I(inode)->extent_tree->largest; if (ei->len && - (!f2fs_is_valid_blkaddr(sbi, ei->blk, DATA_GENERIC) || + (!f2fs_is_valid_blkaddr(sbi, ei->blk, + DATA_GENERIC_ENHANCE) || !f2fs_is_valid_blkaddr(sbi, ei->blk + ei->len - 1, - DATA_GENERIC))) { + DATA_GENERIC_ENHANCE))) { set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_msg(sbi->sb, KERN_WARNING, "%s: inode (ino=%lx) extent info [%u, %u, %u] " diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 057362a821a0..a6960b47f394 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -454,7 +454,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, new_blkaddr == NULL_ADDR); f2fs_bug_on(sbi, nat_get_blkaddr(e) == NEW_ADDR && new_blkaddr == NEW_ADDR); - f2fs_bug_on(sbi, is_valid_data_blkaddr(sbi, nat_get_blkaddr(e)) && + f2fs_bug_on(sbi, __is_valid_data_blkaddr(nat_get_blkaddr(e)) && new_blkaddr == NEW_ADDR); /* increment version no as node is removed */ @@ -465,7 +465,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, /* change address */ nat_set_blkaddr(e, new_blkaddr); - if (!is_valid_data_blkaddr(sbi, new_blkaddr)) + if (!__is_valid_data_blkaddr(new_blkaddr)) set_nat_flag(e, IS_CHECKPOINTED, false); __set_nat_cache_dirty(nm_i, e); @@ -526,6 +526,7 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct f2fs_nat_entry ne; struct nat_entry *e; pgoff_t index; + block_t blkaddr; int i; ni->nid = nid; @@ -569,6 +570,11 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid, node_info_from_raw_nat(ni, &ne); f2fs_put_page(page, 1); cache: + blkaddr = le32_to_cpu(ne.block_addr); + if (__is_valid_data_blkaddr(blkaddr) && + !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) + return -EFAULT; + /* cache nat entry */ cache_nat_entry(sbi, nid, &ne); return 0; @@ -1548,7 +1554,8 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, } if (__is_valid_data_blkaddr(ni.blk_addr) && - !f2fs_is_valid_blkaddr(sbi, ni.blk_addr, DATA_GENERIC)) { + !f2fs_is_valid_blkaddr(sbi, ni.blk_addr, + DATA_GENERIC_ENHANCE)) { up_read(&sbi->node_write); goto redirty_out; } diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index b14c718139a9..e04f82b3f4fc 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -567,6 +567,18 @@ retry_dn: src = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node); dest = datablock_addr(dn.inode, page, dn.ofs_in_node); + if (__is_valid_data_blkaddr(src) && + !f2fs_is_valid_blkaddr(sbi, src, META_POR)) { + err = -EFAULT; + goto err; + } + + if (__is_valid_data_blkaddr(dest) && + !f2fs_is_valid_blkaddr(sbi, dest, META_POR)) { + err = -EFAULT; + goto err; + } + /* skip recovering if dest is the same as src */ if (src == dest) continue; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index d3bf7a2abbc9..8388d2abacb5 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2217,7 +2217,7 @@ bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr) struct seg_entry *se; bool is_cp = false; - if (!is_valid_data_blkaddr(sbi, blkaddr)) + if (!__is_valid_data_blkaddr(blkaddr)) return true; down_read(&sit_i->sentry_lock); @@ -3338,7 +3338,7 @@ void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr) if (!f2fs_post_read_required(inode)) return; - if (!is_valid_data_blkaddr(sbi, blkaddr)) + if (!__is_valid_data_blkaddr(blkaddr)) return; cpage = find_lock_page(META_MAPPING(sbi), blkaddr); diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 6f48e0763279..429007b8036e 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -82,7 +82,7 @@ (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & ((sbi)->blocks_per_seg - 1)) #define GET_SEGNO(sbi, blk_addr) \ - ((!is_valid_data_blkaddr(sbi, blk_addr)) ? \ + ((!__is_valid_data_blkaddr(blk_addr)) ? \ NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \ GET_SEGNO_FROM_SEG0(sbi, blk_addr))) #define BLKS_PER_SEC(sbi) \ @@ -656,14 +656,15 @@ static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno) f2fs_bug_on(sbi, segno > TOTAL_SEGS(sbi) - 1); } -static inline void verify_block_addr(struct f2fs_io_info *fio, block_t blk_addr) +static inline void verify_fio_blkaddr(struct f2fs_io_info *fio) { struct f2fs_sb_info *sbi = fio->sbi; - if (__is_meta_io(fio)) - verify_blkaddr(sbi, blk_addr, META_GENERIC); - else - verify_blkaddr(sbi, blk_addr, DATA_GENERIC); + if (__is_valid_data_blkaddr(fio->old_blkaddr)) + verify_blkaddr(sbi, fio->old_blkaddr, __is_meta_io(fio) ? + META_GENERIC : DATA_GENERIC); + verify_blkaddr(sbi, fio->new_blkaddr, __is_meta_io(fio) ? + META_GENERIC : DATA_GENERIC_ENHANCE); } /* -- cgit v1.2.3