From 1ffc8f5f7751f91fe6af527d426a723231b741a6 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 14 Jul 2021 16:14:02 -0700 Subject: f2fs: let's keep writing IOs on SBI_NEED_FSCK SBI_NEED_FSCK is an indicator that fsck.f2fs needs to be triggered, so it is not fully critical to stop any IO writes. So, let's allow to write data instead of reporting EIO forever given SBI_NEED_FSCK, but do keep OPU. Fixes: 955772787667 ("f2fs: drop inplace IO if fs status is abnormal") Cc: # v5.13+ Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 15cc89eef28d..f9b7fb785e1d 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3563,7 +3563,7 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio) goto drop_bio; } - if (is_sbi_flag_set(sbi, SBI_NEED_FSCK) || f2fs_cp_error(sbi)) { + if (f2fs_cp_error(sbi)) { err = -EIO; goto drop_bio; } -- cgit v1.2.3 From dc675a97129c4d9d5af55a3d7f23d7e092b8e032 Mon Sep 17 00:00:00 2001 From: Laibin Qiu Date: Sat, 31 Jul 2021 11:26:46 +0800 Subject: f2fs: fix min_seq_blocks can not make sense in some scenes. F2FS have dirty page count control for batched sequential write in writepages, and get the value of min_seq_blocks by blocks_per_seg * segs_per_sec(segs_per_sec defaults to 1). But in some scenes we set a lager section size, Min_seq_blocks will become too large to achieve the expected effect(eg. 4thread sequential write, the number of merge requests will be reduced). Signed-off-by: Laibin Qiu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index f9b7fb785e1d..0f976cefe425 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -5159,7 +5159,7 @@ int f2fs_build_segment_manager(struct f2fs_sb_info *sbi) sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC; sm_info->min_ipu_util = DEF_MIN_IPU_UTIL; sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS; - sm_info->min_seq_blocks = sbi->blocks_per_seg * sbi->segs_per_sec; + sm_info->min_seq_blocks = sbi->blocks_per_seg; sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS; sm_info->min_ssr_sections = reserved_sections(sbi); -- cgit v1.2.3 From 4f993264fe2965c344f223d854ccbb549b16ed71 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 3 Aug 2021 08:15:43 +0800 Subject: f2fs: introduce discard_unit mount option As James Z reported in bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=213877 [1.] One-line summary of the problem: Mount multiple SMR block devices exceed certain number cause system non-response [2.] Full description of the problem/report: Created some F2FS on SMR devices (mkfs.f2fs -m), then mounted in sequence. Each device is the same Model: HGST HSH721414AL (Size 14TB). Empirically, found that when the amount of SMR device * 1.5Gb > System RAM, the system ran out of memory and hung. No dmesg output. For example, 24 SMR Disk need 24*1.5GB = 36GB. A system with 32G RAM can only mount 21 devices, the 22nd device will be a reproducible cause of system hang. The number of SMR devices with other FS mounted on this system does not interfere with the result above. [3.] Keywords (i.e., modules, networking, kernel): F2FS, SMR, Memory [4.] Kernel information [4.1.] Kernel version (uname -a): Linux 5.13.4-200.fc34.x86_64 #1 SMP Tue Jul 20 20:27:29 UTC 2021 x86_64 x86_64 x86_64 GNU/Linux [4.2.] Kernel .config file: Default Fedora 34 with f2fs-tools-1.14.0-2.fc34.x86_64 [5.] Most recent kernel version which did not have the bug: None [6.] Output of Oops.. message (if applicable) with symbolic information resolved (see Documentation/admin-guide/oops-tracing.rst) None [7.] A small shell script or example program which triggers the problem (if possible) mount /dev/sdX /mnt/0X [8.] Memory consumption With 24 * 14T SMR Block device with F2FS free -g total used free shared buff/cache available Mem: 46 36 0 0 10 10 Swap: 0 0 0 With 3 * 14T SMR Block device with F2FS free -g total used free shared buff/cache available Mem: 7 5 0 0 1 1 Swap: 7 0 7 The root cause is, there are three bitmaps: - cur_valid_map - ckpt_valid_map - discard_map and each of them will cost ~500MB memory, {cur, ckpt}_valid_map are necessary, but discard_map is optional, since this bitmap will only be useful in mountpoint that small discard is enabled. For a blkzoned device such as SMR or ZNS devices, f2fs will only issue discard for a section(zone) when all blocks of that section are invalid, so, for such device, we don't need small discard functionality at all. This patch introduces a new mountoption "discard_unit=block|segment| section" to support issuing discard with different basic unit which is aligned to block, segment or section, so that user can specify "discard_unit=segment" or "discard_unit=section" to disable small discard functionality. Note that this mount option can not be changed by remount() due to related metadata need to be initialized during mount(). In order to save memory, let's use "discard_unit=section" for blkzoned device by default. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.rst | 8 ++++ fs/f2fs/f2fs.h | 16 ++++++++ fs/f2fs/segment.c | 82 ++++++++++++++++++++++++-------------- fs/f2fs/super.c | 54 +++++++++++++++++++++++-- fs/f2fs/sysfs.c | 2 + 5 files changed, 130 insertions(+), 32 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst index ff9e7cc97c65..8f251a662542 100644 --- a/Documentation/filesystems/f2fs.rst +++ b/Documentation/filesystems/f2fs.rst @@ -312,6 +312,14 @@ inlinecrypt When possible, encrypt/decrypt the contents of encrypted Documentation/block/inline-encryption.rst. atgc Enable age-threshold garbage collection, it provides high effectiveness and efficiency on background GC. +discard_unit=%s Control discard unit, the argument can be "block", "segment" + and "section", issued discard command's offset/size will be + aligned to the unit, by default, "discard_unit=block" is set, + so that small discard functionality is enabled. + For blkzoned device, "discard_unit=section" will be set by + default, it is helpful for large sized SMR or ZNS devices to + reduce memory cost by getting rid of fs metadata supports small + discard. ======================== ============================================================ Debugfs Entries diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 8459b6d5a2f8..8d4665a7a0fb 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -139,6 +139,11 @@ struct f2fs_mount_info { int fsync_mode; /* fsync policy */ int fs_mode; /* fs mode: LFS or ADAPTIVE */ int bggc_mode; /* bggc mode: off, on or sync */ + int discard_unit; /* + * discard command's offset/size should + * be aligned to this unit: block, + * segment or section + */ struct fscrypt_dummy_policy dummy_enc_policy; /* test dummy encryption */ block_t unusable_cap_perc; /* percentage for cap */ block_t unusable_cap; /* Amount of space allowed to be @@ -1299,6 +1304,12 @@ enum { */ }; +enum { + DISCARD_UNIT_BLOCK, /* basic discard unit is block */ + DISCARD_UNIT_SEGMENT, /* basic discard unit is segment */ + DISCARD_UNIT_SECTION, /* basic discard unit is section */ +}; + static inline int f2fs_test_bit(unsigned int nr, char *addr); static inline void f2fs_set_bit(unsigned int nr, char *addr); static inline void f2fs_clear_bit(unsigned int nr, char *addr); @@ -4365,6 +4376,11 @@ static inline bool is_journalled_quota(struct f2fs_sb_info *sbi) return false; } +static inline bool f2fs_block_unit_discard(struct f2fs_sb_info *sbi) +{ + return F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_BLOCK; +} + #define EFSBADCRC EBADMSG /* Bad CRC detected */ #define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 0f976cefe425..80f26158e304 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1893,7 +1893,8 @@ static int f2fs_issue_discard(struct f2fs_sb_info *sbi, se = get_seg_entry(sbi, GET_SEGNO(sbi, i)); offset = GET_BLKOFF_FROM_SEG0(sbi, i); - if (!f2fs_test_and_set_bit(offset, se->discard_map)) + if (f2fs_block_unit_discard(sbi) && + !f2fs_test_and_set_bit(offset, se->discard_map)) sbi->discard_blks--; } @@ -1918,7 +1919,8 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc, struct list_head *head = &SM_I(sbi)->dcc_info->entry_list; int i; - if (se->valid_blocks == max_blocks || !f2fs_hw_support_discard(sbi)) + if (se->valid_blocks == max_blocks || !f2fs_hw_support_discard(sbi) || + !f2fs_block_unit_discard(sbi)) return false; if (!force) { @@ -2003,14 +2005,18 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi, unsigned int start = 0, end = -1; unsigned int secno, start_segno; bool force = (cpc->reason & CP_DISCARD); - bool need_align = f2fs_lfs_mode(sbi) && __is_large_section(sbi); + bool section_alignment = F2FS_OPTION(sbi).discard_unit == + DISCARD_UNIT_SECTION; + + if (f2fs_lfs_mode(sbi) && __is_large_section(sbi)) + section_alignment = true; mutex_lock(&dirty_i->seglist_lock); while (1) { int i; - if (need_align && end != -1) + if (section_alignment && end != -1) end--; start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1); if (start >= MAIN_SEGS(sbi)) @@ -2018,7 +2024,7 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi, end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi), start + 1); - if (need_align) { + if (section_alignment) { start = rounddown(start, sbi->segs_per_sec); end = roundup(end, sbi->segs_per_sec); } @@ -2056,6 +2062,9 @@ next: } mutex_unlock(&dirty_i->seglist_lock); + if (!f2fs_block_unit_discard(sbi)) + goto wakeup; + /* send small discards */ list_for_each_entry_safe(entry, this, head, list) { unsigned int cur_pos = 0, next_pos, len, total_len = 0; @@ -2089,6 +2098,7 @@ skip: dcc->nr_discards -= total_len; } +wakeup: wake_up_discard_thread(sbi, false); } @@ -2108,6 +2118,11 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi) return -ENOMEM; dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY; + if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SEGMENT) + dcc->discard_granularity = sbi->blocks_per_seg; + else if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SECTION) + dcc->discard_granularity = BLKS_PER_SEC(sbi); + INIT_LIST_HEAD(&dcc->entry_list); for (i = 0; i < MAX_PLIST_NUM; i++) INIT_LIST_HEAD(&dcc->pend_list[i]); @@ -2255,7 +2270,8 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) del = 0; } - if (!f2fs_test_and_set_bit(offset, se->discard_map)) + if (f2fs_block_unit_discard(sbi) && + !f2fs_test_and_set_bit(offset, se->discard_map)) sbi->discard_blks--; /* @@ -2297,7 +2313,8 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) } } - if (f2fs_test_and_clear_bit(offset, se->discard_map)) + if (f2fs_block_unit_discard(sbi) && + f2fs_test_and_clear_bit(offset, se->discard_map)) sbi->discard_blks++; } if (!f2fs_test_bit(offset, se->ckpt_valid_map)) @@ -4282,6 +4299,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi) unsigned int sit_segs, start; char *src_bitmap, *bitmap; unsigned int bitmap_size, main_bitmap_size, sit_bitmap_size; + unsigned int discard_map = f2fs_block_unit_discard(sbi) ? 1 : 0; /* allocate memory for SIT information */ sit_i = f2fs_kzalloc(sbi, sizeof(struct sit_info), GFP_KERNEL); @@ -4304,9 +4322,9 @@ static int build_sit_info(struct f2fs_sb_info *sbi) return -ENOMEM; #ifdef CONFIG_F2FS_CHECK_FS - bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * 4; + bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * (3 + discard_map); #else - bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * 3; + bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * (2 + discard_map); #endif sit_i->bitmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL); if (!sit_i->bitmap) @@ -4326,8 +4344,10 @@ static int build_sit_info(struct f2fs_sb_info *sbi) bitmap += SIT_VBLOCK_MAP_SIZE; #endif - sit_i->sentries[start].discard_map = bitmap; - bitmap += SIT_VBLOCK_MAP_SIZE; + if (discard_map) { + sit_i->sentries[start].discard_map = bitmap; + bitmap += SIT_VBLOCK_MAP_SIZE; + } } sit_i->tmp_map = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); @@ -4489,17 +4509,19 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) if (IS_NODESEG(se->type)) total_node_blocks += se->valid_blocks; - /* build discard map only one time */ - if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { - memset(se->discard_map, 0xff, - SIT_VBLOCK_MAP_SIZE); - } else { - memcpy(se->discard_map, - se->cur_valid_map, - SIT_VBLOCK_MAP_SIZE); - sbi->discard_blks += - sbi->blocks_per_seg - - se->valid_blocks; + if (f2fs_block_unit_discard(sbi)) { + /* build discard map only one time */ + if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { + memset(se->discard_map, 0xff, + SIT_VBLOCK_MAP_SIZE); + } else { + memcpy(se->discard_map, + se->cur_valid_map, + SIT_VBLOCK_MAP_SIZE); + sbi->discard_blks += + sbi->blocks_per_seg - + se->valid_blocks; + } } if (__is_large_section(sbi)) @@ -4535,13 +4557,15 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) if (IS_NODESEG(se->type)) total_node_blocks += se->valid_blocks; - if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { - memset(se->discard_map, 0xff, SIT_VBLOCK_MAP_SIZE); - } else { - memcpy(se->discard_map, se->cur_valid_map, - SIT_VBLOCK_MAP_SIZE); - sbi->discard_blks += old_valid_blocks; - sbi->discard_blks -= se->valid_blocks; + if (f2fs_block_unit_discard(sbi)) { + if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { + memset(se->discard_map, 0xff, SIT_VBLOCK_MAP_SIZE); + } else { + memcpy(se->discard_map, se->cur_valid_map, + SIT_VBLOCK_MAP_SIZE); + sbi->discard_blks += old_valid_blocks; + sbi->discard_blks -= se->valid_blocks; + } } if (__is_large_section(sbi)) { diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 3617aa5f0477..a4fed184b811 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -155,6 +155,7 @@ enum { Opt_atgc, Opt_gc_merge, Opt_nogc_merge, + Opt_discard_unit, Opt_err, }; @@ -231,6 +232,7 @@ static match_table_t f2fs_tokens = { {Opt_atgc, "atgc"}, {Opt_gc_merge, "gc_merge"}, {Opt_nogc_merge, "nogc_merge"}, + {Opt_discard_unit, "discard_unit=%s"}, {Opt_err, NULL}, }; @@ -1173,6 +1175,25 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) case Opt_nogc_merge: clear_opt(sbi, GC_MERGE); break; + case Opt_discard_unit: + name = match_strdup(&args[0]); + if (!name) + return -ENOMEM; + if (!strcmp(name, "block")) { + F2FS_OPTION(sbi).discard_unit = + DISCARD_UNIT_BLOCK; + } else if (!strcmp(name, "segment")) { + F2FS_OPTION(sbi).discard_unit = + DISCARD_UNIT_SEGMENT; + } else if (!strcmp(name, "section")) { + F2FS_OPTION(sbi).discard_unit = + DISCARD_UNIT_SECTION; + } else { + kfree(name); + return -EINVAL; + } + kfree(name); + break; default: f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value", p); @@ -1211,6 +1232,14 @@ default_check: return -EINVAL; } #endif + if (f2fs_sb_has_blkzoned(sbi)) { + if (F2FS_OPTION(sbi).discard_unit != + DISCARD_UNIT_SECTION) { + f2fs_info(sbi, "Zoned block device doesn't need small discard, set discard_unit=section by default"); + F2FS_OPTION(sbi).discard_unit = + DISCARD_UNIT_SECTION; + } + } #ifdef CONFIG_F2FS_FS_COMPRESSION if (f2fs_test_compress_extension(sbi)) { @@ -1925,6 +1954,14 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) if (test_opt(sbi, ATGC)) seq_puts(seq, ",atgc"); + + if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_BLOCK) + seq_printf(seq, ",discard_unit=%s", "block"); + else if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SEGMENT) + seq_printf(seq, ",discard_unit=%s", "segment"); + else if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SECTION) + seq_printf(seq, ",discard_unit=%s", "section"); + return 0; } @@ -1961,10 +1998,13 @@ static void default_options(struct f2fs_sb_info *sbi) sbi->sb->s_flags |= SB_LAZYTIME; set_opt(sbi, FLUSH_MERGE); set_opt(sbi, DISCARD); - if (f2fs_sb_has_blkzoned(sbi)) + if (f2fs_sb_has_blkzoned(sbi)) { F2FS_OPTION(sbi).fs_mode = FS_MODE_LFS; - else + F2FS_OPTION(sbi).discard_unit = DISCARD_UNIT_SECTION; + } else { F2FS_OPTION(sbi).fs_mode = FS_MODE_ADAPTIVE; + F2FS_OPTION(sbi).discard_unit = DISCARD_UNIT_BLOCK; + } #ifdef CONFIG_F2FS_FS_XATTR set_opt(sbi, XATTR_USER); @@ -2066,6 +2106,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) bool no_io_align = !F2FS_IO_ALIGNED(sbi); bool no_atgc = !test_opt(sbi, ATGC); bool no_compress_cache = !test_opt(sbi, COMPRESS_CACHE); + bool block_unit_discard = f2fs_block_unit_discard(sbi); #ifdef CONFIG_QUOTA int i, j; #endif @@ -2166,6 +2207,12 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) goto restore_opts; } + if (block_unit_discard != f2fs_block_unit_discard(sbi)) { + err = -EINVAL; + f2fs_warn(sbi, "switch discard_unit option is not allowed"); + goto restore_opts; + } + if ((*flags & SB_RDONLY) && test_opt(sbi, DISABLE_CHECKPOINT)) { err = -EINVAL; f2fs_warn(sbi, "disabling checkpoint not compatible with read-only"); @@ -3778,7 +3825,8 @@ static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi) /* adjust parameters according to the volume size */ if (sm_i->main_segments <= SMALL_VOLUME_SEGMENTS) { F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE; - sm_i->dcc_info->discard_granularity = 1; + if (f2fs_block_unit_discard(sbi)) + sm_i->dcc_info->discard_granularity = 1; sm_i->ipu_policy = 1 << F2FS_IPU_FORCE; } diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index b1725620c07d..f98afd9f9380 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -428,6 +428,8 @@ out: if (!strcmp(a->attr.name, "discard_granularity")) { if (t == 0 || t > MAX_PLIST_NUM) return -EINVAL; + if (!f2fs_block_unit_discard(sbi)) + return -EINVAL; if (t == *ui) return count; *ui = t; -- cgit v1.2.3 From 91803392c732c43b5cf440e885ea89be7f5fecef Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 4 Aug 2021 08:38:38 +0800 Subject: f2fs: fix to stop filesystem update once CP failed During f2fs_write_checkpoint(), once we failed in f2fs_flush_nat_entries() or do_checkpoint(), metadata of filesystem such as prefree bitmap, nat/sit version bitmap won't be recovered, it may cause f2fs image to be inconsistent, let's just set CP error flag to avoid further updates until we figure out a scheme to rollback all metadatas in such condition. Reported-by: Yangtao Li Signed-off-by: Yangtao Li Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 12 +++++++++--- fs/f2fs/f2fs.h | 2 +- fs/f2fs/segment.c | 15 +++++++++++++-- 3 files changed, 23 insertions(+), 6 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 6c208108d69c..7f6745f4630e 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1639,8 +1639,11 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* write cached NAT/SIT entries to NAT/SIT area */ err = f2fs_flush_nat_entries(sbi, cpc); - if (err) + if (err) { + f2fs_err(sbi, "f2fs_flush_nat_entries failed err:%d, stop checkpoint", err); + f2fs_bug_on(sbi, !f2fs_cp_error(sbi)); goto stop; + } f2fs_flush_sit_entries(sbi, cpc); @@ -1648,10 +1651,13 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) f2fs_save_inmem_curseg(sbi); err = do_checkpoint(sbi, cpc); - if (err) + if (err) { + f2fs_err(sbi, "do_checkpoint failed err:%d, stop checkpoint", err); + f2fs_bug_on(sbi, !f2fs_cp_error(sbi)); f2fs_release_discard_addrs(sbi); - else + } else { f2fs_clear_prefree_segments(sbi, cpc); + } f2fs_restore_inmem_curseg(sbi); stop: diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 1b4c482d08e2..d24fd5045712 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -547,7 +547,7 @@ enum { */ }; -#define DEFAULT_RETRY_IO_COUNT 8 /* maximum retry read IO count */ +#define DEFAULT_RETRY_IO_COUNT 8 /* maximum retry read IO or flush count */ /* congestion wait timeout value, default: 20ms */ #define DEFAULT_IO_TIMEOUT (msecs_to_jiffies(20)) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 80f26158e304..ca9876a6d396 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -776,11 +776,22 @@ int f2fs_flush_device_cache(struct f2fs_sb_info *sbi) return 0; for (i = 1; i < sbi->s_ndevs; i++) { + int count = DEFAULT_RETRY_IO_COUNT; + if (!f2fs_test_bit(i, (char *)&sbi->dirty_device)) continue; - ret = __submit_flush_wait(sbi, FDEV(i).bdev); - if (ret) + + do { + ret = __submit_flush_wait(sbi, FDEV(i).bdev); + if (ret) + congestion_wait(BLK_RW_ASYNC, + DEFAULT_IO_TIMEOUT); + } while (ret && --count); + + if (ret) { + f2fs_stop_checkpoint(sbi, false); break; + } spin_lock(&sbi->dev_lock); f2fs_clear_bit(i, (char *)&sbi->dirty_device); -- cgit v1.2.3 From 324105775c1982aacbd2972b7024d8cc06474abe Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 9 Aug 2021 08:24:48 +0800 Subject: f2fs: support fault injection for f2fs_kmem_cache_alloc() This patch supports to inject fault into f2fs_kmem_cache_alloc(). Usage: a) echo 32768 > /sys/fs/f2fs//inject_type or b) mount -o fault_type=32768 Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.rst | 1 + fs/f2fs/checkpoint.c | 3 ++- fs/f2fs/compress.c | 8 +++++--- fs/f2fs/data.c | 2 +- fs/f2fs/dir.c | 4 ++-- fs/f2fs/extent_cache.c | 5 +++-- fs/f2fs/f2fs.h | 17 ++++++++++++++++- fs/f2fs/gc.c | 6 ++++-- fs/f2fs/node.c | 23 ++++++++++++----------- fs/f2fs/recovery.c | 3 ++- fs/f2fs/segment.c | 10 ++++++---- fs/f2fs/super.c | 4 +++- fs/f2fs/xattr.c | 3 ++- 13 files changed, 59 insertions(+), 30 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst index 9b0517d90063..21d40e3cfd7a 100644 --- a/Documentation/filesystems/f2fs.rst +++ b/Documentation/filesystems/f2fs.rst @@ -195,6 +195,7 @@ fault_type=%d Support configuring fault injection type, should be FAULT_CHECKPOINT 0x000001000 FAULT_DISCARD 0x000002000 FAULT_WRITE_IO 0x000004000 + FAULT_SLAB_ALLOC 0x000008000 =================== =========== mode=%s Control block allocation mode which supports "adaptive" and "lfs". In "lfs" mode, there should be no random diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 5b6ddeae1107..41960c55c343 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -475,7 +475,8 @@ static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, retry: if (!e) - new = f2fs_kmem_cache_alloc(ino_entry_slab, GFP_NOFS); + new = f2fs_kmem_cache_alloc(ino_entry_slab, + GFP_NOFS, true, NULL); radix_tree_preload(GFP_NOFS | __GFP_NOFAIL); diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 7dbfd6965b97..afb79480c6a3 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -28,7 +28,8 @@ static void *page_array_alloc(struct inode *inode, int nr) unsigned int size = sizeof(struct page *) * nr; if (likely(size <= sbi->page_array_slab_size)) - return kmem_cache_zalloc(sbi->page_array_slab, GFP_NOFS); + return f2fs_kmem_cache_alloc(sbi->page_array_slab, + GFP_F2FS_ZERO, false, F2FS_I_SB(inode)); return f2fs_kzalloc(sbi, size, GFP_NOFS); } @@ -1228,7 +1229,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, fio.version = ni.version; - cic = kmem_cache_zalloc(cic_entry_slab, GFP_NOFS); + cic = f2fs_kmem_cache_alloc(cic_entry_slab, GFP_F2FS_ZERO, false, sbi); if (!cic) goto out_put_dnode; @@ -1506,7 +1507,8 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc) pgoff_t start_idx = start_idx_of_cluster(cc); int i; - dic = kmem_cache_zalloc(dic_entry_slab, GFP_NOFS); + dic = f2fs_kmem_cache_alloc(dic_entry_slab, GFP_F2FS_ZERO, + false, F2FS_I_SB(cc->inode)); if (!dic) return ERR_PTR(-ENOMEM); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index cec084806725..12cd63603137 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -724,7 +724,7 @@ static void add_bio_entry(struct f2fs_sb_info *sbi, struct bio *bio, struct f2fs_bio_info *io = sbi->write_io[DATA] + temp; struct bio_entry *be; - be = f2fs_kmem_cache_alloc(bio_entry_slab, GFP_NOFS); + be = f2fs_kmem_cache_alloc(bio_entry_slab, GFP_NOFS, true, NULL); be->bio = bio; bio_get(bio); diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index c250bf46ef5e..1820e9c106f7 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -83,8 +83,8 @@ int f2fs_init_casefolded_name(const struct inode *dir, struct super_block *sb = dir->i_sb; if (IS_CASEFOLDED(dir)) { - fname->cf_name.name = kmem_cache_alloc(f2fs_cf_name_slab, - GFP_NOFS); + fname->cf_name.name = f2fs_kmem_cache_alloc(f2fs_cf_name_slab, + GFP_NOFS, false, F2FS_SB(sb)); if (!fname->cf_name.name) return -ENOMEM; fname->cf_name.len = utf8_casefold(sb->s_encoding, diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index b120589d8517..866e72b29bd5 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -239,7 +239,7 @@ static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi, { struct extent_node *en; - en = kmem_cache_alloc(extent_node_slab, GFP_ATOMIC); + en = f2fs_kmem_cache_alloc(extent_node_slab, GFP_ATOMIC, false, sbi); if (!en) return NULL; @@ -292,7 +292,8 @@ static struct extent_tree *__grab_extent_tree(struct inode *inode) mutex_lock(&sbi->extent_tree_lock); et = radix_tree_lookup(&sbi->extent_tree_root, ino); if (!et) { - et = f2fs_kmem_cache_alloc(extent_tree_slab, GFP_NOFS); + et = f2fs_kmem_cache_alloc(extent_tree_slab, + GFP_NOFS, true, NULL); f2fs_radix_tree_insert(&sbi->extent_tree_root, ino, et); memset(et, 0, sizeof(struct extent_tree)); et->ino = ino; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index e97b4d8c5efc..13a7cfe9b23f 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -53,6 +53,7 @@ enum { FAULT_CHECKPOINT, FAULT_DISCARD, FAULT_WRITE_IO, + FAULT_SLAB_ALLOC, FAULT_MAX, }; @@ -2618,7 +2619,7 @@ static inline struct kmem_cache *f2fs_kmem_cache_create(const char *name, return kmem_cache_create(name, size, 0, SLAB_RECLAIM_ACCOUNT, NULL); } -static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep, +static inline void *f2fs_kmem_cache_alloc_nofail(struct kmem_cache *cachep, gfp_t flags) { void *entry; @@ -2629,6 +2630,20 @@ static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep, return entry; } +static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep, + gfp_t flags, bool nofail, struct f2fs_sb_info *sbi) +{ + if (nofail) + return f2fs_kmem_cache_alloc_nofail(cachep, flags); + + if (time_to_inject(sbi, FAULT_SLAB_ALLOC)) { + f2fs_show_injection_info(sbi, FAULT_SLAB_ALLOC); + return NULL; + } + + return kmem_cache_alloc(cachep, flags); +} + static inline bool is_inflight_io(struct f2fs_sb_info *sbi, int type) { if (get_pages(sbi, F2FS_RD_DATA) || get_pages(sbi, F2FS_RD_NODE) || diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 9dce44619069..3bc0f0162e31 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -371,7 +371,8 @@ static struct victim_entry *attach_victim_entry(struct f2fs_sb_info *sbi, struct atgc_management *am = &sbi->am; struct victim_entry *ve; - ve = f2fs_kmem_cache_alloc(victim_entry_slab, GFP_NOFS); + ve = f2fs_kmem_cache_alloc(victim_entry_slab, + GFP_NOFS, true, NULL); ve->mtime = mtime; ve->segno = segno; @@ -849,7 +850,8 @@ static void add_gc_inode(struct gc_inode_list *gc_list, struct inode *inode) iput(inode); return; } - new_ie = f2fs_kmem_cache_alloc(f2fs_inode_entry_slab, GFP_NOFS); + new_ie = f2fs_kmem_cache_alloc(f2fs_inode_entry_slab, + GFP_NOFS, true, NULL); new_ie->inode = inode; f2fs_radix_tree_insert(&gc_list->iroot, inode->i_ino, new_ie); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 9d838a7929fb..161173de5a2d 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -162,14 +162,13 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid) return dst_page; } -static struct nat_entry *__alloc_nat_entry(nid_t nid, bool no_fail) +static struct nat_entry *__alloc_nat_entry(struct f2fs_sb_info *sbi, + nid_t nid, bool no_fail) { struct nat_entry *new; - if (no_fail) - new = f2fs_kmem_cache_alloc(nat_entry_slab, GFP_F2FS_ZERO); - else - new = kmem_cache_alloc(nat_entry_slab, GFP_F2FS_ZERO); + new = f2fs_kmem_cache_alloc(nat_entry_slab, + GFP_F2FS_ZERO, no_fail, sbi); if (new) { nat_set_nid(new, nid); nat_reset_flag(new); @@ -242,7 +241,8 @@ static struct nat_entry_set *__grab_nat_entry_set(struct f2fs_nm_info *nm_i, head = radix_tree_lookup(&nm_i->nat_set_root, set); if (!head) { - head = f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_NOFS); + head = f2fs_kmem_cache_alloc(nat_entry_set_slab, + GFP_NOFS, true, NULL); INIT_LIST_HEAD(&head->entry_list); INIT_LIST_HEAD(&head->set_list); @@ -329,7 +329,8 @@ static unsigned int f2fs_add_fsync_node_entry(struct f2fs_sb_info *sbi, unsigned long flags; unsigned int seq_id; - fn = f2fs_kmem_cache_alloc(fsync_node_entry_slab, GFP_NOFS); + fn = f2fs_kmem_cache_alloc(fsync_node_entry_slab, + GFP_NOFS, true, NULL); get_page(page); fn->page = page; @@ -428,7 +429,7 @@ static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid, struct f2fs_nm_info *nm_i = NM_I(sbi); struct nat_entry *new, *e; - new = __alloc_nat_entry(nid, false); + new = __alloc_nat_entry(sbi, nid, false); if (!new) return; @@ -451,7 +452,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, { struct f2fs_nm_info *nm_i = NM_I(sbi); struct nat_entry *e; - struct nat_entry *new = __alloc_nat_entry(ni->nid, true); + struct nat_entry *new = __alloc_nat_entry(sbi, ni->nid, true); down_write(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, ni->nid); @@ -2252,7 +2253,7 @@ static bool add_free_nid(struct f2fs_sb_info *sbi, if (unlikely(f2fs_check_nid_range(sbi, nid))) return false; - i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS); + i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS, true, NULL); i->nid = nid; i->state = FREE_NID; @@ -2842,7 +2843,7 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi) ne = __lookup_nat_cache(nm_i, nid); if (!ne) { - ne = __alloc_nat_entry(nid, true); + ne = __alloc_nat_entry(sbi, nid, true); __init_nat_entry(nm_i, ne, &raw_ne, true); } diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 695eacfe776c..04655511d7f5 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -91,7 +91,8 @@ static struct fsync_inode_entry *add_fsync_inode(struct f2fs_sb_info *sbi, goto err_out; } - entry = f2fs_kmem_cache_alloc(fsync_entry_slab, GFP_F2FS_ZERO); + entry = f2fs_kmem_cache_alloc(fsync_entry_slab, + GFP_F2FS_ZERO, true, NULL); entry->inode = inode; list_add_tail(&entry->list, head); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index ca9876a6d396..b4dd22134a73 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -188,7 +188,8 @@ void f2fs_register_inmem_page(struct inode *inode, struct page *page) set_page_private_atomic(page); - new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS); + new = f2fs_kmem_cache_alloc(inmem_entry_slab, + GFP_NOFS, true, NULL); /* add atomic page indices to the list */ new->page = page; @@ -1001,7 +1002,7 @@ static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi, pend_list = &dcc->pend_list[plist_idx(len)]; - dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS); + dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS, true, NULL); INIT_LIST_HEAD(&dc->list); dc->bdev = bdev; dc->lstart = lstart; @@ -1962,7 +1963,7 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc, if (!de) { de = f2fs_kmem_cache_alloc(discard_entry_slab, - GFP_F2FS_ZERO); + GFP_F2FS_ZERO, true, NULL); de->start_blkaddr = START_BLOCK(sbi, cpc->trim_start); list_add_tail(&de->list, head); } @@ -4099,7 +4100,8 @@ static struct page *get_next_sit_page(struct f2fs_sb_info *sbi, static struct sit_entry_set *grab_sit_entry_set(void) { struct sit_entry_set *ses = - f2fs_kmem_cache_alloc(sit_entry_set_slab, GFP_NOFS); + f2fs_kmem_cache_alloc(sit_entry_set_slab, + GFP_NOFS, true, NULL); ses->entry_cnt = 0; INIT_LIST_HEAD(&ses->set_list); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 9e0e3c998142..b556ca38f0fb 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -56,6 +56,7 @@ const char *f2fs_fault_name[FAULT_MAX] = { [FAULT_CHECKPOINT] = "checkpoint error", [FAULT_DISCARD] = "discard error", [FAULT_WRITE_IO] = "write IO error", + [FAULT_SLAB_ALLOC] = "slab alloc", }; void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate, @@ -1300,7 +1301,8 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) { struct f2fs_inode_info *fi; - fi = kmem_cache_alloc(f2fs_inode_cachep, GFP_F2FS_ZERO); + fi = f2fs_kmem_cache_alloc(f2fs_inode_cachep, + GFP_F2FS_ZERO, false, F2FS_SB(sb)); if (!fi) return NULL; diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index c8f34decbf8e..1d2d29dcd41c 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -27,7 +27,8 @@ static void *xattr_alloc(struct f2fs_sb_info *sbi, int size, bool *is_inline) { if (likely(size == sbi->inline_xattr_slab_size)) { *is_inline = true; - return kmem_cache_zalloc(sbi->inline_xattr_slab, GFP_NOFS); + return f2fs_kmem_cache_alloc(sbi->inline_xattr_slab, + GFP_F2FS_ZERO, false, sbi); } *is_inline = false; return f2fs_kzalloc(sbi, size, GFP_NOFS); -- cgit v1.2.3 From 521187439abfb3e1c946796dc2187c443e5457ab Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Thu, 19 Aug 2021 20:52:28 -0700 Subject: f2fs: separate out iostat feature Added F2FS_IOSTAT config option to support getting IO statistics through sysfs and printing out periodic IO statistics tracepoint events and moved I/O statistics related codes into separate files for better maintenance. Signed-off-by: Daeho Jeong Reviewed-by: Chao Yu [Jaegeuk Kim: set default=y] Signed-off-by: Jaegeuk Kim --- fs/f2fs/Kconfig | 9 +++ fs/f2fs/Makefile | 1 + fs/f2fs/checkpoint.c | 1 + fs/f2fs/data.c | 1 + fs/f2fs/f2fs.h | 59 +++-------------- fs/f2fs/file.c | 1 + fs/f2fs/gc.c | 1 + fs/f2fs/iostat.c | 154 ++++++++++++++++++++++++++++++++++++++++++++ fs/f2fs/iostat.h | 27 ++++++++ fs/f2fs/node.c | 1 + fs/f2fs/segment.c | 1 + fs/f2fs/super.c | 10 +-- fs/f2fs/sysfs.c | 106 ++++-------------------------- include/trace/events/f2fs.h | 2 + 14 files changed, 225 insertions(+), 149 deletions(-) create mode 100644 fs/f2fs/iostat.c create mode 100644 fs/f2fs/iostat.h (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig index 2ac026fc3564..7eea3cfd894d 100644 --- a/fs/f2fs/Kconfig +++ b/fs/f2fs/Kconfig @@ -133,3 +133,12 @@ config F2FS_FS_ZSTD default y help Support ZSTD compress algorithm, if unsure, say Y. + +config F2FS_IOSTAT + bool "F2FS IO statistics information" + depends on F2FS_FS + default y + help + Support getting IO statistics through sysfs and printing out periodic + IO statistics tracepoint events. You have to turn on "iostat_enable" + sysfs node to enable this feature. diff --git a/fs/f2fs/Makefile b/fs/f2fs/Makefile index e5295746208b..8a7322d229e4 100644 --- a/fs/f2fs/Makefile +++ b/fs/f2fs/Makefile @@ -9,3 +9,4 @@ f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o f2fs-$(CONFIG_FS_VERITY) += verity.o f2fs-$(CONFIG_F2FS_FS_COMPRESSION) += compress.o +f2fs-$(CONFIG_F2FS_IOSTAT) += iostat.o diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 41960c55c343..3962cfeb4a57 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -18,6 +18,7 @@ #include "f2fs.h" #include "node.h" #include "segment.h" +#include "iostat.h" #include #define DEFAULT_CHECKPOINT_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3)) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index e4e4eb800d2b..fd16c4fc4507 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -25,6 +25,7 @@ #include "f2fs.h" #include "node.h" #include "segment.h" +#include "iostat.h" #include #define NUM_PREALLOC_POST_READ_CTXS 128 diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index fe5f280f6ac0..12ecf6ee9cb5 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1713,14 +1713,6 @@ struct f2fs_sb_info { #endif spinlock_t stat_lock; /* lock for stat operations */ - /* For app/fs IO statistics */ - spinlock_t iostat_lock; - unsigned long long rw_iostat[NR_IO_TYPE]; - unsigned long long prev_rw_iostat[NR_IO_TYPE]; - bool iostat_enable; - unsigned long iostat_next_period; - unsigned int iostat_period_ms; - /* to attach REQ_META|REQ_FUA flags */ unsigned int data_io_flag; unsigned int node_io_flag; @@ -1780,6 +1772,16 @@ struct f2fs_sb_info { unsigned int compress_watermark; /* cache page watermark */ atomic_t compress_page_hit; /* cache hit count */ #endif + +#ifdef CONFIG_F2FS_IOSTAT + /* For app/fs IO statistics */ + spinlock_t iostat_lock; + unsigned long long rw_iostat[NR_IO_TYPE]; + unsigned long long prev_rw_iostat[NR_IO_TYPE]; + bool iostat_enable; + unsigned long iostat_next_period; + unsigned int iostat_period_ms; +#endif }; struct f2fs_private_dio { @@ -3257,47 +3259,6 @@ static inline int get_inline_xattr_addrs(struct inode *inode) sizeof((f2fs_inode)->field)) \ <= (F2FS_OLD_ATTRIBUTE_SIZE + (extra_isize))) \ -#define DEFAULT_IOSTAT_PERIOD_MS 3000 -#define MIN_IOSTAT_PERIOD_MS 100 -/* maximum period of iostat tracing is 1 day */ -#define MAX_IOSTAT_PERIOD_MS 8640000 - -static inline void f2fs_reset_iostat(struct f2fs_sb_info *sbi) -{ - int i; - - spin_lock(&sbi->iostat_lock); - for (i = 0; i < NR_IO_TYPE; i++) { - sbi->rw_iostat[i] = 0; - sbi->prev_rw_iostat[i] = 0; - } - spin_unlock(&sbi->iostat_lock); -} - -extern void f2fs_record_iostat(struct f2fs_sb_info *sbi); - -static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, - enum iostat_type type, unsigned long long io_bytes) -{ - if (!sbi->iostat_enable) - return; - spin_lock(&sbi->iostat_lock); - sbi->rw_iostat[type] += io_bytes; - - if (type == APP_WRITE_IO || type == APP_DIRECT_IO) - sbi->rw_iostat[APP_BUFFERED_IO] = - sbi->rw_iostat[APP_WRITE_IO] - - sbi->rw_iostat[APP_DIRECT_IO]; - - if (type == APP_READ_IO || type == APP_DIRECT_READ_IO) - sbi->rw_iostat[APP_BUFFERED_READ_IO] = - sbi->rw_iostat[APP_READ_IO] - - sbi->rw_iostat[APP_DIRECT_READ_IO]; - spin_unlock(&sbi->iostat_lock); - - f2fs_record_iostat(sbi); -} - #define __is_large_section(sbi) ((sbi)->segs_per_sec > 1) #define __is_meta_io(fio) (PAGE_TYPE_OF_BIO((fio)->type) == META) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index d4fc5e0d2ffe..ab4ea2ddcc8b 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -31,6 +31,7 @@ #include "xattr.h" #include "acl.h" #include "gc.h" +#include "iostat.h" #include #include diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 3bc0f0162e31..2c18443972b6 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -19,6 +19,7 @@ #include "node.h" #include "segment.h" #include "gc.h" +#include "iostat.h" #include static struct kmem_cache *victim_entry_slab; diff --git a/fs/f2fs/iostat.c b/fs/f2fs/iostat.c new file mode 100644 index 000000000000..21c29e121a86 --- /dev/null +++ b/fs/f2fs/iostat.c @@ -0,0 +1,154 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * f2fs iostat support + * + * Copyright 2021 Google LLC + * Author: Daeho Jeong + */ + +#include +#include +#include + +#include "f2fs.h" +#include "iostat.h" +#include + +int __maybe_unused iostat_info_seq_show(struct seq_file *seq, void *offset) +{ + struct super_block *sb = seq->private; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + time64_t now = ktime_get_real_seconds(); + + if (!sbi->iostat_enable) + return 0; + + seq_printf(seq, "time: %-16llu\n", now); + + /* print app write IOs */ + seq_puts(seq, "[WRITE]\n"); + seq_printf(seq, "app buffered: %-16llu\n", + sbi->rw_iostat[APP_BUFFERED_IO]); + seq_printf(seq, "app direct: %-16llu\n", + sbi->rw_iostat[APP_DIRECT_IO]); + seq_printf(seq, "app mapped: %-16llu\n", + sbi->rw_iostat[APP_MAPPED_IO]); + + /* print fs write IOs */ + seq_printf(seq, "fs data: %-16llu\n", + sbi->rw_iostat[FS_DATA_IO]); + seq_printf(seq, "fs node: %-16llu\n", + sbi->rw_iostat[FS_NODE_IO]); + seq_printf(seq, "fs meta: %-16llu\n", + sbi->rw_iostat[FS_META_IO]); + seq_printf(seq, "fs gc data: %-16llu\n", + sbi->rw_iostat[FS_GC_DATA_IO]); + seq_printf(seq, "fs gc node: %-16llu\n", + sbi->rw_iostat[FS_GC_NODE_IO]); + seq_printf(seq, "fs cp data: %-16llu\n", + sbi->rw_iostat[FS_CP_DATA_IO]); + seq_printf(seq, "fs cp node: %-16llu\n", + sbi->rw_iostat[FS_CP_NODE_IO]); + seq_printf(seq, "fs cp meta: %-16llu\n", + sbi->rw_iostat[FS_CP_META_IO]); + + /* print app read IOs */ + seq_puts(seq, "[READ]\n"); + seq_printf(seq, "app buffered: %-16llu\n", + sbi->rw_iostat[APP_BUFFERED_READ_IO]); + seq_printf(seq, "app direct: %-16llu\n", + sbi->rw_iostat[APP_DIRECT_READ_IO]); + seq_printf(seq, "app mapped: %-16llu\n", + sbi->rw_iostat[APP_MAPPED_READ_IO]); + + /* print fs read IOs */ + seq_printf(seq, "fs data: %-16llu\n", + sbi->rw_iostat[FS_DATA_READ_IO]); + seq_printf(seq, "fs gc data: %-16llu\n", + sbi->rw_iostat[FS_GDATA_READ_IO]); + seq_printf(seq, "fs compr_data: %-16llu\n", + sbi->rw_iostat[FS_CDATA_READ_IO]); + seq_printf(seq, "fs node: %-16llu\n", + sbi->rw_iostat[FS_NODE_READ_IO]); + seq_printf(seq, "fs meta: %-16llu\n", + sbi->rw_iostat[FS_META_READ_IO]); + + /* print other IOs */ + seq_puts(seq, "[OTHER]\n"); + seq_printf(seq, "fs discard: %-16llu\n", + sbi->rw_iostat[FS_DISCARD]); + + return 0; +} + +static inline void f2fs_record_iostat(struct f2fs_sb_info *sbi) +{ + unsigned long long iostat_diff[NR_IO_TYPE]; + int i; + + if (time_is_after_jiffies(sbi->iostat_next_period)) + return; + + /* Need double check under the lock */ + spin_lock(&sbi->iostat_lock); + if (time_is_after_jiffies(sbi->iostat_next_period)) { + spin_unlock(&sbi->iostat_lock); + return; + } + sbi->iostat_next_period = jiffies + + msecs_to_jiffies(sbi->iostat_period_ms); + + for (i = 0; i < NR_IO_TYPE; i++) { + iostat_diff[i] = sbi->rw_iostat[i] - + sbi->prev_rw_iostat[i]; + sbi->prev_rw_iostat[i] = sbi->rw_iostat[i]; + } + spin_unlock(&sbi->iostat_lock); + + trace_f2fs_iostat(sbi, iostat_diff); +} + +void f2fs_reset_iostat(struct f2fs_sb_info *sbi) +{ + int i; + + spin_lock(&sbi->iostat_lock); + for (i = 0; i < NR_IO_TYPE; i++) { + sbi->rw_iostat[i] = 0; + sbi->prev_rw_iostat[i] = 0; + } + spin_unlock(&sbi->iostat_lock); +} + +void f2fs_update_iostat(struct f2fs_sb_info *sbi, + enum iostat_type type, unsigned long long io_bytes) +{ + if (!sbi->iostat_enable) + return; + + spin_lock(&sbi->iostat_lock); + sbi->rw_iostat[type] += io_bytes; + + if (type == APP_WRITE_IO || type == APP_DIRECT_IO) + sbi->rw_iostat[APP_BUFFERED_IO] = + sbi->rw_iostat[APP_WRITE_IO] - + sbi->rw_iostat[APP_DIRECT_IO]; + + if (type == APP_READ_IO || type == APP_DIRECT_READ_IO) + sbi->rw_iostat[APP_BUFFERED_READ_IO] = + sbi->rw_iostat[APP_READ_IO] - + sbi->rw_iostat[APP_DIRECT_READ_IO]; + spin_unlock(&sbi->iostat_lock); + + f2fs_record_iostat(sbi); +} + +int f2fs_init_iostat(struct f2fs_sb_info *sbi) +{ + /* init iostat info */ + spin_lock_init(&sbi->iostat_lock); + sbi->iostat_enable = false; + sbi->iostat_period_ms = DEFAULT_IOSTAT_PERIOD_MS; + + return 0; +} diff --git a/fs/f2fs/iostat.h b/fs/f2fs/iostat.h new file mode 100644 index 000000000000..46e4a36fc8e9 --- /dev/null +++ b/fs/f2fs/iostat.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2021 Google LLC + * Author: Daeho Jeong + */ +#ifndef __F2FS_IOSTAT_H__ +#define __F2FS_IOSTAT_H__ + +#ifdef CONFIG_F2FS_IOSTAT + +#define DEFAULT_IOSTAT_PERIOD_MS 3000 +#define MIN_IOSTAT_PERIOD_MS 100 +/* maximum period of iostat tracing is 1 day */ +#define MAX_IOSTAT_PERIOD_MS 8640000 + +extern int __maybe_unused iostat_info_seq_show(struct seq_file *seq, + void *offset); +extern void f2fs_reset_iostat(struct f2fs_sb_info *sbi); +extern void f2fs_update_iostat(struct f2fs_sb_info *sbi, + enum iostat_type type, unsigned long long io_bytes); +extern int f2fs_init_iostat(struct f2fs_sb_info *sbi); +#else +static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, + enum iostat_type type, unsigned long long io_bytes) {} +static inline int f2fs_init_iostat(struct f2fs_sb_info *sbi) { return 0; } +#endif +#endif /* __F2FS_IOSTAT_H__ */ diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 161173de5a2d..043cb831b289 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -17,6 +17,7 @@ #include "node.h" #include "segment.h" #include "xattr.h" +#include "iostat.h" #include #define on_f2fs_build_free_nids(nmi) mutex_is_locked(&(nm_i)->build_lock) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index b4dd22134a73..73abec9988e9 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -20,6 +20,7 @@ #include "segment.h" #include "node.h" #include "gc.h" +#include "iostat.h" #include #define __reverse_ffz(x) __reverse_ffs(~(x)) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index b556ca38f0fb..a23926d1a77b 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -33,6 +33,7 @@ #include "segment.h" #include "xattr.h" #include "gc.h" +#include "iostat.h" #define CREATE_TRACE_POINTS #include @@ -3964,11 +3965,6 @@ try_onemore: set_sbi_flag(sbi, SBI_POR_DOING); spin_lock_init(&sbi->stat_lock); - /* init iostat info */ - spin_lock_init(&sbi->iostat_lock); - sbi->iostat_enable = false; - sbi->iostat_period_ms = DEFAULT_IOSTAT_PERIOD_MS; - for (i = 0; i < NR_PAGE_TYPE; i++) { int n = (i == META) ? 1 : NR_TEMP_TYPE; int j; @@ -3999,6 +3995,10 @@ try_onemore: init_waitqueue_head(&sbi->cp_wait); init_sb_info(sbi); + err = f2fs_init_iostat(sbi); + if (err) + goto free_bio_info; + err = init_percpu_info(sbi); if (err) goto free_bio_info; diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 36d7e40bf12e..a1a3e0f6d658 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -17,6 +17,7 @@ #include "f2fs.h" #include "segment.h" #include "gc.h" +#include "iostat.h" #include static struct proc_dir_entry *f2fs_proc_root; @@ -477,6 +478,7 @@ out: return count; } +#ifdef CONFIG_F2FS_IOSTAT if (!strcmp(a->attr.name, "iostat_enable")) { sbi->iostat_enable = !!t; if (!sbi->iostat_enable) @@ -492,6 +494,7 @@ out: spin_unlock(&sbi->iostat_lock); return count; } +#endif #ifdef CONFIG_F2FS_FS_COMPRESSION if (!strcmp(a->attr.name, "compr_written_block") || @@ -700,8 +703,10 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, discard_idle_interval, F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_idle_interval, interval_time[GC_TIME]); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, umount_discard_timeout, interval_time[UMOUNT_DISCARD_TIMEOUT]); +#ifdef CONFIG_F2FS_IOSTAT F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_enable, iostat_enable); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_period_ms, iostat_period_ms); +#endif F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, readdir_ra, readdir_ra); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_io_bytes, max_io_bytes); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_pin_file_thresh, gc_pin_file_threshold); @@ -807,8 +812,10 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(discard_idle_interval), ATTR_LIST(gc_idle_interval), ATTR_LIST(umount_discard_timeout), +#ifdef CONFIG_F2FS_IOSTAT ATTR_LIST(iostat_enable), ATTR_LIST(iostat_period_ms), +#endif ATTR_LIST(readdir_ra), ATTR_LIST(max_io_bytes), ATTR_LIST(gc_pin_file_thresh), @@ -1076,101 +1083,6 @@ static int __maybe_unused segment_bits_seq_show(struct seq_file *seq, return 0; } -void f2fs_record_iostat(struct f2fs_sb_info *sbi) -{ - unsigned long long iostat_diff[NR_IO_TYPE]; - int i; - - if (time_is_after_jiffies(sbi->iostat_next_period)) - return; - - /* Need double check under the lock */ - spin_lock(&sbi->iostat_lock); - if (time_is_after_jiffies(sbi->iostat_next_period)) { - spin_unlock(&sbi->iostat_lock); - return; - } - sbi->iostat_next_period = jiffies + - msecs_to_jiffies(sbi->iostat_period_ms); - - for (i = 0; i < NR_IO_TYPE; i++) { - iostat_diff[i] = sbi->rw_iostat[i] - - sbi->prev_rw_iostat[i]; - sbi->prev_rw_iostat[i] = sbi->rw_iostat[i]; - } - spin_unlock(&sbi->iostat_lock); - - trace_f2fs_iostat(sbi, iostat_diff); -} - -static int __maybe_unused iostat_info_seq_show(struct seq_file *seq, - void *offset) -{ - struct super_block *sb = seq->private; - struct f2fs_sb_info *sbi = F2FS_SB(sb); - time64_t now = ktime_get_real_seconds(); - - if (!sbi->iostat_enable) - return 0; - - seq_printf(seq, "time: %-16llu\n", now); - - /* print app write IOs */ - seq_puts(seq, "[WRITE]\n"); - seq_printf(seq, "app buffered: %-16llu\n", - sbi->rw_iostat[APP_BUFFERED_IO]); - seq_printf(seq, "app direct: %-16llu\n", - sbi->rw_iostat[APP_DIRECT_IO]); - seq_printf(seq, "app mapped: %-16llu\n", - sbi->rw_iostat[APP_MAPPED_IO]); - - /* print fs write IOs */ - seq_printf(seq, "fs data: %-16llu\n", - sbi->rw_iostat[FS_DATA_IO]); - seq_printf(seq, "fs node: %-16llu\n", - sbi->rw_iostat[FS_NODE_IO]); - seq_printf(seq, "fs meta: %-16llu\n", - sbi->rw_iostat[FS_META_IO]); - seq_printf(seq, "fs gc data: %-16llu\n", - sbi->rw_iostat[FS_GC_DATA_IO]); - seq_printf(seq, "fs gc node: %-16llu\n", - sbi->rw_iostat[FS_GC_NODE_IO]); - seq_printf(seq, "fs cp data: %-16llu\n", - sbi->rw_iostat[FS_CP_DATA_IO]); - seq_printf(seq, "fs cp node: %-16llu\n", - sbi->rw_iostat[FS_CP_NODE_IO]); - seq_printf(seq, "fs cp meta: %-16llu\n", - sbi->rw_iostat[FS_CP_META_IO]); - - /* print app read IOs */ - seq_puts(seq, "[READ]\n"); - seq_printf(seq, "app buffered: %-16llu\n", - sbi->rw_iostat[APP_BUFFERED_READ_IO]); - seq_printf(seq, "app direct: %-16llu\n", - sbi->rw_iostat[APP_DIRECT_READ_IO]); - seq_printf(seq, "app mapped: %-16llu\n", - sbi->rw_iostat[APP_MAPPED_READ_IO]); - - /* print fs read IOs */ - seq_printf(seq, "fs data: %-16llu\n", - sbi->rw_iostat[FS_DATA_READ_IO]); - seq_printf(seq, "fs gc data: %-16llu\n", - sbi->rw_iostat[FS_GDATA_READ_IO]); - seq_printf(seq, "fs compr_data: %-16llu\n", - sbi->rw_iostat[FS_CDATA_READ_IO]); - seq_printf(seq, "fs node: %-16llu\n", - sbi->rw_iostat[FS_NODE_READ_IO]); - seq_printf(seq, "fs meta: %-16llu\n", - sbi->rw_iostat[FS_META_READ_IO]); - - /* print other IOs */ - seq_puts(seq, "[OTHER]\n"); - seq_printf(seq, "fs discard: %-16llu\n", - sbi->rw_iostat[FS_DISCARD]); - - return 0; -} - static int __maybe_unused victim_bits_seq_show(struct seq_file *seq, void *offset) { @@ -1257,8 +1169,10 @@ int f2fs_register_sysfs(struct f2fs_sb_info *sbi) segment_info_seq_show, sb); proc_create_single_data("segment_bits", 0444, sbi->s_proc, segment_bits_seq_show, sb); +#ifdef CONFIG_F2FS_IOSTAT proc_create_single_data("iostat_info", 0444, sbi->s_proc, iostat_info_seq_show, sb); +#endif proc_create_single_data("victim_bits", 0444, sbi->s_proc, victim_bits_seq_show, sb); } @@ -1278,7 +1192,9 @@ put_sb_kobj: void f2fs_unregister_sysfs(struct f2fs_sb_info *sbi) { if (sbi->s_proc) { +#ifdef CONFIG_F2FS_IOSTAT remove_proc_entry("iostat_info", sbi->s_proc); +#endif remove_proc_entry("segment_info", sbi->s_proc); remove_proc_entry("segment_bits", sbi->s_proc); remove_proc_entry("victim_bits", sbi->s_proc); diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 56b113e3cd6a..3eaf19aa89af 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -1818,6 +1818,7 @@ DEFINE_EVENT(f2fs_zip_end, f2fs_decompress_pages_end, TP_ARGS(inode, cluster_idx, compressed_size, ret) ); +#ifdef CONFIG_F2FS_IOSTAT TRACE_EVENT(f2fs_iostat, TP_PROTO(struct f2fs_sb_info *sbi, unsigned long long *iostat), @@ -1893,6 +1894,7 @@ TRACE_EVENT(f2fs_iostat, __entry->app_mrio, __entry->fs_drio, __entry->fs_gdrio, __entry->fs_cdrio, __entry->fs_nrio, __entry->fs_mrio) ); +#endif TRACE_EVENT(f2fs_bmap, -- cgit v1.2.3 From 4d67490498acb4ffcef5ba7bc44990d46e66a44c Mon Sep 17 00:00:00 2001 From: Fengnan Chang Date: Thu, 19 Aug 2021 16:02:37 +0800 Subject: f2fs: Don't create discard thread when device doesn't support realtime discard Don't create discard thread when device doesn't support realtime discard or user specifies nodiscard mount option. Signed-off-by: Fengnan Chang Signed-off-by: Yangtao Li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 + fs/f2fs/segment.c | 25 +++++++++++++++++++------ fs/f2fs/super.c | 27 ++++++++++++++++++++++++++- 3 files changed, 46 insertions(+), 7 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 30134307a404..c24f03e054cb 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3448,6 +3448,7 @@ int f2fs_flush_device_cache(struct f2fs_sb_info *sbi); void f2fs_destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free); void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr); bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr); +int f2fs_start_discard_thread(struct f2fs_sb_info *sbi); void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi); void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi); bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 73abec9988e9..a135d2247415 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2115,9 +2115,25 @@ wakeup: wake_up_discard_thread(sbi, false); } -static int create_discard_cmd_control(struct f2fs_sb_info *sbi) +int f2fs_start_discard_thread(struct f2fs_sb_info *sbi) { dev_t dev = sbi->sb->s_bdev->bd_dev; + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + int err = 0; + + if (!f2fs_realtime_discard_enable(sbi)) + return 0; + + dcc->f2fs_issue_discard = kthread_run(issue_discard_thread, sbi, + "f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev)); + if (IS_ERR(dcc->f2fs_issue_discard)) + err = PTR_ERR(dcc->f2fs_issue_discard); + + return err; +} + +static int create_discard_cmd_control(struct f2fs_sb_info *sbi) +{ struct discard_cmd_control *dcc; int err = 0, i; @@ -2155,13 +2171,10 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi) init_waitqueue_head(&dcc->discard_wait_queue); SM_I(sbi)->dcc_info = dcc; init_thread: - dcc->f2fs_issue_discard = kthread_run(issue_discard_thread, sbi, - "f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev)); - if (IS_ERR(dcc->f2fs_issue_discard)) { - err = PTR_ERR(dcc->f2fs_issue_discard); + err = f2fs_start_discard_thread(sbi); + if (err) { kfree(dcc); SM_I(sbi)->dcc_info = NULL; - return err; } return err; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index f5148f2fd884..d18271f3d190 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2105,12 +2105,15 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) bool need_restart_gc = false, need_stop_gc = false; bool need_restart_ckpt = false, need_stop_ckpt = false; bool need_restart_flush = false, need_stop_flush = false; + bool need_restart_discard = false, need_stop_discard = false; bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE); bool enable_checkpoint = !test_opt(sbi, DISABLE_CHECKPOINT); bool no_io_align = !F2FS_IO_ALIGNED(sbi); bool no_atgc = !test_opt(sbi, ATGC); + bool no_discard = !test_opt(sbi, DISCARD); bool no_compress_cache = !test_opt(sbi, COMPRESS_CACHE); bool block_unit_discard = f2fs_block_unit_discard(sbi); + struct discard_cmd_control *dcc; #ifdef CONFIG_QUOTA int i, j; #endif @@ -2282,11 +2285,26 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) need_stop_flush = true; } + if (no_discard == !!test_opt(sbi, DISCARD)) { + if (test_opt(sbi, DISCARD)) { + err = f2fs_start_discard_thread(sbi); + if (err) + goto restore_flush; + need_stop_discard = true; + } else { + dcc = SM_I(sbi)->dcc_info; + f2fs_stop_discard_thread(sbi); + if (atomic_read(&dcc->discard_cmd_cnt)) + f2fs_issue_discard_timeout(sbi); + need_restart_discard = true; + } + } + if (enable_checkpoint == !!test_opt(sbi, DISABLE_CHECKPOINT)) { if (test_opt(sbi, DISABLE_CHECKPOINT)) { err = f2fs_disable_checkpoint(sbi); if (err) - goto restore_flush; + goto restore_discard; } else { f2fs_enable_checkpoint(sbi); } @@ -2306,6 +2324,13 @@ skip: adjust_unusable_cap_perc(sbi); *flags = (*flags & ~SB_LAZYTIME) | (sb->s_flags & SB_LAZYTIME); return 0; +restore_discard: + if (need_restart_discard) { + if (f2fs_start_discard_thread(sbi)) + f2fs_warn(sbi, "discard has been stopped"); + } else if (need_stop_discard) { + f2fs_stop_discard_thread(sbi); + } restore_flush: if (need_restart_flush) { if (f2fs_create_flush_cmd_control(sbi)) -- cgit v1.2.3