From 3d7bfea8b8378277a25b42b28fe5a2a5ca76a7cf Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Wed, 8 Jul 2020 02:12:34 -0700 Subject: unicode: Add utf8_casefold_hash This adds a case insensitive hash function to allow taking the hash without needing to allocate a casefolded copy of the string. The existing d_hash implementations for casefolding allocate memory within rcu-walk, by avoiding it we can be more efficient and avoid worrying about a failed allocation. Signed-off-by: Daniel Rosenberg Reviewed-by: Gabriel Krisman Bertazi Reviewed-by: Eric Biggers Signed-off-by: Jaegeuk Kim --- include/linux/unicode.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/unicode.h b/include/linux/unicode.h index 990aa97d8049..74484d44c755 100644 --- a/include/linux/unicode.h +++ b/include/linux/unicode.h @@ -27,6 +27,9 @@ int utf8_normalize(const struct unicode_map *um, const struct qstr *str, int utf8_casefold(const struct unicode_map *um, const struct qstr *str, unsigned char *dest, size_t dlen); +int utf8_casefold_hash(const struct unicode_map *um, const void *salt, + struct qstr *str); + struct unicode_map *utf8_load(const char *version); void utf8_unload(struct unicode_map *um); -- cgit v1.2.3 From c843843e714c8f17280d7db009412b1b1baf448b Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Wed, 8 Jul 2020 02:12:35 -0700 Subject: fs: Add standard casefolding support This adds general supporting functions for filesystems that use utf8 casefolding. It provides standard dentry_operations and adds the necessary structures in struct super_block to allow this standardization. The new dentry operations are functionally equivalent to the existing operations in ext4 and f2fs, apart from the use of utf8_casefold_hash to avoid an allocation. By providing a common implementation, all users can benefit from any optimizations without needing to port over improvements. Signed-off-by: Daniel Rosenberg Reviewed-by: Eric Biggers Signed-off-by: Jaegeuk Kim --- fs/libfs.c | 87 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/fs.h | 16 ++++++++++ 2 files changed, 103 insertions(+) (limited to 'include/linux') diff --git a/fs/libfs.c b/fs/libfs.c index e0d42e977d9a..fc34361c1489 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -20,6 +20,8 @@ #include #include #include +#include +#include #include @@ -1363,3 +1365,88 @@ bool is_empty_dir_inode(struct inode *inode) return (inode->i_fop == &empty_dir_operations) && (inode->i_op == &empty_dir_inode_operations); } + +#ifdef CONFIG_UNICODE +/* + * Determine if the name of a dentry should be casefolded. + * + * Return: if names will need casefolding + */ +static bool needs_casefold(const struct inode *dir) +{ + return IS_CASEFOLDED(dir) && dir->i_sb->s_encoding; +} + +/** + * generic_ci_d_compare - generic d_compare implementation for casefolding filesystems + * @dentry: dentry whose name we are checking against + * @len: len of name of dentry + * @str: str pointer to name of dentry + * @name: Name to compare against + * + * Return: 0 if names match, 1 if mismatch, or -ERRNO + */ +int generic_ci_d_compare(const struct dentry *dentry, unsigned int len, + const char *str, const struct qstr *name) +{ + const struct dentry *parent = READ_ONCE(dentry->d_parent); + const struct inode *dir = READ_ONCE(parent->d_inode); + const struct super_block *sb = dentry->d_sb; + const struct unicode_map *um = sb->s_encoding; + struct qstr qstr = QSTR_INIT(str, len); + char strbuf[DNAME_INLINE_LEN]; + int ret; + + if (!dir || !needs_casefold(dir)) + goto fallback; + /* + * If the dentry name is stored in-line, then it may be concurrently + * modified by a rename. If this happens, the VFS will eventually retry + * the lookup, so it doesn't matter what ->d_compare() returns. + * However, it's unsafe to call utf8_strncasecmp() with an unstable + * string. Therefore, we have to copy the name into a temporary buffer. + */ + if (len <= DNAME_INLINE_LEN - 1) { + memcpy(strbuf, str, len); + strbuf[len] = 0; + qstr.name = strbuf; + /* prevent compiler from optimizing out the temporary buffer */ + barrier(); + } + ret = utf8_strncasecmp(um, name, &qstr); + if (ret >= 0) + return ret; + + if (sb_has_strict_encoding(sb)) + return -EINVAL; +fallback: + if (len != name->len) + return 1; + return !!memcmp(str, name->name, len); +} +EXPORT_SYMBOL(generic_ci_d_compare); + +/** + * generic_ci_d_hash - generic d_hash implementation for casefolding filesystems + * @dentry: dentry of the parent directory + * @str: qstr of name whose hash we should fill in + * + * Return: 0 if hash was successful or unchanged, and -EINVAL on error + */ +int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str) +{ + const struct inode *dir = READ_ONCE(dentry->d_inode); + struct super_block *sb = dentry->d_sb; + const struct unicode_map *um = sb->s_encoding; + int ret = 0; + + if (!dir || !needs_casefold(dir)) + return 0; + + ret = utf8_casefold_hash(um, dentry, str); + if (ret < 0 && sb_has_strict_encoding(sb)) + return -EINVAL; + return 0; +} +EXPORT_SYMBOL(generic_ci_d_hash); +#endif diff --git a/include/linux/fs.h b/include/linux/fs.h index 7519ae003a08..bc5417c61e12 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1371,6 +1371,12 @@ extern int send_sigurg(struct fown_struct *fown); #define SB_ACTIVE (1<<30) #define SB_NOUSER (1<<31) +/* These flags relate to encoding and casefolding */ +#define SB_ENC_STRICT_MODE_FL (1 << 0) + +#define sb_has_strict_encoding(sb) \ + (sb->s_encoding_flags & SB_ENC_STRICT_MODE_FL) + /* * Umount options */ @@ -1440,6 +1446,10 @@ struct super_block { #endif #ifdef CONFIG_FS_VERITY const struct fsverity_operations *s_vop; +#endif +#ifdef CONFIG_UNICODE + struct unicode_map *s_encoding; + __u16 s_encoding_flags; #endif struct hlist_bl_head s_roots; /* alternate root dentries for NFS */ struct list_head s_mounts; /* list of mounts; _not_ for fs use */ @@ -3262,6 +3272,12 @@ extern int generic_file_fsync(struct file *, loff_t, loff_t, int); extern int generic_check_addressable(unsigned, u64); +#ifdef CONFIG_UNICODE +extern int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str); +extern int generic_ci_d_compare(const struct dentry *dentry, unsigned int len, + const char *str, const struct qstr *name); +#endif + #ifdef CONFIG_MIGRATION extern int buffer_migrate_page(struct address_space *, struct page *, struct page *, -- cgit v1.2.3 From eca4873ee1b63ee051e0eed91099fa42c97b2438 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Wed, 8 Jul 2020 02:12:36 -0700 Subject: f2fs: Use generic casefolding support This switches f2fs over to the generic support provided in the previous patch. Since casefolded dentries behave the same in ext4 and f2fs, we decrease the maintenance burden by unifying them, and any optimizations will immediately apply to both. Signed-off-by: Daniel Rosenberg Reviewed-by: Eric Biggers Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 84 ++++++------------------------------------------- fs/f2fs/f2fs.h | 4 --- fs/f2fs/super.c | 10 +++--- fs/f2fs/sysfs.c | 10 +++--- include/linux/f2fs_fs.h | 3 -- 5 files changed, 20 insertions(+), 91 deletions(-) (limited to 'include/linux') diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 069f498af1e3..a18f839b6fb2 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -75,21 +75,22 @@ int f2fs_init_casefolded_name(const struct inode *dir, struct f2fs_filename *fname) { #ifdef CONFIG_UNICODE - struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); + struct super_block *sb = dir->i_sb; + struct f2fs_sb_info *sbi = F2FS_SB(sb); if (IS_CASEFOLDED(dir)) { fname->cf_name.name = f2fs_kmalloc(sbi, F2FS_NAME_LEN, GFP_NOFS); if (!fname->cf_name.name) return -ENOMEM; - fname->cf_name.len = utf8_casefold(sbi->s_encoding, + fname->cf_name.len = utf8_casefold(sb->s_encoding, fname->usr_fname, fname->cf_name.name, F2FS_NAME_LEN); if ((int)fname->cf_name.len <= 0) { kfree(fname->cf_name.name); fname->cf_name.name = NULL; - if (f2fs_has_strict_mode(sbi)) + if (sb_has_strict_encoding(sb)) return -EINVAL; /* fall back to treating name as opaque byte sequence */ } @@ -215,8 +216,8 @@ static struct f2fs_dir_entry *find_in_block(struct inode *dir, static bool f2fs_match_ci_name(const struct inode *dir, const struct qstr *name, const u8 *de_name, u32 de_name_len) { - const struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); - const struct unicode_map *um = sbi->s_encoding; + const struct super_block *sb = dir->i_sb; + const struct unicode_map *um = sb->s_encoding; struct qstr entry = QSTR_INIT(de_name, de_name_len); int res; @@ -226,7 +227,7 @@ static bool f2fs_match_ci_name(const struct inode *dir, const struct qstr *name, * In strict mode, ignore invalid names. In non-strict mode, * fall back to treating them as opaque byte sequences. */ - if (f2fs_has_strict_mode(sbi) || name->len != entry.len) + if (sb_has_strict_encoding(sb) || name->len != entry.len) return false; return !memcmp(name->name, entry.name, name->len); } @@ -1107,75 +1108,8 @@ const struct file_operations f2fs_dir_operations = { }; #ifdef CONFIG_UNICODE -static int f2fs_d_compare(const struct dentry *dentry, unsigned int len, - const char *str, const struct qstr *name) -{ - const struct dentry *parent = READ_ONCE(dentry->d_parent); - const struct inode *dir = READ_ONCE(parent->d_inode); - const struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); - struct qstr entry = QSTR_INIT(str, len); - char strbuf[DNAME_INLINE_LEN]; - int res; - - if (!dir || !IS_CASEFOLDED(dir)) - goto fallback; - - /* - * If the dentry name is stored in-line, then it may be concurrently - * modified by a rename. If this happens, the VFS will eventually retry - * the lookup, so it doesn't matter what ->d_compare() returns. - * However, it's unsafe to call utf8_strncasecmp() with an unstable - * string. Therefore, we have to copy the name into a temporary buffer. - */ - if (len <= DNAME_INLINE_LEN - 1) { - memcpy(strbuf, str, len); - strbuf[len] = 0; - entry.name = strbuf; - /* prevent compiler from optimizing out the temporary buffer */ - barrier(); - } - - res = utf8_strncasecmp(sbi->s_encoding, name, &entry); - if (res >= 0) - return res; - - if (f2fs_has_strict_mode(sbi)) - return -EINVAL; -fallback: - if (len != name->len) - return 1; - return !!memcmp(str, name->name, len); -} - -static int f2fs_d_hash(const struct dentry *dentry, struct qstr *str) -{ - struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); - const struct unicode_map *um = sbi->s_encoding; - const struct inode *inode = READ_ONCE(dentry->d_inode); - unsigned char *norm; - int len, ret = 0; - - if (!inode || !IS_CASEFOLDED(inode)) - return 0; - - norm = f2fs_kmalloc(sbi, PATH_MAX, GFP_ATOMIC); - if (!norm) - return -ENOMEM; - - len = utf8_casefold(um, str, norm, PATH_MAX); - if (len < 0) { - if (f2fs_has_strict_mode(sbi)) - ret = -EINVAL; - goto out; - } - str->hash = full_name_hash(dentry, norm, len); -out: - kvfree(norm); - return ret; -} - const struct dentry_operations f2fs_dentry_ops = { - .d_hash = f2fs_d_hash, - .d_compare = f2fs_d_compare, + .d_hash = generic_ci_d_hash, + .d_compare = generic_ci_d_compare, }; #endif diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 413d0a3ee78d..7446ca639140 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1414,10 +1414,6 @@ struct f2fs_sb_info { int valid_super_block; /* valid super block no */ unsigned long s_flag; /* flags for sbi */ struct mutex writepages; /* mutex for writepages() */ -#ifdef CONFIG_UNICODE - struct unicode_map *s_encoding; - __u16 s_encoding_flags; -#endif #ifdef CONFIG_BLK_DEV_ZONED unsigned int blocks_per_blkz; /* F2FS blocks per zone */ diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 17e4008141e6..426d6578d2c9 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1282,7 +1282,7 @@ static void f2fs_put_super(struct super_block *sb) for (i = 0; i < NR_PAGE_TYPE; i++) kvfree(sbi->write_io[i]); #ifdef CONFIG_UNICODE - utf8_unload(sbi->s_encoding); + utf8_unload(sb->s_encoding); #endif kfree(sbi); } @@ -3359,7 +3359,7 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) static int f2fs_setup_casefold(struct f2fs_sb_info *sbi) { #ifdef CONFIG_UNICODE - if (f2fs_sb_has_casefold(sbi) && !sbi->s_encoding) { + if (f2fs_sb_has_casefold(sbi) && !sbi->sb->s_encoding) { const struct f2fs_sb_encodings *encoding_info; struct unicode_map *encoding; __u16 encoding_flags; @@ -3390,8 +3390,8 @@ static int f2fs_setup_casefold(struct f2fs_sb_info *sbi) "%s-%s with flags 0x%hx", encoding_info->name, encoding_info->version?:"\b", encoding_flags); - sbi->s_encoding = encoding; - sbi->s_encoding_flags = encoding_flags; + sbi->sb->s_encoding = encoding; + sbi->sb->s_encoding_flags = encoding_flags; sbi->sb->s_d_op = &f2fs_dentry_ops; } #else @@ -3887,7 +3887,7 @@ free_bio_info: kvfree(sbi->write_io[i]); #ifdef CONFIG_UNICODE - utf8_unload(sbi->s_encoding); + utf8_unload(sb->s_encoding); #endif free_options: #ifdef CONFIG_QUOTA diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 88ed9969cc86..70ad87d5492f 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -176,12 +176,14 @@ static ssize_t encoding_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { #ifdef CONFIG_UNICODE + struct super_block *sb = sbi->sb; + if (f2fs_sb_has_casefold(sbi)) return snprintf(buf, PAGE_SIZE, "%s (%d.%d.%d)\n", - sbi->s_encoding->charset, - (sbi->s_encoding->version >> 16) & 0xff, - (sbi->s_encoding->version >> 8) & 0xff, - sbi->s_encoding->version & 0xff); + sb->s_encoding->charset, + (sb->s_encoding->version >> 16) & 0xff, + (sb->s_encoding->version >> 8) & 0xff, + sb->s_encoding->version & 0xff); #endif return sprintf(buf, "(none)"); } diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 3c383ddd92dd..a5dbb57a687f 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -38,9 +38,6 @@ #define F2FS_MAX_QUOTAS 3 #define F2FS_ENC_UTF8_12_1 1 -#define F2FS_ENC_STRICT_MODE_FL (1 << 0) -#define f2fs_has_strict_mode(sbi) \ - (sbi->s_encoding_flags & F2FS_ENC_STRICT_MODE_FL) #define F2FS_IO_SIZE(sbi) (1 << F2FS_OPTION(sbi).write_io_size_bits) /* Blocks */ #define F2FS_IO_SIZE_KB(sbi) (1 << (F2FS_OPTION(sbi).write_io_size_bits + 2)) /* KB */ -- cgit v1.2.3