diff options
author | Jakub Kicinski <kuba@kernel.org> | 2022-06-23 12:33:24 -0700 |
---|---|---|
committer | Jakub Kicinski <kuba@kernel.org> | 2022-06-23 12:33:24 -0700 |
commit | 93817be8b62c7fa1f1bdc3e8c037a73a60026be9 (patch) | |
tree | 873c207abc783edb4ead73b2b3a8a9b105f94420 /fs | |
parent | ccb9bc1dfa444e3541622ccfff135e83d2a569d1 (diff) | |
parent | 399bd66e219e331976fe6fa6ab81a023c0c97870 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
No conflicts.
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/9p/fid.c | 22 | ||||
-rw-r--r-- | fs/9p/vfs_addr.c | 13 | ||||
-rw-r--r-- | fs/9p/vfs_inode.c | 8 | ||||
-rw-r--r-- | fs/9p/vfs_inode_dotl.c | 3 | ||||
-rw-r--r-- | fs/afs/inode.c | 3 | ||||
-rw-r--r-- | fs/btrfs/disk-io.c | 13 | ||||
-rw-r--r-- | fs/btrfs/super.c | 47 | ||||
-rw-r--r-- | fs/cifs/sess.c | 3 | ||||
-rw-r--r-- | fs/cifs/smb2pdu.c | 2 | ||||
-rw-r--r-- | fs/cifs/trace.h | 38 | ||||
-rw-r--r-- | fs/ext2/dir.c | 9 | ||||
-rw-r--r-- | fs/ext4/inode.c | 2 | ||||
-rw-r--r-- | fs/ext4/mballoc.c | 26 | ||||
-rw-r--r-- | fs/ext4/migrate.c | 2 | ||||
-rw-r--r-- | fs/ext4/namei.c | 3 | ||||
-rw-r--r-- | fs/ext4/page-io.c | 2 | ||||
-rw-r--r-- | fs/ext4/resize.c | 10 | ||||
-rw-r--r-- | fs/ext4/super.c | 172 | ||||
-rw-r--r-- | fs/ext4/xattr.c | 3 | ||||
-rw-r--r-- | fs/io_uring.c | 347 | ||||
-rw-r--r-- | fs/jbd2/transaction.c | 2 | ||||
-rw-r--r-- | fs/nfs/callback_proc.c | 1 | ||||
-rw-r--r-- | fs/nfs/dir.c | 1 | ||||
-rw-r--r-- | fs/nfs/nfs4file.c | 1 | ||||
-rw-r--r-- | fs/nfs/pnfs.c | 21 | ||||
-rw-r--r-- | fs/nfs/pnfs.h | 1 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_attr.c | 9 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_attr.h | 12 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_attr_leaf.c | 2 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_da_btree.h | 4 | ||||
-rw-r--r-- | fs/xfs/xfs_attr_item.c | 15 | ||||
-rw-r--r-- | fs/xfs/xfs_ioctl.c | 3 | ||||
-rw-r--r-- | fs/xfs/xfs_xattr.c | 17 |
33 files changed, 463 insertions, 354 deletions
diff --git a/fs/9p/fid.c b/fs/9p/fid.c index 79df61fe0e59..baf2b152229e 100644 --- a/fs/9p/fid.c +++ b/fs/9p/fid.c @@ -152,7 +152,7 @@ static struct p9_fid *v9fs_fid_lookup_with_uid(struct dentry *dentry, const unsigned char **wnames, *uname; int i, n, l, clone, access; struct v9fs_session_info *v9ses; - struct p9_fid *fid, *old_fid = NULL; + struct p9_fid *fid, *old_fid; v9ses = v9fs_dentry2v9ses(dentry); access = v9ses->flags & V9FS_ACCESS_MASK; @@ -194,13 +194,12 @@ static struct p9_fid *v9fs_fid_lookup_with_uid(struct dentry *dentry, if (IS_ERR(fid)) return fid; + refcount_inc(&fid->count); v9fs_fid_add(dentry->d_sb->s_root, fid); } /* If we are root ourself just return that */ - if (dentry->d_sb->s_root == dentry) { - refcount_inc(&fid->count); + if (dentry->d_sb->s_root == dentry) return fid; - } /* * Do a multipath walk with attached root. * When walking parent we need to make sure we @@ -212,6 +211,7 @@ static struct p9_fid *v9fs_fid_lookup_with_uid(struct dentry *dentry, fid = ERR_PTR(n); goto err_out; } + old_fid = fid; clone = 1; i = 0; while (i < n) { @@ -221,19 +221,15 @@ static struct p9_fid *v9fs_fid_lookup_with_uid(struct dentry *dentry, * walk to ensure none of the patch component change */ fid = p9_client_walk(fid, l, &wnames[i], clone); + /* non-cloning walk will return the same fid */ + if (fid != old_fid) { + p9_client_clunk(old_fid); + old_fid = fid; + } if (IS_ERR(fid)) { - if (old_fid) { - /* - * If we fail, clunk fid which are mapping - * to path component and not the last component - * of the path. - */ - p9_client_clunk(old_fid); - } kfree(wnames); goto err_out; } - old_fid = fid; i += l; clone = 0; } diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index a8f512b44a85..d0833fa69faf 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c @@ -58,8 +58,21 @@ static void v9fs_issue_read(struct netfs_io_subrequest *subreq) */ static int v9fs_init_request(struct netfs_io_request *rreq, struct file *file) { + struct inode *inode = file_inode(file); + struct v9fs_inode *v9inode = V9FS_I(inode); struct p9_fid *fid = file->private_data; + BUG_ON(!fid); + + /* we might need to read from a fid that was opened write-only + * for read-modify-write of page cache, use the writeback fid + * for that */ + if (rreq->origin == NETFS_READ_FOR_WRITE && + (fid->mode & O_ACCMODE) == O_WRONLY) { + fid = v9inode->writeback_fid; + BUG_ON(!fid); + } + refcount_inc(&fid->count); rreq->netfs_priv = fid; return 0; diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 419d2f3cf2c2..3d8297714772 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -1251,15 +1251,15 @@ static const char *v9fs_vfs_get_link(struct dentry *dentry, return ERR_PTR(-ECHILD); v9ses = v9fs_dentry2v9ses(dentry); - fid = v9fs_fid_lookup(dentry); + if (!v9fs_proto_dotu(v9ses)) + return ERR_PTR(-EBADF); + p9_debug(P9_DEBUG_VFS, "%pd\n", dentry); + fid = v9fs_fid_lookup(dentry); if (IS_ERR(fid)) return ERR_CAST(fid); - if (!v9fs_proto_dotu(v9ses)) - return ERR_PTR(-EBADF); - st = p9_client_stat(fid); p9_client_clunk(fid); if (IS_ERR(st)) diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index d17502a738a9..b6eb1160296c 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -274,6 +274,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, if (IS_ERR(ofid)) { err = PTR_ERR(ofid); p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); + p9_client_clunk(dfid); goto out; } @@ -285,6 +286,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, if (err) { p9_debug(P9_DEBUG_VFS, "Failed to get acl values in creat %d\n", err); + p9_client_clunk(dfid); goto error; } err = p9_client_create_dotl(ofid, name, v9fs_open_to_dotl_flags(flags), @@ -292,6 +294,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, if (err < 0) { p9_debug(P9_DEBUG_VFS, "p9_client_open_dotl failed in creat %d\n", err); + p9_client_clunk(dfid); goto error; } v9fs_invalidate_inode_attr(dir); diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 89630acbc2cc..64dab70d4a4f 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -745,7 +745,8 @@ int afs_getattr(struct user_namespace *mnt_userns, const struct path *path, _enter("{ ino=%lu v=%u }", inode->i_ino, inode->i_generation); - if (!(query_flags & AT_STATX_DONT_SYNC) && + if (vnode->volume && + !(query_flags & AT_STATX_DONT_SYNC) && !test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) { key = afs_request_key(vnode->volume->cell); if (IS_ERR(key)) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 89e94ea2fef5..4ba005c41983 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -4632,6 +4632,17 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info) int ret; set_bit(BTRFS_FS_CLOSING_START, &fs_info->flags); + + /* + * We may have the reclaim task running and relocating a data block group, + * in which case it may create delayed iputs. So stop it before we park + * the cleaner kthread otherwise we can get new delayed iputs after + * parking the cleaner, and that can make the async reclaim task to hang + * if it's waiting for delayed iputs to complete, since the cleaner is + * parked and can not run delayed iputs - this will make us hang when + * trying to stop the async reclaim task. + */ + cancel_work_sync(&fs_info->reclaim_bgs_work); /* * We don't want the cleaner to start new transactions, add more delayed * iputs, etc. while we're closing. We can't use kthread_stop() yet @@ -4672,8 +4683,6 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info) cancel_work_sync(&fs_info->async_data_reclaim_work); cancel_work_sync(&fs_info->preempt_reclaim_work); - cancel_work_sync(&fs_info->reclaim_bgs_work); - /* Cancel or finish ongoing discard work */ btrfs_discard_cleanup(fs_info); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index b1fdc6a26c76..6627dd7875ee 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -763,6 +763,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, compress_force = false; no_compress++; } else { + btrfs_err(info, "unrecognized compression value %s", + args[0].from); ret = -EINVAL; goto out; } @@ -821,8 +823,11 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, case Opt_thread_pool: ret = match_int(&args[0], &intarg); if (ret) { + btrfs_err(info, "unrecognized thread_pool value %s", + args[0].from); goto out; } else if (intarg == 0) { + btrfs_err(info, "invalid value 0 for thread_pool"); ret = -EINVAL; goto out; } @@ -883,8 +888,11 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, break; case Opt_ratio: ret = match_int(&args[0], &intarg); - if (ret) + if (ret) { + btrfs_err(info, "unrecognized metadata_ratio value %s", + args[0].from); goto out; + } info->metadata_ratio = intarg; btrfs_info(info, "metadata ratio %u", info->metadata_ratio); @@ -901,6 +909,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, btrfs_set_and_info(info, DISCARD_ASYNC, "turning on async discard"); } else { + btrfs_err(info, "unrecognized discard mode value %s", + args[0].from); ret = -EINVAL; goto out; } @@ -933,6 +943,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, btrfs_set_and_info(info, FREE_SPACE_TREE, "enabling free space tree"); } else { + btrfs_err(info, "unrecognized space_cache value %s", + args[0].from); ret = -EINVAL; goto out; } @@ -1014,8 +1026,12 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, break; case Opt_check_integrity_print_mask: ret = match_int(&args[0], &intarg); - if (ret) + if (ret) { + btrfs_err(info, + "unrecognized check_integrity_print_mask value %s", + args[0].from); goto out; + } info->check_integrity_print_mask = intarg; btrfs_info(info, "check_integrity_print_mask 0x%x", info->check_integrity_print_mask); @@ -1030,13 +1046,15 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, goto out; #endif case Opt_fatal_errors: - if (strcmp(args[0].from, "panic") == 0) + if (strcmp(args[0].from, "panic") == 0) { btrfs_set_opt(info->mount_opt, PANIC_ON_FATAL_ERROR); - else if (strcmp(args[0].from, "bug") == 0) + } else if (strcmp(args[0].from, "bug") == 0) { btrfs_clear_opt(info->mount_opt, PANIC_ON_FATAL_ERROR); - else { + } else { + btrfs_err(info, "unrecognized fatal_errors value %s", + args[0].from); ret = -EINVAL; goto out; } @@ -1044,8 +1062,12 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, case Opt_commit_interval: intarg = 0; ret = match_int(&args[0], &intarg); - if (ret) + if (ret) { + btrfs_err(info, "unrecognized commit_interval value %s", + args[0].from); + ret = -EINVAL; goto out; + } if (intarg == 0) { btrfs_info(info, "using default commit interval %us", @@ -1059,8 +1081,11 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, break; case Opt_rescue: ret = parse_rescue_options(info, args[0].from); - if (ret < 0) + if (ret < 0) { + btrfs_err(info, "unrecognized rescue value %s", + args[0].from); goto out; + } break; #ifdef CONFIG_BTRFS_DEBUG case Opt_fragment_all: @@ -1985,6 +2010,14 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) if (ret) goto restore; + /* V1 cache is not supported for subpage mount. */ + if (fs_info->sectorsize < PAGE_SIZE && btrfs_test_opt(fs_info, SPACE_CACHE)) { + btrfs_warn(fs_info, + "v1 space cache is not supported for page size %lu with sectorsize %u", + PAGE_SIZE, fs_info->sectorsize); + ret = -EINVAL; + goto restore; + } btrfs_remount_begin(fs_info, old_opts, *flags); btrfs_resize_thread_pool(fs_info, fs_info->thread_pool_size, old_thread_pool_size); diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 0bece97547d4..d417de354d9d 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -81,6 +81,9 @@ cifs_ses_get_chan_index(struct cifs_ses *ses, } /* If we didn't find the channel, it is likely a bug */ + if (server) + cifs_dbg(VFS, "unable to get chan index for server: 0x%llx", + server->conn_id); WARN_ON(1); return 0; } diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index eaf975f1ad89..b515140bad8d 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -5154,6 +5154,8 @@ SMB2_set_eof(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, data = &info; size = sizeof(struct smb2_file_eof_info); + trace_smb3_set_eof(xid, persistent_fid, tcon->tid, tcon->ses->Suid, le64_to_cpu(*eof)); + return send_set_info(xid, tcon, persistent_fid, volatile_fid, pid, FILE_END_OF_FILE_INFORMATION, SMB2_O_INFO_FILE, 0, 1, &data, &size); diff --git a/fs/cifs/trace.h b/fs/cifs/trace.h index 2be5e0c8564d..6b88dc2e364f 100644 --- a/fs/cifs/trace.h +++ b/fs/cifs/trace.h @@ -121,6 +121,44 @@ DEFINE_SMB3_RW_DONE_EVENT(query_dir_done); DEFINE_SMB3_RW_DONE_EVENT(zero_done); DEFINE_SMB3_RW_DONE_EVENT(falloc_done); +/* For logging successful set EOF (truncate) */ +DECLARE_EVENT_CLASS(smb3_eof_class, + TP_PROTO(unsigned int xid, + __u64 fid, + __u32 tid, + __u64 sesid, + __u64 offset), + TP_ARGS(xid, fid, tid, sesid, offset), + TP_STRUCT__entry( + __field(unsigned int, xid) + __field(__u64, fid) + __field(__u32, tid) + __field(__u64, sesid) + __field(__u64, offset) + ), + TP_fast_assign( + __entry->xid = xid; + __entry->fid = fid; + __entry->tid = tid; + __entry->sesid = sesid; + __entry->offset = offset; + ), + TP_printk("xid=%u sid=0x%llx tid=0x%x fid=0x%llx offset=0x%llx", + __entry->xid, __entry->sesid, __entry->tid, __entry->fid, + __entry->offset) +) + +#define DEFINE_SMB3_EOF_EVENT(name) \ +DEFINE_EVENT(smb3_eof_class, smb3_##name, \ + TP_PROTO(unsigned int xid, \ + __u64 fid, \ + __u32 tid, \ + __u64 sesid, \ + __u64 offset), \ + TP_ARGS(xid, fid, tid, sesid, offset)) + +DEFINE_SMB3_EOF_EVENT(set_eof); + /* * For handle based calls other than read and write, and get/set info */ diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index 2c2f179b6977..43de293cef56 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -672,17 +672,14 @@ int ext2_empty_dir (struct inode * inode) void *page_addr = NULL; struct page *page = NULL; unsigned long i, npages = dir_pages(inode); - int dir_has_error = 0; for (i = 0; i < npages; i++) { char *kaddr; ext2_dirent * de; - page = ext2_get_page(inode, i, dir_has_error, &page_addr); + page = ext2_get_page(inode, i, 0, &page_addr); - if (IS_ERR(page)) { - dir_has_error = 1; - continue; - } + if (IS_ERR(page)) + goto not_empty; kaddr = page_addr; de = (ext2_dirent *)kaddr; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 3dce7d058985..84c0eb55071d 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -829,7 +829,7 @@ int ext4_get_block_unwritten(struct inode *inode, sector_t iblock, ext4_debug("ext4_get_block_unwritten: inode %lu, create flag %d\n", inode->i_ino, create); return _ext4_get_block(inode, iblock, bh_result, - EXT4_GET_BLOCKS_IO_CREATE_EXT); + EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT); } /* Maximum number of blocks we map for direct IO at once. */ diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 9f12f29bc346..9e06334771a3 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4104,6 +4104,15 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, size = size >> bsbits; start = start_off >> bsbits; + /* + * For tiny groups (smaller than 8MB) the chosen allocation + * alignment may be larger than group size. Make sure the + * alignment does not move allocation to a different group which + * makes mballoc fail assertions later. + */ + start = max(start, rounddown(ac->ac_o_ex.fe_logical, + (ext4_lblk_t)EXT4_BLOCKS_PER_GROUP(ac->ac_sb))); + /* don't cover already allocated blocks in selected range */ if (ar->pleft && start <= ar->lleft) { size -= ar->lleft + 1 - start; @@ -4176,7 +4185,22 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, } rcu_read_unlock(); - if (start + size <= ac->ac_o_ex.fe_logical && + /* + * In this function "start" and "size" are normalized for better + * alignment and length such that we could preallocate more blocks. + * This normalization is done such that original request of + * ac->ac_o_ex.fe_logical & fe_len should always lie within "start" and + * "size" boundaries. + * (Note fe_len can be relaxed since FS block allocation API does not + * provide gurantee on number of contiguous blocks allocation since that + * depends upon free space left, etc). + * In case of inode pa, later we use the allocated blocks + * [pa_start + fe_logical - pa_lstart, fe_len/size] from the preallocated + * range of goal/best blocks [start, size] to put it at the + * ac_o_ex.fe_logical extent of this inode. + * (See ext4_mb_use_inode_pa() for more details) + */ + if (start + size <= ac->ac_o_ex.fe_logical || start > ac->ac_o_ex.fe_logical) { ext4_msg(ac->ac_sb, KERN_ERR, "start %lu, size %lu, fe_logical %lu", diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 7a5353a8cfd7..42f590518b4c 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -438,7 +438,7 @@ int ext4_ext_migrate(struct inode *inode) /* * Worst case we can touch the allocation bitmaps and a block - * group descriptor block. We do need need to worry about + * group descriptor block. We do need to worry about * credits for modifying the quota inode. */ handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 47d0ca4c795b..db4ba99d1ceb 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1929,7 +1929,8 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, struct dx_hash_info *hinfo) { unsigned blocksize = dir->i_sb->s_blocksize; - unsigned count, continued; + unsigned continued; + int count; struct buffer_head *bh2; ext4_lblk_t newblock; u32 hash2; diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 14695e2b5042..97fa7b4c645f 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -465,7 +465,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io, /* * In the first loop we prepare and mark buffers to submit. We have to * mark all buffers in the page before submitting so that - * end_page_writeback() cannot be called from ext4_bio_end_io() when IO + * end_page_writeback() cannot be called from ext4_end_bio() when IO * on the first buffer finishes and we are still working on submitting * the second buffer. */ diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 90a941d20dff..8b70a4701293 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -54,6 +54,16 @@ int ext4_resize_begin(struct super_block *sb) return -EPERM; /* + * If the reserved GDT blocks is non-zero, the resize_inode feature + * should always be set. + */ + if (EXT4_SB(sb)->s_es->s_reserved_gdt_blocks && + !ext4_has_feature_resize_inode(sb)) { + ext4_error(sb, "resize_inode disabled but reserved GDT blocks non-zero"); + return -EFSCORRUPTED; + } + + /* * If we are not using the primary superblock/GDT copy don't resize, * because the user tools have no way of handling this. Probably a * bad time to do it anyways. diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 450c918d68fc..845f2f8aee5f 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -87,7 +87,7 @@ static struct inode *ext4_get_journal_inode(struct super_block *sb, static int ext4_validate_options(struct fs_context *fc); static int ext4_check_opt_consistency(struct fs_context *fc, struct super_block *sb); -static int ext4_apply_options(struct fs_context *fc, struct super_block *sb); +static void ext4_apply_options(struct fs_context *fc, struct super_block *sb); static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param); static int ext4_get_tree(struct fs_context *fc); static int ext4_reconfigure(struct fs_context *fc); @@ -1870,31 +1870,12 @@ ext4_sb_read_encoding(const struct ext4_super_block *es) } #endif -static int ext4_set_test_dummy_encryption(struct super_block *sb, char *arg) -{ -#ifdef CONFIG_FS_ENCRYPTION - struct ext4_sb_info *sbi = EXT4_SB(sb); - int err; - - err = fscrypt_set_test_dummy_encryption(sb, arg, - &sbi->s_dummy_enc_policy); - if (err) { - ext4_msg(sb, KERN_WARNING, - "Error while setting test dummy encryption [%d]", err); - return err; - } - ext4_msg(sb, KERN_WARNING, "Test dummy encryption mode enabled"); -#endif - return 0; -} - #define EXT4_SPEC_JQUOTA (1 << 0) #define EXT4_SPEC_JQFMT (1 << 1) #define EXT4_SPEC_DATAJ (1 << 2) #define EXT4_SPEC_SB_BLOCK (1 << 3) #define EXT4_SPEC_JOURNAL_DEV (1 << 4) #define EXT4_SPEC_JOURNAL_IOPRIO (1 << 5) -#define EXT4_SPEC_DUMMY_ENCRYPTION (1 << 6) #define EXT4_SPEC_s_want_extra_isize (1 << 7) #define EXT4_SPEC_s_max_batch_time (1 << 8) #define EXT4_SPEC_s_min_batch_time (1 << 9) @@ -1911,7 +1892,7 @@ static int ext4_set_test_dummy_encryption(struct super_block *sb, char *arg) struct ext4_fs_context { char *s_qf_names[EXT4_MAXQUOTAS]; - char *test_dummy_enc_arg; + struct fscrypt_dummy_policy dummy_enc_policy; int s_jquota_fmt; /* Format of quota to use */ #ifdef CONFIG_EXT4_DEBUG int s_fc_debug_max_replay; @@ -1953,7 +1934,7 @@ static void ext4_fc_free(struct fs_context *fc) for (i = 0; i < EXT4_MAXQUOTAS; i++) kfree(ctx->s_qf_names[i]); - kfree(ctx->test_dummy_enc_arg); + fscrypt_free_dummy_policy(&ctx->dummy_enc_policy); kfree(ctx); } @@ -2029,6 +2010,29 @@ static int unnote_qf_name(struct fs_context *fc, int qtype) } #endif +static int ext4_parse_test_dummy_encryption(const struct fs_parameter *param, + struct ext4_fs_context *ctx) +{ + int err; + + if (!IS_ENABLED(CONFIG_FS_ENCRYPTION)) { + ext4_msg(NULL, KERN_WARNING, + "test_dummy_encryption option not supported"); + return -EINVAL; + } + err = fscrypt_parse_test_dummy_encryption(param, + &ctx->dummy_enc_policy); + if (err == -EINVAL) { + ext4_msg(NULL, KERN_WARNING, + "Value of option \"%s\" is unrecognized", param->key); + } else if (err == -EEXIST) { + ext4_msg(NULL, KERN_WARNING, + "Conflicting test_dummy_encryption options"); + return -EINVAL; + } + return err; +} + #define EXT4_SET_CTX(name) \ static inline void ctx_set_##name(struct ext4_fs_context *ctx, \ unsigned long flag) \ @@ -2291,29 +2295,7 @@ static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param) ctx->spec |= EXT4_SPEC_JOURNAL_IOPRIO; return 0; case Opt_test_dummy_encryption: -#ifdef CONFIG_FS_ENCRYPTION - if (param->type == fs_value_is_flag) { - ctx->spec |= EXT4_SPEC_DUMMY_ENCRYPTION; - ctx->test_dummy_enc_arg = NULL; - return 0; - } - if (*param->string && - !(!strcmp(param->string, "v1") || - !strcmp(param->string, "v2"))) { - ext4_msg(NULL, KERN_WARNING, - "Value of option \"%s\" is unrecognized", - param->key); - return -EINVAL; - } - ctx->spec |= EXT4_SPEC_DUMMY_ENCRYPTION; - ctx->test_dummy_enc_arg = kmemdup_nul(param->string, param->size, - GFP_KERNEL); - return 0; -#else - ext4_msg(NULL, KERN_WARNING, - "test_dummy_encryption option not supported"); - return -EINVAL; -#endif + return ext4_parse_test_dummy_encryption(param, ctx); case Opt_dax: case Opt_dax_type: #ifdef CONFIG_FS_DAX @@ -2504,7 +2486,8 @@ parse_failed: if (s_ctx->spec & EXT4_SPEC_JOURNAL_IOPRIO) m_ctx->journal_ioprio = s_ctx->journal_ioprio; - ret = ext4_apply_options(fc, sb); + ext4_apply_options(fc, sb); + ret = 0; out_free: if (fc) { @@ -2673,11 +2656,11 @@ err_jquota_specified: static int ext4_check_test_dummy_encryption(const struct fs_context *fc, struct super_block *sb) { -#ifdef CONFIG_FS_ENCRYPTION const struct ext4_fs_context *ctx = fc->fs_private; const struct ext4_sb_info *sbi = EXT4_SB(sb); + int err; - if (!(ctx->spec & EXT4_SPEC_DUMMY_ENCRYPTION)) + if (!fscrypt_is_dummy_policy_set(&ctx->dummy_enc_policy)) return 0; if (!ext4_has_feature_encrypt(sb)) { @@ -2691,14 +2674,46 @@ static int ext4_check_test_dummy_encryption(const struct fs_context *fc, * needed to allow it to be set or changed during remount. We do allow * it to be specified during remount, but only if there is no change. */ - if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE && - !sbi->s_dummy_enc_policy.policy) { + if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) { + if (fscrypt_dummy_policies_equal(&sbi->s_dummy_enc_policy, + &ctx->dummy_enc_policy)) + return 0; ext4_msg(NULL, KERN_WARNING, - "Can't set test_dummy_encryption on remount"); + "Can't set or change test_dummy_encryption on remount"); return -EINVAL; } -#endif /* CONFIG_FS_ENCRYPTION */ - return 0; + /* Also make sure s_mount_opts didn't contain a conflicting value. */ + if (fscrypt_is_dummy_policy_set(&sbi->s_dummy_enc_policy)) { + if (fscrypt_dummy_policies_equal(&sbi->s_dummy_enc_policy, + &ctx->dummy_enc_policy)) + return 0; + ext4_msg(NULL, KERN_WARNING, + "Conflicting test_dummy_encryption options"); + return -EINVAL; + } + /* + * fscrypt_add_test_dummy_key() technically changes the super_block, so + * technically it should be delayed until ext4_apply_options() like the + * other changes. But since we never get here for remounts (see above), + * and this is the last chance to report errors, we do it here. + */ + err = fscrypt_add_test_dummy_key(sb, &ctx->dummy_enc_policy); + if (err) + ext4_msg(NULL, KERN_WARNING, + "Error adding test dummy encryption key [%d]", err); + return err; +} + +static void ext4_apply_test_dummy_encryption(struct ext4_fs_context *ctx, + struct super_block *sb) +{ + if (!fscrypt_is_dummy_policy_set(&ctx->dummy_enc_policy) || + /* if already set, it was already verified to be the same */ + fscrypt_is_dummy_policy_set(&EXT4_SB(sb)->s_dummy_enc_policy)) + return; + EXT4_SB(sb)->s_dummy_enc_policy = ctx->dummy_enc_policy; + memset(&ctx->dummy_enc_policy, 0, sizeof(ctx->dummy_enc_policy)); + ext4_msg(sb, KERN_WARNING, "Test dummy encryption mode enabled"); } static int ext4_check_opt_consistency(struct fs_context *fc, @@ -2785,11 +2800,10 @@ fail_dax_change_remount: return ext4_check_quota_consistency(fc, sb); } -static int ext4_apply_options(struct fs_context *fc, struct super_block *sb) +static void ext4_apply_options(struct fs_context *fc, struct super_block *sb) { struct ext4_fs_context *ctx = fc->fs_private; struct ext4_sb_info *sbi = fc->s_fs_info; - int ret = 0; sbi->s_mount_opt &= ~ctx->mask_s_mount_opt; sbi->s_mount_opt |= ctx->vals_s_mount_opt; @@ -2825,11 +2839,7 @@ static int ext4_apply_options(struct fs_context *fc, struct super_block *sb) #endif ext4_apply_quota_options(fc, sb); - - if (ctx->spec & EXT4_SPEC_DUMMY_ENCRYPTION) - ret = ext4_set_test_dummy_encryption(sb, ctx->test_dummy_enc_arg); - - return ret; + ext4_apply_test_dummy_encryption(ctx, sb); } @@ -4552,9 +4562,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) if (err < 0) goto failed_mount; - err = ext4_apply_options(fc, sb); - if (err < 0) - goto failed_mount; + ext4_apply_options(fc, sb); #if IS_ENABLED(CONFIG_UNICODE) if (ext4_has_feature_casefold(sb) && !sb->s_encoding) { @@ -5302,14 +5310,6 @@ no_journal: err = percpu_counter_init(&sbi->s_freeinodes_counter, freei, GFP_KERNEL); } - /* - * Update the checksum after updating free space/inode - * counters. Otherwise the superblock can have an incorrect - * checksum in the buffer cache until it is written out and - * e2fsprogs programs trying to open a file system immediately - * after it is mounted can fail. - */ - ext4_superblock_csum_set(sb); if (!err) err = percpu_counter_init(&sbi->s_dirs_counter, ext4_count_dirs(sb), GFP_KERNEL); @@ -5367,6 +5367,14 @@ no_journal: EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS; ext4_orphan_cleanup(sb, es); EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS; + /* + * Update the checksum after updating free space/inode counters and + * ext4_orphan_cleanup. Otherwise the superblock can have an incorrect + * checksum in the buffer cache until it is written out and + * e2fsprogs programs trying to open a file system immediately + * after it is mounted can fail. + */ + ext4_superblock_csum_set(sb); if (needs_recovery) { ext4_msg(sb, KERN_INFO, "recovery complete"); err = ext4_mark_recovery_complete(sb, es); @@ -5898,7 +5906,6 @@ static void ext4_update_super(struct super_block *sb) static int ext4_commit_super(struct super_block *sb) { struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; - int error = 0; if (!sbh) return -EINVAL; @@ -5907,6 +5914,13 @@ static int ext4_commit_super(struct super_block *sb) ext4_update_super(sb); + lock_buffer(sbh); + /* Buffer got discarded which means block device got invalidated */ + if (!buffer_mapped(sbh)) { + unlock_buffer(sbh); + return -EIO; + } + if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) { /* * Oh, dear. A previous attempt to write the @@ -5921,17 +5935,21 @@ static int ext4_commit_super(struct super_block *sb) clear_buffer_write_io_error(sbh); set_buffer_uptodate(sbh); } - BUFFER_TRACE(sbh, "marking dirty"); - mark_buffer_dirty(sbh); - error = __sync_dirty_buffer(sbh, - REQ_SYNC | (test_opt(sb, BARRIER) ? REQ_FUA : 0)); + get_bh(sbh); + /* Clear potential dirty bit if it was journalled update */ + clear_buffer_dirty(sbh); + sbh->b_end_io = end_buffer_write_sync; + submit_bh(REQ_OP_WRITE, + REQ_SYNC | (test_opt(sb, BARRIER) ? REQ_FUA : 0), sbh); + wait_on_buffer(sbh); if (buffer_write_io_error(sbh)) { ext4_msg(sb, KERN_ERR, "I/O error while writing " "superblock"); clear_buffer_write_io_error(sbh); set_buffer_uptodate(sbh); + return -EIO; } - return error; + return 0; } /* diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 042325349098..564e28a1aa94 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1895,11 +1895,10 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, unlock_buffer(bs->bh); ea_bdebug(bs->bh, "cloning"); - s->base = kmalloc(bs->bh->b_size, GFP_NOFS); + s->base = kmemdup(BHDR(bs->bh), bs->bh->b_size, GFP_NOFS); error = -ENOMEM; if (s->base == NULL) goto cleanup; - memcpy(s->base, BHDR(bs->bh), bs->bh->b_size); s->first = ENTRY(header(s->base)+1); header(s->base)->h_refcount = cpu_to_le32(1); s->here = ENTRY(s->base + offset); diff --git a/fs/io_uring.c b/fs/io_uring.c index 3aab4182fd89..d3ee4fc532fa 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -298,8 +298,8 @@ struct io_buffer_list { /* below is for ring provided buffers */ __u16 buf_nr_pages; __u16 nr_entries; - __u32 head; - __u32 mask; + __u16 head; + __u16 mask; }; struct io_buffer { @@ -576,7 +576,6 @@ struct io_close { struct file *file; int fd; u32 file_slot; - u32 flags; }; struct io_timeout_data { @@ -784,12 +783,6 @@ struct io_msg { u32 len; }; -struct io_nop { - struct file *file; - u64 extra1; - u64 extra2; -}; - struct io_async_connect { struct sockaddr_storage address; }; @@ -851,6 +844,7 @@ enum { REQ_F_SINGLE_POLL_BIT, REQ_F_DOUBLE_POLL_BIT, REQ_F_PARTIAL_IO_BIT, + REQ_F_CQE32_INIT_BIT, REQ_F_APOLL_MULTISHOT_BIT, /* keep async read/write and isreg together and in order */ REQ_F_SUPPORT_NOWAIT_BIT, @@ -920,6 +914,8 @@ enum { REQ_F_PARTIAL_IO = BIT(REQ_F_PARTIAL_IO_BIT), /* fast poll multishot mode */ REQ_F_APOLL_MULTISHOT = BIT(REQ_F_APOLL_MULTISHOT_BIT), + /* ->extra1 and ->extra2 are initialised */ + REQ_F_CQE32_INIT = BIT(REQ_F_CQE32_INIT_BIT), }; struct async_poll { @@ -994,7 +990,6 @@ struct io_kiocb { struct io_msg msg; struct io_xattr xattr; struct io_socket sock; - struct io_nop nop; struct io_uring_cmd uring_cmd; }; @@ -1121,7 +1116,6 @@ static const struct io_op_def io_op_defs[] = { [IORING_OP_NOP] = { .audit_skip = 1, .iopoll = 1, - .buffer_select = 1, }, [IORING_OP_READV] = { .needs_file = 1, @@ -1729,9 +1723,16 @@ static void io_kbuf_recycle(struct io_kiocb *req, unsigned issue_flags) if (!(req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING))) return; - /* don't recycle if we already did IO to this buffer */ - if (req->flags & REQ_F_PARTIAL_IO) + /* + * For legacy provided buffer mode, don't recycle if we already did + * IO to this buffer. For ring-mapped provided buffer mode, we should + * increment ring->head to explicitly monopolize the buffer to avoid + * multiple use. + */ + if ((req->flags & REQ_F_BUFFER_SELECTED) && + (req->flags & REQ_F_PARTIAL_IO)) return; + /* * We don't need to recycle for REQ_F_BUFFER_RING, we can just clear * the flag and hence ensure that bl->head doesn't get incremented. @@ -1739,8 +1740,13 @@ static void io_kbuf_recycle(struct io_kiocb *req, unsigned issue_flags) */ if (req->flags & REQ_F_BUFFER_RING) { if (req->buf_list) { - req->buf_index = req->buf_list->bgid; - req->flags &= ~REQ_F_BUFFER_RING; + if (req->flags & REQ_F_PARTIAL_IO) { + req->buf_list->head++; + req->buf_list = NULL; + } else { + req->buf_index = req->buf_list->bgid; + req->flags &= ~REQ_F_BUFFER_RING; + } } return; } @@ -2441,94 +2447,66 @@ static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data, return true; } -static inline bool __io_fill_cqe(struct io_ring_ctx *ctx, u64 user_data, - s32 res, u32 cflags) +static inline bool __io_fill_cqe_req(struct io_ring_ctx *ctx, + struct io_kiocb *req) { struct io_uring_cqe *cqe; - /* - * If we can't get a cq entry, userspace overflowed the - * submission (by quite a lot). Increment the overflow count in - * the ring. - */ - cqe = io_get_cqe(ctx); - if (likely(cqe)) { - WRITE_ONCE(cqe->user_data, user_data); - WRITE_ONCE(cqe->res, res); - WRITE_ONCE(cqe->flags, cflags); - return true; - } - return io_cqring_event_overflow(ctx, user_data, res, cflags, 0, 0); -} + if (!(ctx->flags & IORING_SETUP_CQE32)) { + trace_io_uring_complete(req->ctx, req, req->cqe.user_data, + req->cqe.res, req->cqe.flags, 0, 0); -static inline bool __io_fill_cqe_req_filled(struct io_ring_ctx *ctx, - struct io_kiocb *req) -{ - struct io_uring_cqe *cqe; + /* + * If we can't get a cq entry, userspace overflowed the + * submission (by quite a lot). Increment the overflow count in + * the ring. + */ + cqe = io_get_cqe(ctx); + if (likely(cqe)) { + memcpy(cqe, &req->cqe, sizeof(*cqe)); + return true; + } - trace_io_uring_complete(req->ctx, req, req->cqe.user_data, - req->cqe.res, req->cqe.flags, 0, 0); + return io_cqring_event_overflow(ctx, req->cqe.user_data, + req->cqe.res, req->cqe.flags, + 0, 0); + } else { + u64 extra1 = 0, extra2 = 0; - /* - * If we can't get a cq entry, userspace overflowed the - * submission (by quite a lot). Increment the overflow count in - * the ring. - */ - cqe = io_get_cqe(ctx); - if (likely(cqe)) { - memcpy(cqe, &req->cqe, sizeof(*cqe)); - return true; - } - return io_cqring_event_overflow(ctx, req->cqe.user_data, - req->cqe.res, req->cqe.flags, 0, 0); -} + if (req->flags & REQ_F_CQE32_INIT) { + extra1 = req->extra1; + extra2 = req->extra2; + } -static inline bool __io_fill_cqe32_req_filled(struct io_ring_ctx *ctx, - struct io_kiocb *req) -{ - struct io_uring_cqe *cqe; - u64 extra1 = req->extra1; - u64 extra2 = req->extra2; + trace_io_uring_complete(req->ctx, req, req->cqe.user_data, + req->cqe.res, req->cqe.flags, extra1, extra2); - trace_io_uring_complete(req->ctx, req, req->cqe.user_data, - req->cqe.res, req->cqe.flags, extra1, extra2); + /* + * If we can't get a cq entry, userspace overflowed the + * submission (by quite a lot). Increment the overflow count in + * the ring. + */ + cqe = io_get_cqe(ctx); + if (likely(cqe)) { + memcpy(cqe, &req->cqe, sizeof(struct io_uring_cqe)); + WRITE_ONCE(cqe->big_cqe[0], extra1); + WRITE_ONCE(cqe->big_cqe[1], extra2); + return true; + } - /* - * If we can't get a cq entry, userspace overflowed the - * submission (by quite a lot). Increment the overflow count in - * the ring. - */ - cqe = io_get_cqe(ctx); - if (likely(cqe)) { - memcpy(cqe, &req->cqe, sizeof(struct io_uring_cqe)); - cqe->big_cqe[0] = extra1; - cqe->big_cqe[1] = extra2; - return true; + return io_cqring_event_overflow(ctx, req->cqe.user_data, + req->cqe.res, req->cqe.flags, + extra1, extra2); } - - return io_cqring_event_overflow(ctx, req->cqe.user_data, req->cqe.res, - req->cqe.flags, extra1, extra2); -} - -static inline bool __io_fill_cqe_req(struct io_kiocb *req, s32 res, u32 cflags) -{ - trace_io_uring_complete(req->ctx, req, req->cqe.user_data, res, cflags, 0, 0); - return __io_fill_cqe(req->ctx, req->cqe.user_data, res, cflags); } -static inline void __io_fill_cqe32_req(struct io_kiocb *req, s32 res, u32 cflags, - u64 extra1, u64 extra2) +static noinline bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, + s32 res, u32 cflags) { - struct io_ring_ctx *ctx = req->ctx; struct io_uring_cqe *cqe; - if (WARN_ON_ONCE(!(ctx->flags & IORING_SETUP_CQE32))) - return; - if (req->flags & REQ_F_CQE_SKIP) - return; - - trace_io_uring_complete(ctx, req, req->cqe.user_data, res, cflags, - extra1, extra2); + ctx->cq_extra++; + trace_io_uring_complete(ctx, NULL, user_data, res, cflags, 0, 0); /* * If we can't get a cq entry, userspace overflowed the @@ -2537,23 +2515,17 @@ static inline void __io_fill_cqe32_req(struct io_kiocb *req, s32 res, u32 cflags */ cqe = io_get_cqe(ctx); if (likely(cqe)) { - WRITE_ONCE(cqe->user_data, req->cqe.user_data); + WRITE_ONCE(cqe->user_data, user_data); WRITE_ONCE(cqe->res, res); WRITE_ONCE(cqe->flags, cflags); - WRITE_ONCE(cqe->big_cqe[0], extra1); - WRITE_ONCE(cqe->big_cqe[1], extra2); - return; - } - io_cqring_event_overflow(ctx, req->cqe.user_data, res, cflags, extra1, extra2); -} - -static noinline bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, - s32 res, u32 cflags) -{ - ctx->cq_extra++; - trace_io_uring_complete(ctx, NULL, user_data, res, cflags, 0, 0); - return __io_fill_cqe(ctx, user_data, res, cflags); + if (ctx->flags & IORING_SETUP_CQE32) { + WRITE_ONCE(cqe->big_cqe[0], 0); + WRITE_ONCE(cqe->big_cqe[1], 0); + } + return true; + } + return io_cqring_event_overflow(ctx, user_data, res, cflags, 0, 0); } static void __io_req_complete_put(struct io_kiocb *req) @@ -2590,16 +2562,11 @@ static void __io_req_complete_put(struct io_kiocb *req) static void __io_req_complete_post(struct io_kiocb *req, s32 res, u32 cflags) { - if (!(req->flags & REQ_F_CQE_SKIP)) - __io_fill_cqe_req(req, res, cflags); - __io_req_complete_put(req); -} - -static void __io_req_complete_post32(struct io_kiocb *req, s32 res, - u32 cflags, u64 extra1, u64 extra2) -{ - if (!(req->flags & REQ_F_CQE_SKIP)) - __io_fill_cqe32_req(req, res, cflags, extra1, extra2); + if (!(req->flags & REQ_F_CQE_SKIP)) { + req->cqe.res = res; + req->cqe.flags = cflags; + __io_fill_cqe_req(req->ctx, req); + } __io_req_complete_put(req); } @@ -2614,18 +2581,6 @@ static void io_req_complete_post(struct io_kiocb *req, s32 res, u32 cflags) io_cqring_ev_posted(ctx); } -static void io_req_complete_post32(struct io_kiocb *req, s32 res, - u32 cflags, u64 extra1, u64 extra2) -{ - struct io_ring_ctx *ctx = req->ctx; - - spin_lock(&ctx->completion_lock); - __io_req_complete_post32(req, res, cflags, extra1, extra2); - io_commit_cqring(ctx); - spin_unlock(&ctx->completion_lock); - io_cqring_ev_posted(ctx); -} - static inline void io_req_complete_state(struct io_kiocb *req, s32 res, u32 cflags) { @@ -2643,19 +2598,6 @@ static inline void __io_req_complete(struct io_kiocb *req, unsigned issue_flags, io_req_complete_post(req, res, cflags); } -static inline void __io_req_complete32(struct io_kiocb *req, - unsigned int issue_flags, s32 res, - u32 cflags, u64 extra1, u64 extra2) -{ - if (issue_flags & IO_URING_F_COMPLETE_DEFER) { - io_req_complete_state(req, res, cflags); - req->extra1 = extra1; - req->extra2 = extra2; - } else { - io_req_complete_post32(req, res, cflags, extra1, extra2); - } -} - static inline void io_req_complete(struct io_kiocb *req, s32 res) { if (res < 0) @@ -3202,12 +3144,8 @@ static void __io_submit_flush_completions(struct io_ring_ctx *ctx) struct io_kiocb *req = container_of(node, struct io_kiocb, comp_list); - if (!(req->flags & REQ_F_CQE_SKIP)) { - if (!(ctx->flags & IORING_SETUP_CQE32)) - __io_fill_cqe_req_filled(ctx, req); - else - __io_fill_cqe32_req_filled(ctx, req); - } + if (!(req->flags & REQ_F_CQE_SKIP)) + __io_fill_cqe_req(ctx, req); } io_commit_cqring(ctx); @@ -3326,7 +3264,9 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin) nr_events++; if (unlikely(req->flags & REQ_F_CQE_SKIP)) continue; - __io_fill_cqe_req(req, req->cqe.res, io_put_kbuf(req, 0)); + + req->cqe.flags = io_put_kbuf(req, 0); + __io_fill_cqe_req(req->ctx, req); } if (unlikely(!nr_events)) @@ -3677,6 +3617,20 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe) int ret; kiocb->ki_pos = READ_ONCE(sqe->off); + /* used for fixed read/write too - just read unconditionally */ + req->buf_index = READ_ONCE(sqe->buf_index); + + if (req->opcode == IORING_OP_READ_FIXED || + req->opcode == IORING_OP_WRITE_FIXED) { + struct io_ring_ctx *ctx = req->ctx; + u16 index; + + if (unlikely(req->buf_index >= ctx->nr_user_bufs)) + return -EFAULT; + index = array_index_nospec(req->buf_index, ctx->nr_user_bufs); + req->imu = ctx->user_bufs[index]; + io_req_set_rsrc_node(req, ctx, 0); + } ioprio = READ_ONCE(sqe->ioprio); if (ioprio) { @@ -3689,12 +3643,9 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe) kiocb->ki_ioprio = get_current_ioprio(); } - req->imu = NULL; req->rw.addr = READ_ONCE(sqe->addr); req->rw.len = READ_ONCE(sqe->len); req->rw.flags = READ_ONCE(sqe->rw_flags); - /* used for fixed read/write too - just read unconditionally */ - req->buf_index = READ_ONCE(sqe->buf_index); return 0; } @@ -3826,20 +3777,9 @@ static int __io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter static int io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter, unsigned int issue_flags) { - struct io_mapped_ubuf *imu = req->imu; - u16 index, buf_index = req->buf_index; - - if (likely(!imu)) { - struct io_ring_ctx *ctx = req->ctx; - - if (unlikely(buf_index >= ctx->nr_user_bufs)) - return -EFAULT; - io_req_set_rsrc_node(req, ctx, issue_flags); - index = array_index_nospec(buf_index, ctx->nr_user_bufs); - imu = READ_ONCE(ctx->user_bufs[index]); - req->imu = imu; - } - return __io_import_fixed(req, rw, iter, imu); + if (WARN_ON_ONCE(!req->imu)) + return -EFAULT; + return __io_import_fixed(req, rw, iter, req->imu); } static int io_buffer_add_list(struct io_ring_ctx *ctx, @@ -3876,19 +3816,17 @@ static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len, { struct io_uring_buf_ring *br = bl->buf_ring; struct io_uring_buf *buf; - __u32 head = bl->head; + __u16 head = bl->head; - if (unlikely(smp_load_acquire(&br->tail) == head)) { - io_ring_submit_unlock(req->ctx, issue_flags); + if (unlikely(smp_load_acquire(&br->tail) == head)) return NULL; - } head &= bl->mask; if (head < IO_BUFFER_LIST_BUF_PER_PAGE) { buf = &br->bufs[head]; } else { int off = head & (IO_BUFFER_LIST_BUF_PER_PAGE - 1); - int index = head / IO_BUFFER_LIST_BUF_PER_PAGE - 1; + int index = head / IO_BUFFER_LIST_BUF_PER_PAGE; buf = page_address(bl->buf_pages[index]); buf += off; } @@ -3898,7 +3836,7 @@ static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len, req->buf_list = bl; req->buf_index = buf->bid; - if (issue_flags & IO_URING_F_UNLOCKED) { + if (issue_flags & IO_URING_F_UNLOCKED || !file_can_poll(req->file)) { /* * If we came in unlocked, we have no choice but to consume the * buffer here. This does mean it'll be pinned until the IO @@ -5079,10 +5017,18 @@ void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd, req->uring_cmd.task_work_cb = task_work_cb; req->io_task_work.func = io_uring_cmd_work; - io_req_task_prio_work_add(req); + io_req_task_work_add(req); } EXPORT_SYMBOL_GPL(io_uring_cmd_complete_in_task); +static inline void io_req_set_cqe32_extra(struct io_kiocb *req, + u64 extra1, u64 extra2) +{ + req->extra1 = extra1; + req->extra2 = extra2; + req->flags |= REQ_F_CQE32_INIT; +} + /* * Called by consumers of io_uring_cmd, if they originally returned * -EIOCBQUEUED upon receiving the command. @@ -5093,10 +5039,10 @@ void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, ssize_t res2) if (ret < 0) req_set_fail(req); + if (req->ctx->flags & IORING_SETUP_CQE32) - __io_req_complete32(req, 0, ret, 0, res2, 0); - else - io_req_complete(req, ret); + io_req_set_cqe32_extra(req, res2, 0); + io_req_complete(req, ret); } EXPORT_SYMBOL_GPL(io_uring_cmd_done); @@ -5258,14 +5204,6 @@ done: static int io_nop_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { - /* - * If the ring is setup with CQE32, relay back addr/addr - */ - if (req->ctx->flags & IORING_SETUP_CQE32) { - req->nop.extra1 = READ_ONCE(sqe->addr); - req->nop.extra2 = READ_ONCE(sqe->addr2); - } - return 0; } @@ -5274,23 +5212,7 @@ static int io_nop_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) */ static int io_nop(struct io_kiocb *req, unsigned int issue_flags) { - unsigned int cflags; - void __user *buf; - - if (req->flags & REQ_F_BUFFER_SELECT) { - size_t len = 1; - - buf = io_buffer_select(req, &len, issue_flags); - if (!buf) - return -ENOBUFS; - } - - cflags = io_put_kbuf(req, issue_flags); - if (!(req->ctx->flags & IORING_SETUP_CQE32)) - __io_req_complete(req, issue_flags, 0, cflags); - else - __io_req_complete32(req, issue_flags, 0, cflags, - req->nop.extra1, req->nop.extra2); + __io_req_complete(req, issue_flags, 0, 0); return 0; } @@ -5988,18 +5910,14 @@ static int io_statx(struct io_kiocb *req, unsigned int issue_flags) static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { - if (sqe->off || sqe->addr || sqe->len || sqe->buf_index) + if (sqe->off || sqe->addr || sqe->len || sqe->rw_flags || sqe->buf_index) return -EINVAL; if (req->flags & REQ_F_FIXED_FILE) return -EBADF; req->close.fd = READ_ONCE(sqe->fd); req->close.file_slot = READ_ONCE(sqe->file_index); - req->close.flags = READ_ONCE(sqe->close_flags); - if (req->close.flags & ~IORING_CLOSE_FD_AND_FILE_SLOT) - return -EINVAL; - if (!(req->close.flags & IORING_CLOSE_FD_AND_FILE_SLOT) && - req->close.file_slot && req->close.fd) + if (req->close.file_slot && req->close.fd) return -EINVAL; return 0; @@ -6015,8 +5933,7 @@ static int io_close(struct io_kiocb *req, unsigned int issue_flags) if (req->close.file_slot) { ret = io_close_fixed(req, issue_flags); - if (ret || !(req->close.flags & IORING_CLOSE_FD_AND_FILE_SLOT)) - goto err; + goto err; } spin_lock(&files->file_lock); @@ -8063,8 +7980,8 @@ static int io_files_update_with_index_alloc(struct io_kiocb *req, if (ret < 0) break; if (copy_to_user(&fds[done], &ret, sizeof(ret))) { - ret = -EFAULT; __io_close_fixed(req, issue_flags, ret); + ret = -EFAULT; break; } } @@ -8773,6 +8690,7 @@ static void io_queue_async(struct io_kiocb *req, int ret) * Queued up for async execution, worker will release * submit reference when the iocb is actually submitted. */ + io_kbuf_recycle(req, 0); io_queue_iowq(req, NULL); break; case IO_APOLL_OK: @@ -9788,11 +9706,19 @@ static void __io_sqe_files_unregister(struct io_ring_ctx *ctx) static int io_sqe_files_unregister(struct io_ring_ctx *ctx) { + unsigned nr = ctx->nr_user_files; int ret; if (!ctx->file_data) return -ENXIO; + + /* + * Quiesce may unlock ->uring_lock, and while it's not held + * prevent new requests using the table. + */ + ctx->nr_user_files = 0; ret = io_rsrc_ref_quiesce(ctx->file_data, ctx); + ctx->nr_user_files = nr; if (!ret) __io_sqe_files_unregister(ctx); return ret; @@ -10690,12 +10616,19 @@ static void __io_sqe_buffers_unregister(struct io_ring_ctx *ctx) static int io_sqe_buffers_unregister(struct io_ring_ctx *ctx) { + unsigned nr = ctx->nr_user_bufs; int ret; if (!ctx->buf_data) return -ENXIO; + /* + * Quiesce may unlock ->uring_lock, and while it's not held + * prevent new requests using the table. + */ + ctx->nr_user_bufs = 0; ret = io_rsrc_ref_quiesce(ctx->buf_data, ctx); + ctx->nr_user_bufs = nr; if (!ret) __io_sqe_buffers_unregister(ctx); return ret; @@ -13002,6 +12935,10 @@ static int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) if (!is_power_of_2(reg.ring_entries)) return -EINVAL; + /* cannot disambiguate full vs empty due to head/tail size */ + if (reg.ring_entries >= 65536) + return -EINVAL; + if (unlikely(reg.bgid < BGID_ARRAY && !ctx->io_bl)) { int ret = io_init_bl_list(ctx); if (ret) diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index e49bb0938376..e9c308ae475f 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -2114,7 +2114,7 @@ out: /** * jbd2_journal_try_to_free_buffers() - try to free page buffers. * @journal: journal for operation - * @page: to try and free + * @folio: Folio to detach data from. * * For all the buffers on this page, * if they are fully written out ordered data, move them onto BUF_CLEAN diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index c8520284dda7..c1eda73254e1 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -288,6 +288,7 @@ static u32 initiate_file_draining(struct nfs_client *clp, rv = NFS4_OK; break; case -ENOENT: + set_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags); /* Embrace your forgetfulness! */ rv = NFS4ERR_NOMATCHING_LAYOUT; diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index a8ecdd527662..0c4e8dd6aa96 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2124,6 +2124,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, } goto out; } + file->f_mode |= FMODE_CAN_ODIRECT; err = nfs_finish_open(ctx, ctx->dentry, file, open_flags); trace_nfs_atomic_open_exit(dir, ctx, open_flags, err); diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 03d3a270eff4..e88f6b18445e 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -93,6 +93,7 @@ nfs4_file_open(struct inode *inode, struct file *filp) nfs_file_set_open_context(filp, ctx); nfs_fscache_open_file(inode, filp); err = 0; + filp->f_mode |= FMODE_CAN_ODIRECT; out_put_ctx: put_nfs_open_context(ctx); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 68a87be3e6f9..41a9b6b58fb9 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -469,6 +469,7 @@ pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo, pnfs_clear_lseg_state(lseg, lseg_list); pnfs_clear_layoutreturn_info(lo); pnfs_free_returned_lsegs(lo, lseg_list, &range, 0); + set_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags); if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags) && !test_and_set_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) pnfs_clear_layoutreturn_waitbit(lo); @@ -1917,8 +1918,9 @@ static void nfs_layoutget_begin(struct pnfs_layout_hdr *lo) static void nfs_layoutget_end(struct pnfs_layout_hdr *lo) { - if (atomic_dec_and_test(&lo->plh_outstanding)) - wake_up_var(&lo->plh_outstanding); + if (atomic_dec_and_test(&lo->plh_outstanding) && + test_and_clear_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags)) + wake_up_bit(&lo->plh_flags, NFS_LAYOUT_DRAIN); } static bool pnfs_is_first_layoutget(struct pnfs_layout_hdr *lo) @@ -2025,11 +2027,11 @@ lookup_again: * If the layout segment list is empty, but there are outstanding * layoutget calls, then they might be subject to a layoutrecall. */ - if ((list_empty(&lo->plh_segs) || !pnfs_layout_is_valid(lo)) && + if (test_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags) && atomic_read(&lo->plh_outstanding) != 0) { spin_unlock(&ino->i_lock); - lseg = ERR_PTR(wait_var_event_killable(&lo->plh_outstanding, - !atomic_read(&lo->plh_outstanding))); + lseg = ERR_PTR(wait_on_bit(&lo->plh_flags, NFS_LAYOUT_DRAIN, + TASK_KILLABLE)); if (IS_ERR(lseg)) goto out_put_layout_hdr; pnfs_put_layout_hdr(lo); @@ -2152,6 +2154,12 @@ lookup_again: case -ERECALLCONFLICT: case -EAGAIN: break; + case -ENODATA: + /* The server returned NFS4ERR_LAYOUTUNAVAILABLE */ + pnfs_layout_set_fail_bit( + lo, pnfs_iomode_to_fail_bit(iomode)); + lseg = NULL; + goto out_put_layout_hdr; default: if (!nfs_error_is_fatal(PTR_ERR(lseg))) { pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode)); @@ -2407,7 +2415,8 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) goto out_forget; } - if (!pnfs_layout_is_valid(lo) && !pnfs_is_first_layoutget(lo)) + if (test_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags) && + !pnfs_is_first_layoutget(lo)) goto out_forget; if (nfs4_stateid_match_other(&lo->plh_stateid, &res->stateid)) { diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 07f11489e4e9..f331f067691b 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -105,6 +105,7 @@ enum { NFS_LAYOUT_FIRST_LAYOUTGET, /* Serialize first layoutget */ NFS_LAYOUT_INODE_FREEING, /* The inode is being freed */ NFS_LAYOUT_HASHED, /* The layout visible */ + NFS_LAYOUT_DRAIN, }; enum layoutdriver_policy_flags { diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 836ab1b8ed7b..1824f61621a2 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -997,9 +997,11 @@ xfs_attr_set( /* * We have no control over the attribute names that userspace passes us * to remove, so we have to allow the name lookup prior to attribute - * removal to fail as well. + * removal to fail as well. Preserve the logged flag, since we need + * to pass that through to the logging code. */ - args->op_flags = XFS_DA_OP_OKNOENT; + args->op_flags = XFS_DA_OP_OKNOENT | + (args->op_flags & XFS_DA_OP_LOGGED); if (args->value) { XFS_STATS_INC(mp, xs_attr_set); @@ -1439,12 +1441,11 @@ static int xfs_attr_node_try_addname( struct xfs_attr_intent *attr) { - struct xfs_da_args *args = attr->xattri_da_args; struct xfs_da_state *state = attr->xattri_da_state; struct xfs_da_state_blk *blk; int error; - trace_xfs_attr_node_addname(args); + trace_xfs_attr_node_addname(state->args); blk = &state->path.blk[state->path.active-1]; ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC); diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h index e329da3e7afa..b4a2fc77017e 100644 --- a/fs/xfs/libxfs/xfs_attr.h +++ b/fs/xfs/libxfs/xfs_attr.h @@ -28,16 +28,6 @@ struct xfs_attr_list_context; */ #define ATTR_MAX_VALUELEN (64*1024) /* max length of a value */ -static inline bool xfs_has_larp(struct xfs_mount *mp) -{ -#ifdef DEBUG - /* Logged xattrs require a V5 super for log_incompat */ - return xfs_has_crc(mp) && xfs_globals.larp; -#else - return false; -#endif -} - /* * Kernel-internal version of the attrlist cursor. */ @@ -624,7 +614,7 @@ static inline enum xfs_delattr_state xfs_attr_init_replace_state(struct xfs_da_args *args) { args->op_flags |= XFS_DA_OP_ADDNAME | XFS_DA_OP_REPLACE; - if (xfs_has_larp(args->dp->i_mount)) + if (args->op_flags & XFS_DA_OP_LOGGED) return xfs_attr_init_remove_state(args); return xfs_attr_init_add_state(args); } diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 15a990409463..37e7c33f6283 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -1530,7 +1530,7 @@ xfs_attr3_leaf_add_work( if (tmp) entry->flags |= XFS_ATTR_LOCAL; if (args->op_flags & XFS_DA_OP_REPLACE) { - if (!xfs_has_larp(mp)) + if (!(args->op_flags & XFS_DA_OP_LOGGED)) entry->flags |= XFS_ATTR_INCOMPLETE; if ((args->blkno2 == args->blkno) && (args->index2 <= args->index)) { diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h index d33b7686a0b3..ffa3df5b2893 100644 --- a/fs/xfs/libxfs/xfs_da_btree.h +++ b/fs/xfs/libxfs/xfs_da_btree.h @@ -92,6 +92,7 @@ typedef struct xfs_da_args { #define XFS_DA_OP_NOTIME (1u << 5) /* don't update inode timestamps */ #define XFS_DA_OP_REMOVE (1u << 6) /* this is a remove operation */ #define XFS_DA_OP_RECOVERY (1u << 7) /* Log recovery operation */ +#define XFS_DA_OP_LOGGED (1u << 8) /* Use intent items to track op */ #define XFS_DA_OP_FLAGS \ { XFS_DA_OP_JUSTCHECK, "JUSTCHECK" }, \ @@ -101,7 +102,8 @@ typedef struct xfs_da_args { { XFS_DA_OP_CILOOKUP, "CILOOKUP" }, \ { XFS_DA_OP_NOTIME, "NOTIME" }, \ { XFS_DA_OP_REMOVE, "REMOVE" }, \ - { XFS_DA_OP_RECOVERY, "RECOVERY" } + { XFS_DA_OP_RECOVERY, "RECOVERY" }, \ + { XFS_DA_OP_LOGGED, "LOGGED" } /* * Storage for holding state during Btree searches and split/join ops. diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c index 4a28c2d77070..135d44133477 100644 --- a/fs/xfs/xfs_attr_item.c +++ b/fs/xfs/xfs_attr_item.c @@ -413,18 +413,20 @@ xfs_attr_create_intent( struct xfs_mount *mp = tp->t_mountp; struct xfs_attri_log_item *attrip; struct xfs_attr_intent *attr; + struct xfs_da_args *args; ASSERT(count == 1); - if (!xfs_sb_version_haslogxattrs(&mp->m_sb)) - return NULL; - /* * Each attr item only performs one attribute operation at a time, so * this is a list of one */ attr = list_first_entry_or_null(items, struct xfs_attr_intent, xattri_list); + args = attr->xattri_da_args; + + if (!(args->op_flags & XFS_DA_OP_LOGGED)) + return NULL; /* * Create a buffer to store the attribute name and value. This buffer @@ -432,8 +434,6 @@ xfs_attr_create_intent( * and the lower level xattr log items. */ if (!attr->xattri_nameval) { - struct xfs_da_args *args = attr->xattri_da_args; - /* * Transfer our reference to the name/value buffer to the * deferred work state structure. @@ -617,7 +617,10 @@ xfs_attri_item_recover( args->namelen = nv->name.i_len; args->hashval = xfs_da_hashname(args->name, args->namelen); args->attr_filter = attrp->alfi_attr_filter & XFS_ATTRI_FILTER_MASK; - args->op_flags = XFS_DA_OP_RECOVERY | XFS_DA_OP_OKNOENT; + args->op_flags = XFS_DA_OP_RECOVERY | XFS_DA_OP_OKNOENT | + XFS_DA_OP_LOGGED; + + ASSERT(xfs_sb_version_haslogxattrs(&mp->m_sb)); switch (attr->xattri_op_flags) { case XFS_ATTRI_OP_FLAGS_SET: diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 5a364a7d58fd..0d67ff8a8961 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -1096,7 +1096,8 @@ xfs_flags2diflags2( { uint64_t di_flags2 = (ip->i_diflags2 & (XFS_DIFLAG2_REFLINK | - XFS_DIFLAG2_BIGTIME)); + XFS_DIFLAG2_BIGTIME | + XFS_DIFLAG2_NREXT64)); if (xflags & FS_XFLAG_DAX) di_flags2 |= XFS_DIFLAG2_DAX; diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c index 35e13e125ec6..c325a28b89a8 100644 --- a/fs/xfs/xfs_xattr.c +++ b/fs/xfs/xfs_xattr.c @@ -68,6 +68,18 @@ xfs_attr_rele_log_assist( xlog_drop_incompat_feat(mp->m_log); } +static inline bool +xfs_attr_want_log_assist( + struct xfs_mount *mp) +{ +#ifdef DEBUG + /* Logged xattrs require a V5 super for log_incompat */ + return xfs_has_crc(mp) && xfs_globals.larp; +#else + return false; +#endif +} + /* * Set or remove an xattr, having grabbed the appropriate logging resources * prior to calling libxfs. @@ -80,11 +92,14 @@ xfs_attr_change( bool use_logging = false; int error; - if (xfs_has_larp(mp)) { + ASSERT(!(args->op_flags & XFS_DA_OP_LOGGED)); + + if (xfs_attr_want_log_assist(mp)) { error = xfs_attr_grab_log_assist(mp); if (error) return error; + args->op_flags |= XFS_DA_OP_LOGGED; use_logging = true; } |