summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig1
-rw-r--r--fs/afs/rxrpc.c15
-rw-r--r--fs/aio.c6
-rw-r--r--fs/binfmt_elf.c1
-rw-r--r--fs/block_dev.c9
-rw-r--r--fs/btrfs/async-thread.c15
-rw-r--r--fs/btrfs/backref.c7
-rw-r--r--fs/btrfs/btrfs_inode.h44
-rw-r--r--fs/btrfs/compression.c45
-rw-r--r--fs/btrfs/ctree.c128
-rw-r--r--fs/btrfs/ctree.h84
-rw-r--r--fs/btrfs/delayed-inode.c57
-rw-r--r--fs/btrfs/delayed-inode.h16
-rw-r--r--fs/btrfs/delayed-ref.c31
-rw-r--r--fs/btrfs/delayed-ref.h6
-rw-r--r--fs/btrfs/dir-item.c5
-rw-r--r--fs/btrfs/disk-io.c44
-rw-r--r--fs/btrfs/disk-io.h6
-rw-r--r--fs/btrfs/export.c13
-rw-r--r--fs/btrfs/extent-tree.c299
-rw-r--r--fs/btrfs/extent_io.c213
-rw-r--r--fs/btrfs/extent_io.h5
-rw-r--r--fs/btrfs/file-item.c36
-rw-r--r--fs/btrfs/file.c17
-rw-r--r--fs/btrfs/free-space-cache.c65
-rw-r--r--fs/btrfs/free-space-cache.h7
-rw-r--r--fs/btrfs/free-space-tree.c2
-rw-r--r--fs/btrfs/inode-map.c2
-rw-r--r--fs/btrfs/inode.c620
-rw-r--r--fs/btrfs/ioctl.c101
-rw-r--r--fs/btrfs/ordered-data.c47
-rw-r--r--fs/btrfs/ordered-data.h4
-rw-r--r--fs/btrfs/props.c4
-rw-r--r--fs/btrfs/qgroup.c163
-rw-r--r--fs/btrfs/qgroup.h36
-rw-r--r--fs/btrfs/raid56.c2
-rw-r--r--fs/btrfs/relocation.c25
-rw-r--r--fs/btrfs/root-tree.c6
-rw-r--r--fs/btrfs/scrub.c12
-rw-r--r--fs/btrfs/send.c125
-rw-r--r--fs/btrfs/super.c7
-rw-r--r--fs/btrfs/transaction.c58
-rw-r--r--fs/btrfs/tree-log.c309
-rw-r--r--fs/btrfs/tree-log.h14
-rw-r--r--fs/btrfs/ulist.c10
-rw-r--r--fs/btrfs/ulist.h8
-rw-r--r--fs/btrfs/uuid-tree.c4
-rw-r--r--fs/btrfs/volumes.c18
-rw-r--r--fs/btrfs/xattr.c16
-rw-r--r--fs/buffer.c2
-rw-r--r--fs/ceph/addr.c4
-rw-r--r--fs/ceph/caps.c7
-rw-r--r--fs/ceph/dir.c5
-rw-r--r--fs/ceph/inode.c3
-rw-r--r--fs/ceph/mds_client.c14
-rw-r--r--fs/cifs/Kconfig12
-rw-r--r--fs/cifs/cifsencrypt.c51
-rw-r--r--fs/cifs/cifsfs.c14
-rw-r--r--fs/cifs/cifsglob.h28
-rw-r--r--fs/cifs/cifsproto.h13
-rw-r--r--fs/cifs/cifssmb.c135
-rw-r--r--fs/cifs/connect.c71
-rw-r--r--fs/cifs/file.c62
-rw-r--r--fs/cifs/readdir.c1
-rw-r--r--fs/cifs/sess.c27
-rw-r--r--fs/cifs/smb1ops.c4
-rw-r--r--fs/cifs/smb2glob.h5
-rw-r--r--fs/cifs/smb2maperror.c5
-rw-r--r--fs/cifs/smb2misc.c61
-rw-r--r--fs/cifs/smb2ops.c663
-rw-r--r--fs/cifs/smb2pdu.c575
-rw-r--r--fs/cifs/smb2pdu.h27
-rw-r--r--fs/cifs/smb2proto.h5
-rw-r--r--fs/cifs/smb2transport.c132
-rw-r--r--fs/cifs/transport.c171
-rw-r--r--fs/coredump.c18
-rw-r--r--fs/crypto/Kconfig1
-rw-r--r--fs/crypto/Makefile1
-rw-r--r--fs/crypto/bio.c145
-rw-r--r--fs/crypto/crypto.c157
-rw-r--r--fs/crypto/fname.c4
-rw-r--r--fs/crypto/fscrypt_private.h20
-rw-r--r--fs/crypto/keyinfo.c44
-rw-r--r--fs/crypto/policy.c100
-rw-r--r--fs/dax.c70
-rw-r--r--fs/dcache.c7
-rw-r--r--fs/direct-io.c3
-rw-r--r--fs/ecryptfs/crypto.c15
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h9
-rw-r--r--fs/ecryptfs/keystore.c2
-rw-r--r--fs/ecryptfs/main.c2
-rw-r--r--fs/ecryptfs/miscdev.c2
-rw-r--r--fs/ext2/Kconfig1
-rw-r--r--fs/ext2/ext2.h2
-rw-r--r--fs/ext2/inode.c4
-rw-r--r--fs/ext4/Kconfig1
-rw-r--r--fs/ext4/ext4.h69
-rw-r--r--fs/ext4/ext4_jbd2.c11
-rw-r--r--fs/ext4/extents.c27
-rw-r--r--fs/ext4/file.c22
-rw-r--r--fs/ext4/fsync.c3
-rw-r--r--fs/ext4/hash.c71
-rw-r--r--fs/ext4/ialloc.c5
-rw-r--r--fs/ext4/inline.c123
-rw-r--r--fs/ext4/inode.c81
-rw-r--r--fs/ext4/ioctl.c50
-rw-r--r--fs/ext4/mballoc.c25
-rw-r--r--fs/ext4/namei.c34
-rw-r--r--fs/ext4/page-io.c3
-rw-r--r--fs/ext4/resize.c5
-rw-r--r--fs/ext4/super.c60
-rw-r--r--fs/ext4/xattr.c33
-rw-r--r--fs/ext4/xattr.h32
-rw-r--r--fs/f2fs/checkpoint.c36
-rw-r--r--fs/f2fs/data.c162
-rw-r--r--fs/f2fs/debug.c28
-rw-r--r--fs/f2fs/dir.c5
-rw-r--r--fs/f2fs/extent_cache.c22
-rw-r--r--fs/f2fs/f2fs.h619
-rw-r--r--fs/f2fs/file.c28
-rw-r--r--fs/f2fs/gc.c6
-rw-r--r--fs/f2fs/namei.c22
-rw-r--r--fs/f2fs/node.c209
-rw-r--r--fs/f2fs/node.h17
-rw-r--r--fs/f2fs/recovery.c9
-rw-r--r--fs/f2fs/segment.c429
-rw-r--r--fs/f2fs/segment.h27
-rw-r--r--fs/f2fs/super.c76
-rw-r--r--fs/f2fs/xattr.c132
-rw-r--r--fs/f2fs/xattr.h7
-rw-r--r--fs/fscache/cookie.c5
-rw-r--r--fs/fscache/netfs.c1
-rw-r--r--fs/fscache/object.c32
-rw-r--r--fs/fuse/dev.c8
-rw-r--r--fs/fuse/dir.c2
-rw-r--r--fs/gfs2/aops.c4
-rw-r--r--fs/gfs2/bmap.c29
-rw-r--r--fs/gfs2/glock.c12
-rw-r--r--fs/gfs2/incore.h11
-rw-r--r--fs/gfs2/log.c21
-rw-r--r--fs/gfs2/meta_io.c6
-rw-r--r--fs/gfs2/ops_fstype.c1
-rw-r--r--fs/gfs2/trans.c81
-rw-r--r--fs/internal.h2
-rw-r--r--fs/iomap.c21
-rw-r--r--fs/jbd2/journal.c2
-rw-r--r--fs/jbd2/transaction.c4
-rw-r--r--fs/jfs/resize.c4
-rw-r--r--fs/jfs/super.c4
-rw-r--r--fs/libfs.c3
-rw-r--r--fs/lockd/svc.c2
-rw-r--r--fs/namei.c68
-rw-r--r--fs/namespace.c64
-rw-r--r--fs/ncpfs/sock.c111
-rw-r--r--fs/nfs/nfs4proc.c33
-rw-r--r--fs/nfs/nfs4state.c2
-rw-r--r--fs/nfs/pnfs.c2
-rw-r--r--fs/nfsd/export.c1
-rw-r--r--fs/nfsd/nfs3proc.c8
-rw-r--r--fs/nfsd/nfs4layouts.c5
-rw-r--r--fs/nfsd/nfs4proc.c6
-rw-r--r--fs/nfsd/nfs4state.c19
-rw-r--r--fs/nfsd/nfs4xdr.c23
-rw-r--r--fs/nfsd/nfsctl.c10
-rw-r--r--fs/nfsd/nfsd.h6
-rw-r--r--fs/nfsd/nfsproc.c8
-rw-r--r--fs/nfsd/nfssvc.c2
-rw-r--r--fs/nfsd/state.h4
-rw-r--r--fs/nfsd/vfs.c45
-rw-r--r--fs/nfsd/vfs.h6
-rw-r--r--fs/notify/fanotify/fanotify.c11
-rw-r--r--fs/notify/mark.c12
-rw-r--r--fs/ocfs2/dlmglue.c10
-rw-r--r--fs/ocfs2/stackglue.c6
-rw-r--r--fs/ocfs2/stackglue.h3
-rw-r--r--fs/open.c14
-rw-r--r--fs/orangefs/devorangefs-req.c5
-rw-r--r--fs/orangefs/inode.c6
-rw-r--r--fs/orangefs/orangefs-bufmap.c5
-rw-r--r--fs/orangefs/orangefs-debugfs.c15
-rw-r--r--fs/orangefs/orangefs-dev-proto.h3
-rw-r--r--fs/orangefs/orangefs-kernel.h1
-rw-r--r--fs/orangefs/orangefs-mod.c12
-rw-r--r--fs/orangefs/orangefs-sysfs.c32
-rw-r--r--fs/orangefs/upcall.h1
-rw-r--r--fs/overlayfs/copy_up.c91
-rw-r--r--fs/overlayfs/namei.c27
-rw-r--r--fs/overlayfs/overlayfs.h11
-rw-r--r--fs/overlayfs/ovl_entry.h3
-rw-r--r--fs/overlayfs/super.c39
-rw-r--r--fs/overlayfs/util.c30
-rw-r--r--fs/posix_acl.c9
-rw-r--r--fs/proc/base.c2
-rw-r--r--fs/proc/page.c3
-rw-r--r--fs/proc/proc_sysctl.c3
-rw-r--r--fs/pstore/ram.c3
-rw-r--r--fs/pstore/ram_core.c12
-rw-r--r--fs/read_write.c9
-rw-r--r--fs/romfs/super.c23
-rw-r--r--fs/splice.c23
-rw-r--r--fs/ubifs/Kconfig2
-rw-r--r--fs/ubifs/crypto.c11
-rw-r--r--fs/ubifs/dir.c58
-rw-r--r--fs/ubifs/ioctl.c3
-rw-r--r--fs/ubifs/journal.c2
-rw-r--r--fs/ubifs/tnc.c25
-rw-r--r--fs/ubifs/ubifs.h28
-rw-r--r--fs/udf/ecma_167.h98
-rw-r--r--fs/udf/file.c46
-rw-r--r--fs/udf/inode.c118
-rw-r--r--fs/udf/lowlevel.c2
-rw-r--r--fs/udf/misc.c2
-rw-r--r--fs/udf/namei.c2
-rw-r--r--fs/udf/osta_udf.h34
-rw-r--r--fs/udf/super.c53
-rw-r--r--fs/udf/symlink.c30
-rw-r--r--fs/udf/udfdecl.h2
-rw-r--r--fs/userfaultfd.c37
-rw-r--r--fs/xfs/libxfs/xfs_ag_resv.c73
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c208
-rw-r--r--fs/xfs/libxfs/xfs_alloc.h2
-rw-r--r--fs/xfs/libxfs/xfs_attr.c6
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c228
-rw-r--r--fs/xfs/libxfs/xfs_bmap.h6
-rw-r--r--fs/xfs/libxfs/xfs_bmap_btree.c13
-rw-r--r--fs/xfs/libxfs/xfs_btree.c48
-rw-r--r--fs/xfs/libxfs/xfs_btree.h8
-rw-r--r--fs/xfs/libxfs/xfs_da_btree.c6
-rw-r--r--fs/xfs/libxfs/xfs_da_btree.h2
-rw-r--r--fs/xfs/libxfs/xfs_dir2.c39
-rw-r--r--fs/xfs/libxfs/xfs_dir2.h8
-rw-r--r--fs/xfs/libxfs/xfs_dir2_node.c51
-rw-r--r--fs/xfs/libxfs/xfs_ialloc_btree.c90
-rw-r--r--fs/xfs/libxfs/xfs_ialloc_btree.h3
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.c10
-rw-r--r--fs/xfs/libxfs/xfs_inode_fork.c9
-rw-r--r--fs/xfs/libxfs/xfs_log_recover.h1
-rw-r--r--fs/xfs/libxfs/xfs_refcount_btree.c9
-rw-r--r--fs/xfs/libxfs/xfs_refcount_btree.h3
-rw-r--r--fs/xfs/libxfs/xfs_rmap_btree.c14
-rw-r--r--fs/xfs/libxfs/xfs_rmap_btree.h3
-rw-r--r--fs/xfs/libxfs/xfs_sb.c2
-rw-r--r--fs/xfs/xfs_aops.c25
-rw-r--r--fs/xfs/xfs_bmap_util.c96
-rw-r--r--fs/xfs/xfs_bmap_util.h5
-rw-r--r--fs/xfs/xfs_buf.c1
-rw-r--r--fs/xfs/xfs_buf_item.c1
-rw-r--r--fs/xfs/xfs_discard.c29
-rw-r--r--fs/xfs/xfs_discard.h1
-rw-r--r--fs/xfs/xfs_dquot.c4
-rw-r--r--fs/xfs/xfs_extent_busy.c156
-rw-r--r--fs/xfs/xfs_extent_busy.h11
-rw-r--r--fs/xfs/xfs_file.c34
-rw-r--r--fs/xfs/xfs_fsops.c53
-rw-r--r--fs/xfs/xfs_icache.c62
-rw-r--r--fs/xfs/xfs_icache.h2
-rw-r--r--fs/xfs/xfs_inode.c74
-rw-r--r--fs/xfs/xfs_ioctl.c4
-rw-r--r--fs/xfs/xfs_iomap.c69
-rw-r--r--fs/xfs/xfs_iomap.h24
-rw-r--r--fs/xfs/xfs_iops.c50
-rw-r--r--fs/xfs/xfs_linux.h6
-rw-r--r--fs/xfs/xfs_log.c12
-rw-r--r--fs/xfs/xfs_log.h1
-rw-r--r--fs/xfs/xfs_log_cil.c84
-rw-r--r--fs/xfs/xfs_log_priv.h1
-rw-r--r--fs/xfs/xfs_mount.c33
-rw-r--r--fs/xfs/xfs_mount.h18
-rw-r--r--fs/xfs/xfs_qm.c3
-rw-r--r--fs/xfs/xfs_refcount_item.c3
-rw-r--r--fs/xfs/xfs_reflink.c265
-rw-r--r--fs/xfs/xfs_reflink.h6
-rw-r--r--fs/xfs/xfs_super.c8
-rw-r--r--fs/xfs/xfs_super.h2
-rw-r--r--fs/xfs/xfs_sysfs.c18
-rw-r--r--fs/xfs/xfs_trace.h13
-rw-r--r--fs/xfs/xfs_trans.h1
277 files changed, 7301 insertions, 4332 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index c2a377cdda2b..83eab52fb3f6 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -38,6 +38,7 @@ config FS_DAX
bool "Direct Access (DAX) support"
depends on MMU
depends on !(ARM || MIPS || SPARC)
+ select FS_IOMAP
help
Direct Access (DAX) can be used on memory-backed block devices.
If the block device supports DAX and the filesystem supports DAX,
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 25f05a8d21b1..bfba8f0c3d43 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -249,8 +249,7 @@ void afs_flat_call_destructor(struct afs_call *call)
/*
* attach the data from a bunch of pages on an inode to a call
*/
-static int afs_send_pages(struct afs_call *call, struct msghdr *msg,
- struct kvec *iov)
+static int afs_send_pages(struct afs_call *call, struct msghdr *msg)
{
struct page *pages[8];
unsigned count, n, loop, offset, to;
@@ -273,20 +272,21 @@ static int afs_send_pages(struct afs_call *call, struct msghdr *msg,
loop = 0;
do {
+ struct bio_vec bvec = {.bv_page = pages[loop],
+ .bv_offset = offset};
msg->msg_flags = 0;
to = PAGE_SIZE;
if (first + loop >= last)
to = call->last_to;
else
msg->msg_flags = MSG_MORE;
- iov->iov_base = kmap(pages[loop]) + offset;
- iov->iov_len = to - offset;
+ bvec.bv_len = to - offset;
offset = 0;
_debug("- range %u-%u%s",
offset, to, msg->msg_flags ? " [more]" : "");
- iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC,
- iov, 1, to - offset);
+ iov_iter_bvec(&msg->msg_iter, WRITE | ITER_BVEC,
+ &bvec, 1, to - offset);
/* have to change the state *before* sending the last
* packet as RxRPC might give us the reply before it
@@ -295,7 +295,6 @@ static int afs_send_pages(struct afs_call *call, struct msghdr *msg,
call->state = AFS_CALL_AWAIT_REPLY;
ret = rxrpc_kernel_send_data(afs_socket, call->rxcall,
msg, to - offset);
- kunmap(pages[loop]);
if (ret < 0)
break;
} while (++loop < count);
@@ -379,7 +378,7 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
goto error_do_abort;
if (call->send_pages) {
- ret = afs_send_pages(call, &msg, iov);
+ ret = afs_send_pages(call, &msg);
if (ret < 0)
goto error_do_abort;
}
diff --git a/fs/aio.c b/fs/aio.c
index 4ab67e8cb776..873b4ca82ccb 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1085,7 +1085,8 @@ static void aio_complete(struct kiocb *kiocb, long res, long res2)
* Tell lockdep we inherited freeze protection from submission
* thread.
*/
- __sb_writers_acquired(file_inode(file)->i_sb, SB_FREEZE_WRITE);
+ if (S_ISREG(file_inode(file)->i_mode))
+ __sb_writers_acquired(file_inode(file)->i_sb, SB_FREEZE_WRITE);
file_end_write(file);
}
@@ -1525,7 +1526,8 @@ static ssize_t aio_write(struct kiocb *req, struct iocb *iocb, bool vectored,
* by telling it the lock got released so that it doesn't
* complain about held lock when we return to userspace.
*/
- __sb_writers_release(file_inode(file)->i_sb, SB_FREEZE_WRITE);
+ if (S_ISREG(file_inode(file)->i_mode))
+ __sb_writers_release(file_inode(file)->i_sb, SB_FREEZE_WRITE);
}
kfree(iovec);
return ret;
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 29a02daf08a9..422370293cfd 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -2298,6 +2298,7 @@ static int elf_core_dump(struct coredump_params *cprm)
goto end_coredump;
}
}
+ dump_truncate(cprm);
if (!elf_core_write_extra_data(cprm))
goto end_coredump;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 6254cee8f8f3..3c47614a4b32 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -328,9 +328,10 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
struct file *file = iocb->ki_filp;
struct inode *inode = bdev_file_inode(file);
struct block_device *bdev = I_BDEV(inode);
+ struct blk_plug plug;
struct blkdev_dio *dio;
struct bio *bio;
- bool is_read = (iov_iter_rw(iter) == READ);
+ bool is_read = (iov_iter_rw(iter) == READ), is_sync;
loff_t pos = iocb->ki_pos;
blk_qc_t qc = BLK_QC_T_NONE;
int ret;
@@ -343,7 +344,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
bio_get(bio); /* extra ref for the completion handler */
dio = container_of(bio, struct blkdev_dio, bio);
- dio->is_sync = is_sync_kiocb(iocb);
+ dio->is_sync = is_sync = is_sync_kiocb(iocb);
if (dio->is_sync)
dio->waiter = current;
else
@@ -353,6 +354,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
dio->multi_bio = false;
dio->should_dirty = is_read && (iter->type == ITER_IOVEC);
+ blk_start_plug(&plug);
for (;;) {
bio->bi_bdev = bdev;
bio->bi_iter.bi_sector = pos >> 9;
@@ -394,8 +396,9 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
submit_bio(bio);
bio = bio_alloc(GFP_KERNEL, nr_pages);
}
+ blk_finish_plug(&plug);
- if (!dio->is_sync)
+ if (!is_sync)
return -EIOCBQUEUED;
for (;;) {
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 63d197724519..ff0b0be92d61 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -273,6 +273,8 @@ static void run_ordered_work(struct __btrfs_workqueue *wq)
unsigned long flags;
while (1) {
+ void *wtag;
+
spin_lock_irqsave(lock, flags);
if (list_empty(list))
break;
@@ -299,11 +301,13 @@ static void run_ordered_work(struct __btrfs_workqueue *wq)
spin_unlock_irqrestore(lock, flags);
/*
- * we don't want to call the ordered free functions
- * with the lock held though
+ * We don't want to call the ordered free functions with the
+ * lock held though. Save the work as tag for the trace event,
+ * because the callback could free the structure.
*/
+ wtag = work;
work->ordered_free(work);
- trace_btrfs_all_work_done(work);
+ trace_btrfs_all_work_done(wq->fs_info, wtag);
}
spin_unlock_irqrestore(lock, flags);
}
@@ -311,6 +315,7 @@ static void run_ordered_work(struct __btrfs_workqueue *wq)
static void normal_work_helper(struct btrfs_work *work)
{
struct __btrfs_workqueue *wq;
+ void *wtag;
int need_order = 0;
/*
@@ -324,6 +329,8 @@ static void normal_work_helper(struct btrfs_work *work)
if (work->ordered_func)
need_order = 1;
wq = work->wq;
+ /* Safe for tracepoints in case work gets freed by the callback */
+ wtag = work;
trace_btrfs_work_sched(work);
thresh_exec_hook(wq);
@@ -333,7 +340,7 @@ static void normal_work_helper(struct btrfs_work *work)
run_ordered_work(wq);
}
if (!need_order)
- trace_btrfs_all_work_done(work);
+ trace_btrfs_all_work_done(wq->fs_info, wtag);
}
void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t uniq_func,
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 8299601a3549..7699e16784d3 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -956,8 +956,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
/*
* add all inline backrefs for bytenr to the list
*/
-static int __add_inline_refs(struct btrfs_fs_info *fs_info,
- struct btrfs_path *path, u64 bytenr,
+static int __add_inline_refs(struct btrfs_path *path, u64 bytenr,
int *info_level, struct list_head *prefs,
struct ref_root *ref_tree,
u64 *total_refs, u64 inum)
@@ -1284,7 +1283,7 @@ again:
*/
delayed_refs = &trans->transaction->delayed_refs;
spin_lock(&delayed_refs->lock);
- head = btrfs_find_delayed_ref_head(trans, bytenr);
+ head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
if (head) {
if (!mutex_trylock(&head->mutex)) {
atomic_inc(&head->node.refs);
@@ -1354,7 +1353,7 @@ again:
if (key.objectid == bytenr &&
(key.type == BTRFS_EXTENT_ITEM_KEY ||
key.type == BTRFS_METADATA_ITEM_KEY)) {
- ret = __add_inline_refs(fs_info, path, bytenr,
+ ret = __add_inline_refs(path, bytenr,
&info_level, &prefs,
ref_tree, &total_refs,
inum);
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 1a8fa46ff87e..819a6d27218a 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -224,16 +224,16 @@ static inline void btrfs_insert_inode_hash(struct inode *inode)
__insert_inode_hash(inode, h);
}
-static inline u64 btrfs_ino(struct inode *inode)
+static inline u64 btrfs_ino(struct btrfs_inode *inode)
{
- u64 ino = BTRFS_I(inode)->location.objectid;
+ u64 ino = inode->location.objectid;
/*
* !ino: btree_inode
* type == BTRFS_ROOT_ITEM_KEY: subvol dir
*/
- if (!ino || BTRFS_I(inode)->location.type == BTRFS_ROOT_ITEM_KEY)
- ino = inode->i_ino;
+ if (!ino || inode->location.type == BTRFS_ROOT_ITEM_KEY)
+ ino = inode->vfs_inode.i_ino;
return ino;
}
@@ -248,23 +248,21 @@ static inline bool btrfs_is_free_space_inode(struct inode *inode)
struct btrfs_root *root = BTRFS_I(inode)->root;
if (root == root->fs_info->tree_root &&
- btrfs_ino(inode) != BTRFS_BTREE_INODE_OBJECTID)
+ btrfs_ino(BTRFS_I(inode)) != BTRFS_BTREE_INODE_OBJECTID)
return true;
if (BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID)
return true;
return false;
}
-static inline int btrfs_inode_in_log(struct inode *inode, u64 generation)
+static inline int btrfs_inode_in_log(struct btrfs_inode *inode, u64 generation)
{
int ret = 0;
- spin_lock(&BTRFS_I(inode)->lock);
- if (BTRFS_I(inode)->logged_trans == generation &&
- BTRFS_I(inode)->last_sub_trans <=
- BTRFS_I(inode)->last_log_commit &&
- BTRFS_I(inode)->last_sub_trans <=
- BTRFS_I(inode)->root->last_log_commit) {
+ spin_lock(&inode->lock);
+ if (inode->logged_trans == generation &&
+ inode->last_sub_trans <= inode->last_log_commit &&
+ inode->last_sub_trans <= inode->root->last_log_commit) {
/*
* After a ranged fsync we might have left some extent maps
* (that fall outside the fsync's range). So return false
@@ -272,10 +270,10 @@ static inline int btrfs_inode_in_log(struct inode *inode, u64 generation)
* will be called and process those extent maps.
*/
smp_mb();
- if (list_empty(&BTRFS_I(inode)->extent_tree.modified_extents))
+ if (list_empty(&inode->extent_tree.modified_extents))
ret = 1;
}
- spin_unlock(&BTRFS_I(inode)->lock);
+ spin_unlock(&inode->lock);
return ret;
}
@@ -326,6 +324,24 @@ static inline void btrfs_inode_resume_unlocked_dio(struct inode *inode)
&BTRFS_I(inode)->runtime_flags);
}
+static inline void btrfs_print_data_csum_error(struct inode *inode,
+ u64 logical_start, u32 csum, u32 csum_expected, int mirror_num)
+{
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+
+ /* Output minus objectid, which is more meaningful */
+ if (root->objectid >= BTRFS_LAST_FREE_OBJECTID)
+ btrfs_warn_rl(root->fs_info,
+ "csum failed root %lld ino %lld off %llu csum 0x%08x expected csum 0x%08x mirror %d",
+ root->objectid, btrfs_ino(BTRFS_I(inode)),
+ logical_start, csum, csum_expected, mirror_num);
+ else
+ btrfs_warn_rl(root->fs_info,
+ "csum failed root %llu ino %llu off %llu csum 0x%08x expected csum 0x%08x mirror %d",
+ root->objectid, btrfs_ino(BTRFS_I(inode)),
+ logical_start, csum, csum_expected, mirror_num);
+}
+
bool btrfs_page_exists_in_range(struct inode *inode, loff_t start, loff_t end);
#endif
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 7f390849343b..903c32c9eb22 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -124,10 +124,8 @@ static int check_compressed_csum(struct inode *inode,
kunmap_atomic(kaddr);
if (csum != *cb_sum) {
- btrfs_info(BTRFS_I(inode)->root->fs_info,
- "csum failed ino %llu extent %llu csum %u wanted %u mirror %d",
- btrfs_ino(inode), disk_start, csum, *cb_sum,
- cb->mirror_num);
+ btrfs_print_data_csum_error(inode, disk_start, csum,
+ *cb_sum, cb->mirror_num);
ret = -EIO;
goto fail;
}
@@ -1024,6 +1022,7 @@ int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
unsigned long buf_offset;
unsigned long current_buf_start;
unsigned long start_byte;
+ unsigned long prev_start_byte;
unsigned long working_bytes = total_out - buf_start;
unsigned long bytes;
char *kaddr;
@@ -1071,26 +1070,34 @@ int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
if (!bio->bi_iter.bi_size)
return 0;
bvec = bio_iter_iovec(bio, bio->bi_iter);
-
+ prev_start_byte = start_byte;
start_byte = page_offset(bvec.bv_page) - disk_start;
/*
- * make sure our new page is covered by this
- * working buffer
+ * We need to make sure we're only adjusting
+ * our offset into compression working buffer when
+ * we're switching pages. Otherwise we can incorrectly
+ * keep copying when we were actually done.
*/
- if (total_out <= start_byte)
- return 1;
+ if (start_byte != prev_start_byte) {
+ /*
+ * make sure our new page is covered by this
+ * working buffer
+ */
+ if (total_out <= start_byte)
+ return 1;
- /*
- * the next page in the biovec might not be adjacent
- * to the last page, but it might still be found
- * inside this working buffer. bump our offset pointer
- */
- if (total_out > start_byte &&
- current_buf_start < start_byte) {
- buf_offset = start_byte - buf_start;
- working_bytes = total_out - start_byte;
- current_buf_start = buf_start + buf_offset;
+ /*
+ * the next page in the biovec might not be adjacent
+ * to the last page, but it might still be found
+ * inside this working buffer. bump our offset pointer
+ */
+ if (total_out > start_byte &&
+ current_buf_start < start_byte) {
+ buf_offset = start_byte - buf_start;
+ working_bytes = total_out - start_byte;
+ current_buf_start = buf_start + buf_offset;
+ }
}
}
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index a426dc822d4d..1192bc7d2ee7 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -28,9 +28,9 @@
static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
*root, struct btrfs_path *path, int level);
-static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
- *root, struct btrfs_key *ins_key,
- struct btrfs_path *path, int data_size, int extend);
+static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+ const struct btrfs_key *ins_key, struct btrfs_path *path,
+ int data_size, int extend);
static int push_node_left(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
struct extent_buffer *dst,
@@ -426,7 +426,7 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
tm_root = &fs_info->tree_mod_log;
for (node = rb_first(tm_root); node; node = next) {
next = rb_next(node);
- tm = container_of(node, struct tree_mod_elem, node);
+ tm = rb_entry(node, struct tree_mod_elem, node);
if (tm->seq > min_seq)
continue;
rb_erase(node, tm_root);
@@ -460,7 +460,7 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
tm_root = &fs_info->tree_mod_log;
new = &tm_root->rb_node;
while (*new) {
- cur = container_of(*new, struct tree_mod_elem, node);
+ cur = rb_entry(*new, struct tree_mod_elem, node);
parent = *new;
if (cur->logical < tm->logical)
new = &((*new)->rb_left);
@@ -746,7 +746,7 @@ __tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq,
tm_root = &fs_info->tree_mod_log;
node = tm_root->rb_node;
while (node) {
- cur = container_of(node, struct tree_mod_elem, node);
+ cur = rb_entry(node, struct tree_mod_elem, node);
if (cur->logical < start) {
node = node->rb_left;
} else if (cur->logical > start) {
@@ -1074,7 +1074,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
ret = btrfs_dec_ref(trans, root, buf, 1);
BUG_ON(ret); /* -ENOMEM */
}
- clean_tree_block(trans, fs_info, buf);
+ clean_tree_block(fs_info, buf);
*last_ref = 1;
}
return 0;
@@ -1326,7 +1326,7 @@ __tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
next = rb_next(&tm->node);
if (!next)
break;
- tm = container_of(next, struct tree_mod_elem, node);
+ tm = rb_entry(next, struct tree_mod_elem, node);
if (tm->logical != first_tm->logical)
break;
}
@@ -1580,7 +1580,8 @@ static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
/*
* compare two keys in a memcmp fashion
*/
-static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
+static int comp_keys(const struct btrfs_disk_key *disk,
+ const struct btrfs_key *k2)
{
struct btrfs_key k1;
@@ -1592,7 +1593,7 @@ static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
/*
* same as comp_keys only with two btrfs_key's
*/
-int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2)
+int btrfs_comp_cpu_keys(const struct btrfs_key *k1, const struct btrfs_key *k2)
{
if (k1->objectid > k2->objectid)
return 1;
@@ -1732,8 +1733,8 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
* slot may point to max if the key is bigger than all of the keys
*/
static noinline int generic_bin_search(struct extent_buffer *eb,
- unsigned long p,
- int item_size, struct btrfs_key *key,
+ unsigned long p, int item_size,
+ const struct btrfs_key *key,
int max, int *slot)
{
int low = 0;
@@ -1802,7 +1803,7 @@ static noinline int generic_bin_search(struct extent_buffer *eb,
* simple bin_search frontend that does the right thing for
* leaves vs nodes
*/
-static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
+static int bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
int level, int *slot)
{
if (level == 0)
@@ -1819,7 +1820,7 @@ static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
slot);
}
-int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
+int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
int level, int *slot)
{
return bin_search(eb, key, level, slot);
@@ -1937,7 +1938,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
path->locks[level] = 0;
path->nodes[level] = NULL;
- clean_tree_block(trans, fs_info, mid);
+ clean_tree_block(fs_info, mid);
btrfs_tree_unlock(mid);
/* once for the path */
free_extent_buffer(mid);
@@ -1998,7 +1999,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
if (wret < 0 && wret != -ENOSPC)
ret = wret;
if (btrfs_header_nritems(right) == 0) {
- clean_tree_block(trans, fs_info, right);
+ clean_tree_block(fs_info, right);
btrfs_tree_unlock(right);
del_ptr(root, path, level + 1, pslot + 1);
root_sub_used(root, right->len);
@@ -2042,7 +2043,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
BUG_ON(wret == 1);
}
if (btrfs_header_nritems(mid) == 0) {
- clean_tree_block(trans, fs_info, mid);
+ clean_tree_block(fs_info, mid);
btrfs_tree_unlock(mid);
del_ptr(root, path, level + 1, pslot);
root_sub_used(root, mid->len);
@@ -2437,10 +2438,9 @@ noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
* reada. -EAGAIN is returned and the search must be repeated.
*/
static int
-read_block_for_search(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct btrfs_path *p,
- struct extent_buffer **eb_ret, int level, int slot,
- struct btrfs_key *key, u64 time_seq)
+read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
+ struct extent_buffer **eb_ret, int level, int slot,
+ const struct btrfs_key *key)
{
struct btrfs_fs_info *fs_info = root->fs_info;
u64 blocknr;
@@ -2587,7 +2587,7 @@ done:
}
static void key_search_validate(struct extent_buffer *b,
- struct btrfs_key *key,
+ const struct btrfs_key *key,
int level)
{
#ifdef CONFIG_BTRFS_ASSERT
@@ -2606,7 +2606,7 @@ static void key_search_validate(struct extent_buffer *b,
#endif
}
-static int key_search(struct extent_buffer *b, struct btrfs_key *key,
+static int key_search(struct extent_buffer *b, const struct btrfs_key *key,
int level, int *prev_cmp, int *slot)
{
if (*prev_cmp != 0) {
@@ -2668,9 +2668,9 @@ int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *path,
* tree. if ins_len < 0, nodes will be merged as we walk down the tree (if
* possible)
*/
-int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
- *root, struct btrfs_key *key, struct btrfs_path *p, int
- ins_len, int cow)
+int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+ const struct btrfs_key *key, struct btrfs_path *p,
+ int ins_len, int cow)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct extent_buffer *b;
@@ -2870,8 +2870,8 @@ cow_done:
goto done;
}
- err = read_block_for_search(trans, root, p,
- &b, level, slot, key, 0);
+ err = read_block_for_search(root, p, &b, level,
+ slot, key);
if (err == -EAGAIN)
goto again;
if (err) {
@@ -2953,7 +2953,7 @@ done:
* The resulting path and return value will be set up as if we called
* btrfs_search_slot at that point in time with ins_len and cow both set to 0.
*/
-int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key,
+int btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key,
struct btrfs_path *p, u64 time_seq)
{
struct btrfs_fs_info *fs_info = root->fs_info;
@@ -3014,8 +3014,8 @@ again:
goto done;
}
- err = read_block_for_search(NULL, root, p, &b, level,
- slot, key, time_seq);
+ err = read_block_for_search(root, p, &b, level,
+ slot, key);
if (err == -EAGAIN)
goto again;
if (err) {
@@ -3067,8 +3067,9 @@ done:
* < 0 on error
*/
int btrfs_search_slot_for_read(struct btrfs_root *root,
- struct btrfs_key *key, struct btrfs_path *p,
- int find_higher, int return_any)
+ const struct btrfs_key *key,
+ struct btrfs_path *p, int find_higher,
+ int return_any)
{
int ret;
struct extent_buffer *leaf;
@@ -3166,7 +3167,7 @@ static void fixup_low_keys(struct btrfs_fs_info *fs_info,
*/
void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
- struct btrfs_key *new_key)
+ const struct btrfs_key *new_key)
{
struct btrfs_disk_key disk_key;
struct extent_buffer *eb;
@@ -3594,8 +3595,7 @@ noinline int btrfs_leaf_free_space(struct btrfs_fs_info *fs_info,
* min slot controls the lowest index we're willing to push to the
* right. We'll push up to and including min_slot, but no lower
*/
-static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
+static noinline int __push_leaf_right(struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
int data_size, int empty,
struct extent_buffer *right,
@@ -3704,7 +3704,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
if (left_nritems)
btrfs_mark_buffer_dirty(left);
else
- clean_tree_block(trans, fs_info, left);
+ clean_tree_block(fs_info, left);
btrfs_mark_buffer_dirty(right);
@@ -3716,7 +3716,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
if (path->slots[0] >= left_nritems) {
path->slots[0] -= left_nritems;
if (btrfs_header_nritems(path->nodes[0]) == 0)
- clean_tree_block(trans, fs_info, path->nodes[0]);
+ clean_tree_block(fs_info, path->nodes[0]);
btrfs_tree_unlock(path->nodes[0]);
free_extent_buffer(path->nodes[0]);
path->nodes[0] = right;
@@ -3809,7 +3809,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
return 0;
}
- return __push_leaf_right(trans, fs_info, path, min_data_size, empty,
+ return __push_leaf_right(fs_info, path, min_data_size, empty,
right, free_space, left_nritems, min_slot);
out_unlock:
btrfs_tree_unlock(right);
@@ -3825,8 +3825,7 @@ out_unlock:
* item at 'max_slot' won't be touched. Use (u32)-1 to make us do all the
* items
*/
-static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
+static noinline int __push_leaf_left(struct btrfs_fs_info *fs_info,
struct btrfs_path *path, int data_size,
int empty, struct extent_buffer *left,
int free_space, u32 right_nritems,
@@ -3945,7 +3944,7 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
if (right_nritems)
btrfs_mark_buffer_dirty(right);
else
- clean_tree_block(trans, fs_info, right);
+ clean_tree_block(fs_info, right);
btrfs_item_key(right, &disk_key, 0);
fixup_low_keys(fs_info, path, &disk_key, 1);
@@ -4035,7 +4034,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
goto out;
}
- return __push_leaf_left(trans, fs_info, path, min_data_size,
+ return __push_leaf_left(fs_info, path, min_data_size,
empty, left, free_space, right_nritems,
max_slot);
out:
@@ -4180,7 +4179,7 @@ static noinline int push_for_double_split(struct btrfs_trans_handle *trans,
*/
static noinline int split_leaf(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- struct btrfs_key *ins_key,
+ const struct btrfs_key *ins_key,
struct btrfs_path *path, int data_size,
int extend)
{
@@ -4412,10 +4411,9 @@ err:
return ret;
}
-static noinline int split_item(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
+static noinline int split_item(struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
- struct btrfs_key *new_key,
+ const struct btrfs_key *new_key,
unsigned long split_offset)
{
struct extent_buffer *leaf;
@@ -4501,7 +4499,7 @@ static noinline int split_item(struct btrfs_trans_handle *trans,
int btrfs_split_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
- struct btrfs_key *new_key,
+ const struct btrfs_key *new_key,
unsigned long split_offset)
{
int ret;
@@ -4510,7 +4508,7 @@ int btrfs_split_item(struct btrfs_trans_handle *trans,
if (ret)
return ret;
- ret = split_item(trans, root->fs_info, path, new_key, split_offset);
+ ret = split_item(root->fs_info, path, new_key, split_offset);
return ret;
}
@@ -4525,7 +4523,7 @@ int btrfs_split_item(struct btrfs_trans_handle *trans,
int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
- struct btrfs_key *new_key)
+ const struct btrfs_key *new_key)
{
struct extent_buffer *leaf;
int ret;
@@ -4726,7 +4724,7 @@ void btrfs_extend_item(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
* that doesn't call btrfs_search_slot
*/
void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
- struct btrfs_key *cpu_key, u32 *data_size,
+ const struct btrfs_key *cpu_key, u32 *data_size,
u32 total_data, u32 total_size, int nr)
{
struct btrfs_fs_info *fs_info = root->fs_info;
@@ -4820,7 +4818,7 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
- struct btrfs_key *cpu_key, u32 *data_size,
+ const struct btrfs_key *cpu_key, u32 *data_size,
int nr)
{
int ret = 0;
@@ -4851,9 +4849,9 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
* Given a key and some data, insert an item into the tree.
* This does all the path init required, making room in the tree if needed.
*/
-int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
- *root, struct btrfs_key *cpu_key, void *data, u32
- data_size)
+int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+ const struct btrfs_key *cpu_key, void *data,
+ u32 data_size)
{
int ret = 0;
struct btrfs_path *path;
@@ -5008,7 +5006,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
btrfs_set_header_level(leaf, 0);
} else {
btrfs_set_path_blocking(path);
- clean_tree_block(trans, fs_info, leaf);
+ clean_tree_block(fs_info, leaf);
btrfs_del_leaf(trans, root, path, leaf);
}
} else {
@@ -5243,7 +5241,7 @@ out:
static int tree_move_down(struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
- int *level, int root_level)
+ int *level)
{
struct extent_buffer *eb;
@@ -5258,8 +5256,7 @@ static int tree_move_down(struct btrfs_fs_info *fs_info,
return 0;
}
-static int tree_move_next_or_upnext(struct btrfs_fs_info *fs_info,
- struct btrfs_path *path,
+static int tree_move_next_or_upnext(struct btrfs_path *path,
int *level, int root_level)
{
int ret = 0;
@@ -5298,10 +5295,9 @@ static int tree_advance(struct btrfs_fs_info *fs_info,
int ret;
if (*level == 0 || !allow_down) {
- ret = tree_move_next_or_upnext(fs_info, path, level,
- root_level);
+ ret = tree_move_next_or_upnext(path, level, root_level);
} else {
- ret = tree_move_down(fs_info, path, level, root_level);
+ ret = tree_move_down(fs_info, path, level);
}
if (ret >= 0) {
if (*level == 0)
@@ -5784,8 +5780,8 @@ again:
next = c;
next_rw_lock = path->locks[level];
- ret = read_block_for_search(NULL, root, path, &next, level,
- slot, &key, 0);
+ ret = read_block_for_search(root, path, &next, level,
+ slot, &key);
if (ret == -EAGAIN)
goto again;
@@ -5834,8 +5830,8 @@ again:
if (!level)
break;
- ret = read_block_for_search(NULL, root, path, &next, level,
- 0, &key, 0);
+ ret = read_block_for_search(root, path, &next, level,
+ 0, &key);
if (ret == -EAGAIN)
goto again;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 6a823719b6c5..ad23a73ac7e7 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -97,6 +97,14 @@ static const int btrfs_csum_sizes[] = { 4 };
#define BTRFS_MAX_EXTENT_SIZE SZ_128M
+/*
+ * Count how many BTRFS_MAX_EXTENT_SIZE cover the @size
+ */
+static inline u32 count_max_extents(u64 size)
+{
+ return div_u64(size + BTRFS_MAX_EXTENT_SIZE - 1, BTRFS_MAX_EXTENT_SIZE);
+}
+
struct btrfs_mapping_tree {
struct extent_map_tree map_tree;
};
@@ -1953,7 +1961,7 @@ BTRFS_SETGET_STACK_FUNCS(disk_key_offset, struct btrfs_disk_key, offset, 64);
BTRFS_SETGET_STACK_FUNCS(disk_key_type, struct btrfs_disk_key, type, 8);
static inline void btrfs_disk_key_to_cpu(struct btrfs_key *cpu,
- struct btrfs_disk_key *disk)
+ const struct btrfs_disk_key *disk)
{
cpu->offset = le64_to_cpu(disk->offset);
cpu->type = disk->type;
@@ -1961,7 +1969,7 @@ static inline void btrfs_disk_key_to_cpu(struct btrfs_key *cpu,
}
static inline void btrfs_cpu_key_to_disk(struct btrfs_disk_key *disk,
- struct btrfs_key *cpu)
+ const struct btrfs_key *cpu)
{
disk->offset = cpu_to_le64(cpu->offset);
disk->type = cpu->type;
@@ -1993,8 +2001,7 @@ static inline void btrfs_dir_item_key_to_cpu(struct extent_buffer *eb,
btrfs_disk_key_to_cpu(key, &disk_key);
}
-
-static inline u8 btrfs_key_type(struct btrfs_key *key)
+static inline u8 btrfs_key_type(const struct btrfs_key *key)
{
return key->type;
}
@@ -2577,8 +2584,7 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_fs_info *fs_info,
u64 bytenr, u64 num_bytes);
int btrfs_exclude_logged_extents(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb);
-int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
+int btrfs_cross_ref_exist(struct btrfs_root *root,
u64 objectid, u64 offset, u64 bytenr);
struct btrfs_block_group_cache *btrfs_lookup_block_group(
struct btrfs_fs_info *info,
@@ -2587,10 +2593,11 @@ void btrfs_get_block_group(struct btrfs_block_group_cache *cache);
void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
int get_block_group_index(struct btrfs_block_group_cache *cache);
struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u64 parent,
- u64 root_objectid,
- struct btrfs_disk_key *key, int level,
- u64 hint, u64 empty_size);
+ struct btrfs_root *root,
+ u64 parent, u64 root_objectid,
+ const struct btrfs_disk_key *key,
+ int level, u64 hint,
+ u64 empty_size);
void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *buf,
@@ -2623,8 +2630,7 @@ int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
u64 start, u64 len, int delalloc);
int btrfs_free_and_pin_reserved_extent(struct btrfs_fs_info *fs_info,
u64 start, u64 len);
-void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info);
+void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info);
int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
@@ -2696,8 +2702,7 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
int nitems,
u64 *qgroup_reserved, bool use_global_rsv);
void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info,
- struct btrfs_block_rsv *rsv,
- u64 qgroup_reserved);
+ struct btrfs_block_rsv *rsv);
int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes);
void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes);
int btrfs_delalloc_reserve_space(struct inode *inode, u64 start, u64 len);
@@ -2724,7 +2729,7 @@ int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *block_rsv,
u64 num_bytes);
-int btrfs_inc_block_group_ro(struct btrfs_root *root,
+int btrfs_inc_block_group_ro(struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *cache);
void btrfs_dec_block_group_ro(struct btrfs_block_group_cache *cache);
void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
@@ -2750,9 +2755,9 @@ u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
struct btrfs_fs_info *info, u64 start, u64 end);
/* ctree.c */
-int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
+int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
int level, int *slot);
-int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2);
+int btrfs_comp_cpu_keys(const struct btrfs_key *k1, const struct btrfs_key *k2);
int btrfs_previous_item(struct btrfs_root *root,
struct btrfs_path *path, u64 min_objectid,
int type);
@@ -2760,7 +2765,7 @@ int btrfs_previous_extent_item(struct btrfs_root *root,
struct btrfs_path *path, u64 min_objectid);
void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
- struct btrfs_key *new_key);
+ const struct btrfs_key *new_key);
struct extent_buffer *btrfs_root_node(struct btrfs_root *root);
struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root);
int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
@@ -2802,22 +2807,23 @@ void btrfs_truncate_item(struct btrfs_fs_info *fs_info,
int btrfs_split_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
- struct btrfs_key *new_key,
+ const struct btrfs_key *new_key,
unsigned long split_offset);
int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
- struct btrfs_key *new_key);
+ const struct btrfs_key *new_key);
int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *path,
u64 inum, u64 ioff, u8 key_type, struct btrfs_key *found_key);
-int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
- *root, struct btrfs_key *key, struct btrfs_path *p, int
- ins_len, int cow);
-int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key,
+int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+ const struct btrfs_key *key, struct btrfs_path *p,
+ int ins_len, int cow);
+int btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key,
struct btrfs_path *p, u64 time_seq);
int btrfs_search_slot_for_read(struct btrfs_root *root,
- struct btrfs_key *key, struct btrfs_path *p,
- int find_higher, int return_any);
+ const struct btrfs_key *key,
+ struct btrfs_path *p, int find_higher,
+ int return_any);
int btrfs_realloc_node(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct extent_buffer *parent,
int start_slot, u64 *last_ret,
@@ -2840,19 +2846,20 @@ static inline int btrfs_del_item(struct btrfs_trans_handle *trans,
}
void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
- struct btrfs_key *cpu_key, u32 *data_size,
+ const struct btrfs_key *cpu_key, u32 *data_size,
u32 total_data, u32 total_size, int nr);
-int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
- *root, struct btrfs_key *key, void *data, u32 data_size);
+int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+ const struct btrfs_key *key, void *data, u32 data_size);
int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
- struct btrfs_key *cpu_key, u32 *data_size, int nr);
+ const struct btrfs_key *cpu_key, u32 *data_size,
+ int nr);
static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
- struct btrfs_key *key,
+ const struct btrfs_key *key,
u32 data_size)
{
return btrfs_insert_empty_items(trans, root, path, key, &data_size, 1);
@@ -2941,15 +2948,15 @@ int btrfs_del_root_ref(struct btrfs_trans_handle *trans,
u64 root_id, u64 ref_id, u64 dirid, u64 *sequence,
const char *name, int name_len);
int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
- struct btrfs_key *key);
-int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root
- *root, struct btrfs_key *key, struct btrfs_root_item
- *item);
+ const struct btrfs_key *key);
+int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+ const struct btrfs_key *key,
+ struct btrfs_root_item *item);
int __must_check btrfs_update_root(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_key *key,
struct btrfs_root_item *item);
-int btrfs_find_root(struct btrfs_root *root, struct btrfs_key *search_key,
+int btrfs_find_root(struct btrfs_root *root, const struct btrfs_key *search_key,
struct btrfs_path *path, struct btrfs_root_item *root_item,
struct btrfs_key *root_key);
int btrfs_find_orphan_roots(struct btrfs_fs_info *fs_info);
@@ -3119,7 +3126,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry);
int btrfs_set_inode_index(struct inode *dir, u64 *index);
int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- struct inode *dir, struct inode *inode,
+ struct btrfs_inode *dir, struct btrfs_inode *inode,
const char *name, int name_len);
int btrfs_add_link(struct btrfs_trans_handle *trans,
struct inode *parent_inode, struct inode *inode,
@@ -3447,7 +3454,8 @@ do { \
"BTRFS: Transaction aborted (error %d)\n", \
(errno)); \
} else { \
- pr_debug("BTRFS: Transaction aborted (error %d)\n", \
+ btrfs_debug((trans)->fs_info, \
+ "Transaction aborted (error %d)", \
(errno)); \
} \
} \
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 80982a83c9fd..f7a6ee5ccc80 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -72,14 +72,14 @@ static inline int btrfs_is_continuous_delayed_item(
return 0;
}
-static struct btrfs_delayed_node *btrfs_get_delayed_node(struct inode *inode)
+static struct btrfs_delayed_node *btrfs_get_delayed_node(
+ struct btrfs_inode *btrfs_inode)
{
- struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
struct btrfs_root *root = btrfs_inode->root;
- u64 ino = btrfs_ino(inode);
+ u64 ino = btrfs_ino(btrfs_inode);
struct btrfs_delayed_node *node;
- node = ACCESS_ONCE(btrfs_inode->delayed_node);
+ node = READ_ONCE(btrfs_inode->delayed_node);
if (node) {
atomic_inc(&node->refs);
return node;
@@ -107,16 +107,15 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(struct inode *inode)
/* Will return either the node or PTR_ERR(-ENOMEM) */
static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
- struct inode *inode)
+ struct btrfs_inode *btrfs_inode)
{
struct btrfs_delayed_node *node;
- struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
struct btrfs_root *root = btrfs_inode->root;
- u64 ino = btrfs_ino(inode);
+ u64 ino = btrfs_ino(btrfs_inode);
int ret;
again:
- node = btrfs_get_delayed_node(inode);
+ node = btrfs_get_delayed_node(btrfs_inode);
if (node)
return node;
@@ -574,7 +573,7 @@ static void btrfs_delayed_item_release_metadata(struct btrfs_fs_info *fs_info,
static int btrfs_delayed_inode_reserve_metadata(
struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- struct inode *inode,
+ struct btrfs_inode *inode,
struct btrfs_delayed_node *node)
{
struct btrfs_fs_info *fs_info = root->fs_info;
@@ -603,13 +602,13 @@ static int btrfs_delayed_inode_reserve_metadata(
* worth which is less likely to hurt us.
*/
if (src_rsv && src_rsv->type == BTRFS_BLOCK_RSV_DELALLOC) {
- spin_lock(&BTRFS_I(inode)->lock);
+ spin_lock(&inode->lock);
if (test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
- &BTRFS_I(inode)->runtime_flags))
+ &inode->runtime_flags))
release = true;
else
src_rsv = NULL;
- spin_unlock(&BTRFS_I(inode)->lock);
+ spin_unlock(&inode->lock);
}
/*
@@ -1196,7 +1195,7 @@ int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans,
}
int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
- struct inode *inode)
+ struct btrfs_inode *inode)
{
struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
struct btrfs_path *path;
@@ -1233,9 +1232,9 @@ int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
return ret;
}
-int btrfs_commit_inode_delayed_inode(struct inode *inode)
+int btrfs_commit_inode_delayed_inode(struct btrfs_inode *inode)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
struct btrfs_trans_handle *trans;
struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
struct btrfs_path *path;
@@ -1288,15 +1287,15 @@ out:
return ret;
}
-void btrfs_remove_delayed_node(struct inode *inode)
+void btrfs_remove_delayed_node(struct btrfs_inode *inode)
{
struct btrfs_delayed_node *delayed_node;
- delayed_node = ACCESS_ONCE(BTRFS_I(inode)->delayed_node);
+ delayed_node = READ_ONCE(inode->delayed_node);
if (!delayed_node)
return;
- BTRFS_I(inode)->delayed_node = NULL;
+ inode->delayed_node = NULL;
btrfs_release_delayed_node(delayed_node);
}
@@ -1434,7 +1433,7 @@ void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info)
int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
const char *name, int name_len,
- struct inode *dir,
+ struct btrfs_inode *dir,
struct btrfs_disk_key *disk_key, u8 type,
u64 index)
{
@@ -1510,7 +1509,7 @@ static int btrfs_delete_delayed_insertion_item(struct btrfs_fs_info *fs_info,
int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
- struct inode *dir, u64 index)
+ struct btrfs_inode *dir, u64 index)
{
struct btrfs_delayed_node *node;
struct btrfs_delayed_item *item;
@@ -1558,7 +1557,7 @@ end:
return ret;
}
-int btrfs_inode_delayed_dir_index_count(struct inode *inode)
+int btrfs_inode_delayed_dir_index_count(struct btrfs_inode *inode)
{
struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
@@ -1575,7 +1574,7 @@ int btrfs_inode_delayed_dir_index_count(struct inode *inode)
return -EINVAL;
}
- BTRFS_I(inode)->index_cnt = delayed_node->index_cnt;
+ inode->index_cnt = delayed_node->index_cnt;
btrfs_release_delayed_node(delayed_node);
return 0;
}
@@ -1587,7 +1586,7 @@ bool btrfs_readdir_get_delayed_items(struct inode *inode,
struct btrfs_delayed_node *delayed_node;
struct btrfs_delayed_item *item;
- delayed_node = btrfs_get_delayed_node(inode);
+ delayed_node = btrfs_get_delayed_node(BTRFS_I(inode));
if (!delayed_node)
return false;
@@ -1776,7 +1775,7 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev)
struct btrfs_delayed_node *delayed_node;
struct btrfs_inode_item *inode_item;
- delayed_node = btrfs_get_delayed_node(inode);
+ delayed_node = btrfs_get_delayed_node(BTRFS_I(inode));
if (!delayed_node)
return -ENOENT;
@@ -1831,7 +1830,7 @@ int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
struct btrfs_delayed_node *delayed_node;
int ret = 0;
- delayed_node = btrfs_get_or_create_delayed_node(inode);
+ delayed_node = btrfs_get_or_create_delayed_node(BTRFS_I(inode));
if (IS_ERR(delayed_node))
return PTR_ERR(delayed_node);
@@ -1841,7 +1840,7 @@ int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
goto release_node;
}
- ret = btrfs_delayed_inode_reserve_metadata(trans, root, inode,
+ ret = btrfs_delayed_inode_reserve_metadata(trans, root, BTRFS_I(inode),
delayed_node);
if (ret)
goto release_node;
@@ -1856,9 +1855,9 @@ release_node:
return ret;
}
-int btrfs_delayed_delete_inode_ref(struct inode *inode)
+int btrfs_delayed_delete_inode_ref(struct btrfs_inode *inode)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
struct btrfs_delayed_node *delayed_node;
/*
@@ -1933,7 +1932,7 @@ static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node)
mutex_unlock(&delayed_node->mutex);
}
-void btrfs_kill_delayed_inode_items(struct inode *inode)
+void btrfs_kill_delayed_inode_items(struct btrfs_inode *inode)
{
struct btrfs_delayed_node *delayed_node;
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index 8a2bf5e3e4cf..40327cc3b99a 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -101,15 +101,15 @@ static inline void btrfs_init_delayed_root(
int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
const char *name, int name_len,
- struct inode *dir,
+ struct btrfs_inode *dir,
struct btrfs_disk_key *disk_key, u8 type,
u64 index);
int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
- struct inode *dir, u64 index);
+ struct btrfs_inode *dir, u64 index);
-int btrfs_inode_delayed_dir_index_count(struct inode *inode);
+int btrfs_inode_delayed_dir_index_count(struct btrfs_inode *inode);
int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
@@ -119,17 +119,17 @@ int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans,
void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info);
int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
- struct inode *inode);
+ struct btrfs_inode *inode);
/* Used for evicting the inode. */
-void btrfs_remove_delayed_node(struct inode *inode);
-void btrfs_kill_delayed_inode_items(struct inode *inode);
-int btrfs_commit_inode_delayed_inode(struct inode *inode);
+void btrfs_remove_delayed_node(struct btrfs_inode *inode);
+void btrfs_kill_delayed_inode_items(struct btrfs_inode *inode);
+int btrfs_commit_inode_delayed_inode(struct btrfs_inode *inode);
int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode);
int btrfs_fill_inode(struct inode *inode, u32 *rdev);
-int btrfs_delayed_delete_inode_ref(struct inode *inode);
+int btrfs_delayed_delete_inode_ref(struct btrfs_inode *inode);
/* Used for drop dead root */
void btrfs_kill_all_delayed_nodes(struct btrfs_root *root);
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index ef724a5fc30e..6eb80952efb3 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -550,13 +550,14 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_node *ref,
struct btrfs_qgroup_extent_record *qrecord,
u64 bytenr, u64 num_bytes, u64 ref_root, u64 reserved,
- int action, int is_data)
+ int action, int is_data, int *qrecord_inserted_ret)
{
struct btrfs_delayed_ref_head *existing;
struct btrfs_delayed_ref_head *head_ref = NULL;
struct btrfs_delayed_ref_root *delayed_refs;
int count_mod = 1;
int must_insert_reserved = 0;
+ int qrecord_inserted = 0;
/* If reserved is provided, it must be a data extent. */
BUG_ON(!is_data && reserved);
@@ -623,6 +624,8 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
if(btrfs_qgroup_trace_extent_nolock(fs_info,
delayed_refs, qrecord))
kfree(qrecord);
+ else
+ qrecord_inserted = 1;
}
spin_lock_init(&head_ref->lock);
@@ -650,6 +653,8 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
atomic_inc(&delayed_refs->num_entries);
trans->delayed_ref_updates++;
}
+ if (qrecord_inserted_ret)
+ *qrecord_inserted_ret = qrecord_inserted;
return head_ref;
}
@@ -779,6 +784,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_head *head_ref;
struct btrfs_delayed_ref_root *delayed_refs;
struct btrfs_qgroup_extent_record *record = NULL;
+ int qrecord_inserted;
BUG_ON(extent_op && extent_op->is_data);
ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
@@ -806,12 +812,15 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
* the spin lock
*/
head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record,
- bytenr, num_bytes, 0, 0, action, 0);
+ bytenr, num_bytes, 0, 0, action, 0,
+ &qrecord_inserted);
add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr,
num_bytes, parent, ref_root, level, action);
spin_unlock(&delayed_refs->lock);
+ if (qrecord_inserted)
+ return btrfs_qgroup_trace_extent_post(fs_info, record);
return 0;
free_head_ref:
@@ -829,15 +838,14 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
u64 parent, u64 ref_root,
- u64 owner, u64 offset, u64 reserved, int action,
- struct btrfs_delayed_extent_op *extent_op)
+ u64 owner, u64 offset, u64 reserved, int action)
{
struct btrfs_delayed_data_ref *ref;
struct btrfs_delayed_ref_head *head_ref;
struct btrfs_delayed_ref_root *delayed_refs;
struct btrfs_qgroup_extent_record *record = NULL;
+ int qrecord_inserted;
- BUG_ON(extent_op && !extent_op->is_data);
ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS);
if (!ref)
return -ENOMEM;
@@ -859,7 +867,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
}
}
- head_ref->extent_op = extent_op;
+ head_ref->extent_op = NULL;
delayed_refs = &trans->transaction->delayed_refs;
spin_lock(&delayed_refs->lock);
@@ -870,13 +878,15 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
*/
head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record,
bytenr, num_bytes, ref_root, reserved,
- action, 1);
+ action, 1, &qrecord_inserted);
add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr,
num_bytes, parent, ref_root, owner, offset,
action);
spin_unlock(&delayed_refs->lock);
+ if (qrecord_inserted)
+ return btrfs_qgroup_trace_extent_post(fs_info, record);
return 0;
}
@@ -899,7 +909,7 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
add_delayed_ref_head(fs_info, trans, &head_ref->node, NULL, bytenr,
num_bytes, 0, 0, BTRFS_UPDATE_DELAYED_HEAD,
- extent_op->is_data);
+ extent_op->is_data, NULL);
spin_unlock(&delayed_refs->lock);
return 0;
@@ -911,11 +921,8 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
* the head node if any where found, or NULL if not.
*/
struct btrfs_delayed_ref_head *
-btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr)
+btrfs_find_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs, u64 bytenr)
{
- struct btrfs_delayed_ref_root *delayed_refs;
-
- delayed_refs = &trans->transaction->delayed_refs;
return find_ref_head(&delayed_refs->href_root, bytenr, 0);
}
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 50947b5a9152..0e537f98f1a1 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -250,8 +250,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
u64 parent, u64 ref_root,
- u64 owner, u64 offset, u64 reserved, int action,
- struct btrfs_delayed_extent_op *extent_op);
+ u64 owner, u64 offset, u64 reserved, int action);
int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
@@ -262,7 +261,8 @@ void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_head *head);
struct btrfs_delayed_ref_head *
-btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr);
+btrfs_find_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
+ u64 bytenr);
int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_head *head);
static inline void btrfs_delayed_ref_unlock(struct btrfs_delayed_ref_head *head)
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index b039fe0c751a..724504a2d7ac 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -133,7 +133,7 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root
struct btrfs_disk_key disk_key;
u32 data_size;
- key.objectid = btrfs_ino(dir);
+ key.objectid = btrfs_ino(BTRFS_I(dir));
key.type = BTRFS_DIR_ITEM_KEY;
key.offset = btrfs_name_hash(name, name_len);
@@ -174,8 +174,7 @@ second_insert:
btrfs_release_path(path);
ret2 = btrfs_insert_delayed_dir_index(trans, root->fs_info, name,
- name_len, dir, &disk_key, type,
- index);
+ name_len, BTRFS_I(dir), &disk_key, type, index);
out_free:
btrfs_free_path(path);
if (ret)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 18004169552c..32a9ec11888d 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -64,8 +64,7 @@
static const struct extent_io_ops btree_extent_io_ops;
static void end_workqueue_fn(struct btrfs_work *work);
static void free_fs_root(struct btrfs_root *root);
-static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
- int read_only);
+static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info);
static void btrfs_destroy_ordered_extents(struct btrfs_root *root);
static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
struct btrfs_fs_info *fs_info);
@@ -1005,7 +1004,7 @@ static int __btree_submit_bio_done(struct inode *inode, struct bio *bio,
return ret;
}
-static int check_async_write(struct inode *inode, unsigned long bio_flags)
+static int check_async_write(unsigned long bio_flags)
{
if (bio_flags & EXTENT_BIO_TREE_LOG)
return 0;
@@ -1021,7 +1020,7 @@ static int btree_submit_bio_hook(struct inode *inode, struct bio *bio,
u64 bio_offset)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- int async = check_async_write(inode, bio_flags);
+ int async = check_async_write(bio_flags);
int ret;
if (bio_op(bio) != REQ_OP_WRITE) {
@@ -1248,8 +1247,7 @@ struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
}
-void clean_tree_block(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
+void clean_tree_block(struct btrfs_fs_info *fs_info,
struct extent_buffer *buf)
{
if (btrfs_header_generation(buf) ==
@@ -2207,11 +2205,9 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
btrfs_destroy_workqueue(fs_info->delalloc_workers);
btrfs_destroy_workqueue(fs_info->workers);
btrfs_destroy_workqueue(fs_info->endio_workers);
- btrfs_destroy_workqueue(fs_info->endio_meta_workers);
btrfs_destroy_workqueue(fs_info->endio_raid56_workers);
btrfs_destroy_workqueue(fs_info->endio_repair_workers);
btrfs_destroy_workqueue(fs_info->rmw_workers);
- btrfs_destroy_workqueue(fs_info->endio_meta_write_workers);
btrfs_destroy_workqueue(fs_info->endio_write_workers);
btrfs_destroy_workqueue(fs_info->endio_freespace_worker);
btrfs_destroy_workqueue(fs_info->submit_workers);
@@ -2221,6 +2217,13 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
btrfs_destroy_workqueue(fs_info->flush_workers);
btrfs_destroy_workqueue(fs_info->qgroup_rescan_workers);
btrfs_destroy_workqueue(fs_info->extent_workers);
+ /*
+ * Now that all other work queues are destroyed, we can safely destroy
+ * the queues used for metadata I/O, since tasks from those other work
+ * queues can do metadata I/O operations.
+ */
+ btrfs_destroy_workqueue(fs_info->endio_meta_workers);
+ btrfs_destroy_workqueue(fs_info->endio_meta_write_workers);
}
static void free_root_extent_buffers(struct btrfs_root *root)
@@ -2802,7 +2805,7 @@ int open_ctree(struct super_block *sb,
memcpy(fs_info->fsid, fs_info->super_copy->fsid, BTRFS_FSID_SIZE);
- ret = btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY);
+ ret = btrfs_check_super_valid(fs_info);
if (ret) {
btrfs_err(fs_info, "superblock contains fatal errors");
err = -EINVAL;
@@ -3263,7 +3266,6 @@ fail_fsdev_sysfs:
fail_block_groups:
btrfs_put_block_group_cache(fs_info);
- btrfs_free_block_groups(fs_info);
fail_tree_roots:
free_root_pointers(fs_info, 1);
@@ -3271,6 +3273,7 @@ fail_tree_roots:
fail_sb_buffer:
btrfs_stop_all_workers(fs_info);
+ btrfs_free_block_groups(fs_info);
fail_alloc:
fail_iput:
btrfs_mapping_tree_free(&fs_info->mapping_tree);
@@ -3411,7 +3414,7 @@ struct buffer_head *btrfs_read_dev_super(struct block_device *bdev)
*/
static int write_dev_supers(struct btrfs_device *device,
struct btrfs_super_block *sb,
- int do_barriers, int wait, int max_mirrors)
+ int wait, int max_mirrors)
{
struct buffer_head *bh;
int i;
@@ -3696,7 +3699,7 @@ int btrfs_calc_num_tolerated_disk_barrier_failures(
return num_tolerated_disk_barrier_failures;
}
-static int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors)
+int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors)
{
struct list_head *head;
struct btrfs_device *dev;
@@ -3753,7 +3756,7 @@ static int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors)
flags = btrfs_super_flags(sb);
btrfs_set_super_flags(sb, flags | BTRFS_HEADER_FLAG_WRITTEN);
- ret = write_dev_supers(dev, sb, do_barriers, 0, max_mirrors);
+ ret = write_dev_supers(dev, sb, 0, max_mirrors);
if (ret)
total_errors++;
}
@@ -3776,7 +3779,7 @@ static int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors)
if (!dev->in_fs_metadata || !dev->writeable)
continue;
- ret = write_dev_supers(dev, sb, do_barriers, 1, max_mirrors);
+ ret = write_dev_supers(dev, sb, 1, max_mirrors);
if (ret)
total_errors++;
}
@@ -3790,12 +3793,6 @@ static int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors)
return 0;
}
-int write_ctree_super(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, int max_mirrors)
-{
- return write_all_supers(fs_info, max_mirrors);
-}
-
/* Drop a fs root from the radix tree and free it. */
void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
struct btrfs_root *root)
@@ -3985,8 +3982,6 @@ void close_ctree(struct btrfs_fs_info *fs_info)
btrfs_put_block_group_cache(fs_info);
- btrfs_free_block_groups(fs_info);
-
/*
* we must make sure there is not any read request to
* submit after we stopping all workers.
@@ -3994,6 +3989,8 @@ void close_ctree(struct btrfs_fs_info *fs_info)
invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
btrfs_stop_all_workers(fs_info);
+ btrfs_free_block_groups(fs_info);
+
clear_bit(BTRFS_FS_OPEN, &fs_info->flags);
free_root_pointers(fs_info, 1);
@@ -4122,8 +4119,7 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
return btree_read_extent_buffer_pages(fs_info, buf, parent_transid);
}
-static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
- int read_only)
+static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info)
{
struct btrfs_super_block *sb = fs_info->super_copy;
u64 nodesize = btrfs_super_nodesize(sb);
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 44dcd9af6b7c..0be2d4fe705b 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -52,14 +52,12 @@ int reada_tree_block_flagged(struct btrfs_fs_info *fs_info, u64 bytenr,
struct extent_buffer *btrfs_find_create_tree_block(
struct btrfs_fs_info *fs_info,
u64 bytenr);
-void clean_tree_block(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, struct extent_buffer *buf);
+void clean_tree_block(struct btrfs_fs_info *fs_info, struct extent_buffer *buf);
int open_ctree(struct super_block *sb,
struct btrfs_fs_devices *fs_devices,
char *options);
void close_ctree(struct btrfs_fs_info *fs_info);
-int write_ctree_super(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, int max_mirrors);
+int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors);
struct buffer_head *btrfs_read_dev_super(struct block_device *bdev);
int btrfs_read_dev_one_super(struct block_device *bdev, int copy_num,
struct buffer_head **bh_ret);
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 340d90751263..87144c9f9593 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -30,7 +30,7 @@ static int btrfs_encode_fh(struct inode *inode, u32 *fh, int *max_len,
len = BTRFS_FID_SIZE_NON_CONNECTABLE;
type = FILEID_BTRFS_WITHOUT_PARENT;
- fid->objectid = btrfs_ino(inode);
+ fid->objectid = btrfs_ino(BTRFS_I(inode));
fid->root_objectid = BTRFS_I(inode)->root->objectid;
fid->gen = inode->i_generation;
@@ -166,13 +166,13 @@ static struct dentry *btrfs_get_parent(struct dentry *child)
if (!path)
return ERR_PTR(-ENOMEM);
- if (btrfs_ino(dir) == BTRFS_FIRST_FREE_OBJECTID) {
+ if (btrfs_ino(BTRFS_I(dir)) == BTRFS_FIRST_FREE_OBJECTID) {
key.objectid = root->root_key.objectid;
key.type = BTRFS_ROOT_BACKREF_KEY;
key.offset = (u64)-1;
root = fs_info->tree_root;
} else {
- key.objectid = btrfs_ino(dir);
+ key.objectid = btrfs_ino(BTRFS_I(dir));
key.type = BTRFS_INODE_REF_KEY;
key.offset = (u64)-1;
}
@@ -235,13 +235,10 @@ static int btrfs_get_name(struct dentry *parent, char *name,
int ret;
u64 ino;
- if (!dir || !inode)
- return -EINVAL;
-
if (!S_ISDIR(dir->i_mode))
return -EINVAL;
- ino = btrfs_ino(inode);
+ ino = btrfs_ino(BTRFS_I(inode));
path = btrfs_alloc_path();
if (!path)
@@ -255,7 +252,7 @@ static int btrfs_get_name(struct dentry *parent, char *name,
root = fs_info->tree_root;
} else {
key.objectid = ino;
- key.offset = btrfs_ino(dir);
+ key.offset = btrfs_ino(BTRFS_I(dir));
key.type = BTRFS_INODE_REF_KEY;
}
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index e97302f437a1..438c7312de33 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -888,7 +888,7 @@ search_again:
delayed_refs = &trans->transaction->delayed_refs;
spin_lock(&delayed_refs->lock);
- head = btrfs_find_delayed_ref_head(trans, bytenr);
+ head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
if (head) {
if (!mutex_trylock(&head->mutex)) {
atomic_inc(&head->node.refs);
@@ -1035,10 +1035,11 @@ out_free:
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
+ struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
u64 owner, u32 extra_size)
{
+ struct btrfs_root *root = fs_info->extent_root;
struct btrfs_extent_item *item;
struct btrfs_extent_item_v0 *ei0;
struct btrfs_extent_ref_v0 *ref0;
@@ -1092,7 +1093,7 @@ static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
return ret;
BUG_ON(ret); /* Corruption */
- btrfs_extend_item(root->fs_info, path, new_size);
+ btrfs_extend_item(fs_info, path, new_size);
leaf = path->nodes[0];
item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
@@ -1151,12 +1152,13 @@ static int match_extent_data_ref(struct extent_buffer *leaf,
}
static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
+ struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
u64 bytenr, u64 parent,
u64 root_objectid,
u64 owner, u64 offset)
{
+ struct btrfs_root *root = fs_info->extent_root;
struct btrfs_key key;
struct btrfs_extent_data_ref *ref;
struct extent_buffer *leaf;
@@ -1238,12 +1240,13 @@ fail:
}
static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
+ struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
u64 bytenr, u64 parent,
u64 root_objectid, u64 owner,
u64 offset, int refs_to_add)
{
+ struct btrfs_root *root = fs_info->extent_root;
struct btrfs_key key;
struct extent_buffer *leaf;
u32 size;
@@ -1317,7 +1320,7 @@ fail:
}
static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
+ struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
int refs_to_drop, int *last_ref)
{
@@ -1354,7 +1357,7 @@ static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
num_refs -= refs_to_drop;
if (num_refs == 0) {
- ret = btrfs_del_item(trans, root, path);
+ ret = btrfs_del_item(trans, fs_info->extent_root, path);
*last_ref = 1;
} else {
if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
@@ -1416,11 +1419,12 @@ static noinline u32 extent_data_ref_count(struct btrfs_path *path,
}
static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
+ struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
u64 bytenr, u64 parent,
u64 root_objectid)
{
+ struct btrfs_root *root = fs_info->extent_root;
struct btrfs_key key;
int ret;
@@ -1449,7 +1453,7 @@ static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
}
static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
+ struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
u64 bytenr, u64 parent,
u64 root_objectid)
@@ -1466,7 +1470,8 @@ static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
key.offset = root_objectid;
}
- ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
+ ret = btrfs_insert_empty_item(trans, fs_info->extent_root,
+ path, &key, 0);
btrfs_release_path(path);
return ret;
}
@@ -1524,14 +1529,14 @@ static int find_next_key(struct btrfs_path *path, int level,
*/
static noinline_for_stack
int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
+ struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
struct btrfs_extent_inline_ref **ref_ret,
u64 bytenr, u64 num_bytes,
u64 parent, u64 root_objectid,
u64 owner, u64 offset, int insert)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_root *root = fs_info->extent_root;
struct btrfs_key key;
struct extent_buffer *leaf;
struct btrfs_extent_item *ei;
@@ -1614,7 +1619,7 @@ again:
err = -ENOENT;
goto out;
}
- ret = convert_extent_item_v0(trans, root, path, owner,
+ ret = convert_extent_item_v0(trans, fs_info, path, owner,
extra_size);
if (ret < 0) {
err = ret;
@@ -1716,7 +1721,7 @@ out:
* helper to add new inline back ref
*/
static noinline_for_stack
-void setup_inline_extent_backref(struct btrfs_root *root,
+void setup_inline_extent_backref(struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
struct btrfs_extent_inline_ref *iref,
u64 parent, u64 root_objectid,
@@ -1739,7 +1744,7 @@ void setup_inline_extent_backref(struct btrfs_root *root,
type = extent_ref_type(parent, owner);
size = btrfs_extent_inline_ref_size(type);
- btrfs_extend_item(root->fs_info, path, size);
+ btrfs_extend_item(fs_info, path, size);
ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
refs = btrfs_extent_refs(leaf, ei);
@@ -1777,7 +1782,7 @@ void setup_inline_extent_backref(struct btrfs_root *root,
}
static int lookup_extent_backref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
+ struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
struct btrfs_extent_inline_ref **ref_ret,
u64 bytenr, u64 num_bytes, u64 parent,
@@ -1785,7 +1790,7 @@ static int lookup_extent_backref(struct btrfs_trans_handle *trans,
{
int ret;
- ret = lookup_inline_extent_backref(trans, root, path, ref_ret,
+ ret = lookup_inline_extent_backref(trans, fs_info, path, ref_ret,
bytenr, num_bytes, parent,
root_objectid, owner, offset, 0);
if (ret != -ENOENT)
@@ -1795,11 +1800,12 @@ static int lookup_extent_backref(struct btrfs_trans_handle *trans,
*ref_ret = NULL;
if (owner < BTRFS_FIRST_FREE_OBJECTID) {
- ret = lookup_tree_block_ref(trans, root, path, bytenr, parent,
- root_objectid);
+ ret = lookup_tree_block_ref(trans, fs_info, path, bytenr,
+ parent, root_objectid);
} else {
- ret = lookup_extent_data_ref(trans, root, path, bytenr, parent,
- root_objectid, owner, offset);
+ ret = lookup_extent_data_ref(trans, fs_info, path, bytenr,
+ parent, root_objectid, owner,
+ offset);
}
return ret;
}
@@ -1808,7 +1814,7 @@ static int lookup_extent_backref(struct btrfs_trans_handle *trans,
* helper to update/remove inline back ref
*/
static noinline_for_stack
-void update_inline_extent_backref(struct btrfs_root *root,
+void update_inline_extent_backref(struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
struct btrfs_extent_inline_ref *iref,
int refs_to_mod,
@@ -1866,14 +1872,14 @@ void update_inline_extent_backref(struct btrfs_root *root,
memmove_extent_buffer(leaf, ptr, ptr + size,
end - ptr - size);
item_size -= size;
- btrfs_truncate_item(root->fs_info, path, item_size, 1);
+ btrfs_truncate_item(fs_info, path, item_size, 1);
}
btrfs_mark_buffer_dirty(leaf);
}
static noinline_for_stack
int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
+ struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
u64 bytenr, u64 num_bytes, u64 parent,
u64 root_objectid, u64 owner,
@@ -1883,15 +1889,15 @@ int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
struct btrfs_extent_inline_ref *iref;
int ret;
- ret = lookup_inline_extent_backref(trans, root, path, &iref,
+ ret = lookup_inline_extent_backref(trans, fs_info, path, &iref,
bytenr, num_bytes, parent,
root_objectid, owner, offset, 1);
if (ret == 0) {
BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
- update_inline_extent_backref(root, path, iref,
+ update_inline_extent_backref(fs_info, path, iref,
refs_to_add, extent_op, NULL);
} else if (ret == -ENOENT) {
- setup_inline_extent_backref(root, path, iref, parent,
+ setup_inline_extent_backref(fs_info, path, iref, parent,
root_objectid, owner, offset,
refs_to_add, extent_op);
ret = 0;
@@ -1900,7 +1906,7 @@ int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
}
static int insert_extent_backref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
+ struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
u64 bytenr, u64 parent, u64 root_objectid,
u64 owner, u64 offset, int refs_to_add)
@@ -1908,10 +1914,10 @@ static int insert_extent_backref(struct btrfs_trans_handle *trans,
int ret;
if (owner < BTRFS_FIRST_FREE_OBJECTID) {
BUG_ON(refs_to_add != 1);
- ret = insert_tree_block_ref(trans, root, path, bytenr,
+ ret = insert_tree_block_ref(trans, fs_info, path, bytenr,
parent, root_objectid);
} else {
- ret = insert_extent_data_ref(trans, root, path, bytenr,
+ ret = insert_extent_data_ref(trans, fs_info, path, bytenr,
parent, root_objectid,
owner, offset, refs_to_add);
}
@@ -1919,7 +1925,7 @@ static int insert_extent_backref(struct btrfs_trans_handle *trans,
}
static int remove_extent_backref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
+ struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
struct btrfs_extent_inline_ref *iref,
int refs_to_drop, int is_data, int *last_ref)
@@ -1928,14 +1934,14 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
BUG_ON(!is_data && refs_to_drop != 1);
if (iref) {
- update_inline_extent_backref(root, path, iref,
+ update_inline_extent_backref(fs_info, path, iref,
-refs_to_drop, NULL, last_ref);
} else if (is_data) {
- ret = remove_extent_data_ref(trans, root, path, refs_to_drop,
+ ret = remove_extent_data_ref(trans, fs_info, path, refs_to_drop,
last_ref);
} else {
*last_ref = 1;
- ret = btrfs_del_item(trans, root, path);
+ ret = btrfs_del_item(trans, fs_info->extent_root, path);
}
return ret;
}
@@ -2089,7 +2095,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
num_bytes, parent, root_objectid,
owner, offset, 0,
- BTRFS_ADD_DELAYED_REF, NULL);
+ BTRFS_ADD_DELAYED_REF);
}
return ret;
}
@@ -2117,9 +2123,9 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
path->reada = READA_FORWARD;
path->leave_spinning = 1;
/* this will setup the path even if it fails to insert the back ref */
- ret = insert_inline_extent_backref(trans, fs_info->extent_root, path,
- bytenr, num_bytes, parent,
- root_objectid, owner, offset,
+ ret = insert_inline_extent_backref(trans, fs_info, path, bytenr,
+ num_bytes, parent, root_objectid,
+ owner, offset,
refs_to_add, extent_op);
if ((ret < 0 && ret != -EAGAIN) || !ret)
goto out;
@@ -2143,9 +2149,8 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
path->reada = READA_FORWARD;
path->leave_spinning = 1;
/* now insert the actual backref */
- ret = insert_extent_backref(trans, fs_info->extent_root,
- path, bytenr, parent, root_objectid,
- owner, offset, refs_to_add);
+ ret = insert_extent_backref(trans, fs_info, path, bytenr, parent,
+ root_objectid, owner, offset, refs_to_add);
if (ret)
btrfs_abort_transaction(trans, ret);
out:
@@ -2290,8 +2295,7 @@ again:
item_size = btrfs_item_size_nr(leaf, path->slots[0]);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
if (item_size < sizeof(*ei)) {
- ret = convert_extent_item_v0(trans, fs_info->extent_root,
- path, (u64)-1, 0);
+ ret = convert_extent_item_v0(trans, fs_info, path, (u64)-1, 0);
if (ret < 0) {
err = ret;
goto out;
@@ -2522,11 +2526,11 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
if (ref && ref->seq &&
btrfs_check_delayed_seq(fs_info, delayed_refs, ref->seq)) {
spin_unlock(&locked_ref->lock);
- btrfs_delayed_ref_unlock(locked_ref);
spin_lock(&delayed_refs->lock);
locked_ref->processing = 0;
delayed_refs->num_heads_ready++;
spin_unlock(&delayed_refs->lock);
+ btrfs_delayed_ref_unlock(locked_ref);
locked_ref = NULL;
cond_resched();
count++;
@@ -2572,7 +2576,10 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
*/
if (must_insert_reserved)
locked_ref->must_insert_reserved = 1;
+ spin_lock(&delayed_refs->lock);
locked_ref->processing = 0;
+ delayed_refs->num_heads_ready++;
+ spin_unlock(&delayed_refs->lock);
btrfs_debug(fs_info,
"run_delayed_extent_op returned %d",
ret);
@@ -3025,8 +3032,7 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
return ret;
}
-static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
+static noinline int check_delayed_ref(struct btrfs_root *root,
struct btrfs_path *path,
u64 objectid, u64 offset, u64 bytenr)
{
@@ -3034,11 +3040,16 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_node *ref;
struct btrfs_delayed_data_ref *data_ref;
struct btrfs_delayed_ref_root *delayed_refs;
+ struct btrfs_transaction *cur_trans;
int ret = 0;
- delayed_refs = &trans->transaction->delayed_refs;
+ cur_trans = root->fs_info->running_transaction;
+ if (!cur_trans)
+ return 0;
+
+ delayed_refs = &cur_trans->delayed_refs;
spin_lock(&delayed_refs->lock);
- head = btrfs_find_delayed_ref_head(trans, bytenr);
+ head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
if (!head) {
spin_unlock(&delayed_refs->lock);
return 0;
@@ -3087,8 +3098,7 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
return ret;
}
-static noinline int check_committed_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
+static noinline int check_committed_ref(struct btrfs_root *root,
struct btrfs_path *path,
u64 objectid, u64 offset, u64 bytenr)
{
@@ -3159,9 +3169,8 @@ out:
return ret;
}
-int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 objectid, u64 offset, u64 bytenr)
+int btrfs_cross_ref_exist(struct btrfs_root *root, u64 objectid, u64 offset,
+ u64 bytenr)
{
struct btrfs_path *path;
int ret;
@@ -3172,12 +3181,12 @@ int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
return -ENOENT;
do {
- ret = check_committed_ref(trans, root, path, objectid,
+ ret = check_committed_ref(root, path, objectid,
offset, bytenr);
if (ret && ret != -ENOENT)
goto out;
- ret2 = check_delayed_ref(trans, root, path, objectid,
+ ret2 = check_delayed_ref(root, path, objectid,
offset, bytenr);
} while (ret2 == -EAGAIN);
@@ -3365,7 +3374,7 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group,
if (trans->aborted)
return 0;
again:
- inode = lookup_free_space_inode(root, block_group, path);
+ inode = lookup_free_space_inode(fs_info, block_group, path);
if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
ret = PTR_ERR(inode);
btrfs_release_path(path);
@@ -3379,7 +3388,8 @@ again:
if (block_group->ro)
goto out_free;
- ret = create_free_space_inode(root, trans, block_group, path);
+ ret = create_free_space_inode(fs_info, trans, block_group,
+ path);
if (ret)
goto out_free;
goto again;
@@ -3421,7 +3431,7 @@ again:
if (ret)
goto out_put;
- ret = btrfs_truncate_free_space_cache(root, trans, NULL, inode);
+ ret = btrfs_truncate_free_space_cache(trans, NULL, inode);
if (ret)
goto out_put;
}
@@ -4116,6 +4126,15 @@ u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data)
return ret;
}
+static u64 btrfs_space_info_used(struct btrfs_space_info *s_info,
+ bool may_use_included)
+{
+ ASSERT(s_info);
+ return s_info->bytes_used + s_info->bytes_reserved +
+ s_info->bytes_pinned + s_info->bytes_readonly +
+ (may_use_included ? s_info->bytes_may_use : 0);
+}
+
int btrfs_alloc_data_chunk_ondemand(struct inode *inode, u64 bytes)
{
struct btrfs_space_info *data_sinfo;
@@ -4141,9 +4160,7 @@ int btrfs_alloc_data_chunk_ondemand(struct inode *inode, u64 bytes)
again:
/* make sure we have enough space to handle the data first */
spin_lock(&data_sinfo->lock);
- used = data_sinfo->bytes_used + data_sinfo->bytes_reserved +
- data_sinfo->bytes_pinned + data_sinfo->bytes_readonly +
- data_sinfo->bytes_may_use;
+ used = btrfs_space_info_used(data_sinfo, true);
if (used + bytes > data_sinfo->total_bytes) {
struct btrfs_trans_handle *trans;
@@ -4418,9 +4435,7 @@ void check_system_chunk(struct btrfs_trans_handle *trans,
info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
spin_lock(&info->lock);
- left = info->total_bytes - info->bytes_used - info->bytes_pinned -
- info->bytes_reserved - info->bytes_readonly -
- info->bytes_may_use;
+ left = info->total_bytes - btrfs_space_info_used(info, true);
spin_unlock(&info->lock);
num_devs = get_profile_num_devs(fs_info, type);
@@ -4603,8 +4618,7 @@ static int can_overcommit(struct btrfs_root *root,
return 0;
profile = btrfs_get_alloc_profile(root, 0);
- used = space_info->bytes_used + space_info->bytes_reserved +
- space_info->bytes_pinned + space_info->bytes_readonly;
+ used = btrfs_space_info_used(space_info, false);
/*
* We only want to allow over committing if we have lots of actual space
@@ -4784,11 +4798,10 @@ skip_async:
* get us somewhere and then commit the transaction if it does. Otherwise it
* will return -ENOSPC.
*/
-static int may_commit_transaction(struct btrfs_root *root,
+static int may_commit_transaction(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *space_info,
u64 bytes, int force)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_block_rsv;
struct btrfs_trans_handle *trans;
@@ -4820,7 +4833,7 @@ static int may_commit_transaction(struct btrfs_root *root,
spin_unlock(&delayed_rsv->lock);
commit:
- trans = btrfs_join_transaction(root);
+ trans = btrfs_join_transaction(fs_info->fs_root);
if (IS_ERR(trans))
return -ENOSPC;
@@ -4834,11 +4847,11 @@ struct reserve_ticket {
wait_queue_head_t wait;
};
-static int flush_space(struct btrfs_root *root,
+static int flush_space(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *space_info, u64 num_bytes,
u64 orig_bytes, int state)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_root *root = fs_info->fs_root;
struct btrfs_trans_handle *trans;
int nr;
int ret = 0;
@@ -4878,7 +4891,8 @@ static int flush_space(struct btrfs_root *root,
ret = 0;
break;
case COMMIT_TRANS:
- ret = may_commit_transaction(root, space_info, orig_bytes, 0);
+ ret = may_commit_transaction(fs_info, space_info,
+ orig_bytes, 0);
break;
default:
ret = -ENOSPC;
@@ -4990,8 +5004,8 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
struct reserve_ticket *ticket;
int ret;
- ret = flush_space(fs_info->fs_root, space_info, to_reclaim,
- to_reclaim, flush_state);
+ ret = flush_space(fs_info, space_info, to_reclaim, to_reclaim,
+ flush_state);
spin_lock(&space_info->lock);
if (list_empty(&space_info->tickets)) {
space_info->flush = 0;
@@ -5046,8 +5060,8 @@ static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
spin_unlock(&space_info->lock);
do {
- flush_space(fs_info->fs_root, space_info, to_reclaim,
- to_reclaim, flush_state);
+ flush_space(fs_info, space_info, to_reclaim, to_reclaim,
+ flush_state);
flush_state++;
spin_lock(&space_info->lock);
if (ticket->bytes == 0) {
@@ -5132,9 +5146,7 @@ static int __reserve_metadata_bytes(struct btrfs_root *root,
spin_lock(&space_info->lock);
ret = -ENOSPC;
- used = space_info->bytes_used + space_info->bytes_reserved +
- space_info->bytes_pinned + space_info->bytes_readonly +
- space_info->bytes_may_use;
+ used = btrfs_space_info_used(space_info, true);
/*
* If we have enough space then hooray, make our reservation and carry
@@ -5627,9 +5639,7 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
block_rsv->size = min_t(u64, num_bytes, SZ_512M);
if (block_rsv->reserved < block_rsv->size) {
- num_bytes = sinfo->bytes_used + sinfo->bytes_pinned +
- sinfo->bytes_reserved + sinfo->bytes_readonly +
- sinfo->bytes_may_use;
+ num_bytes = btrfs_space_info_used(sinfo, true);
if (sinfo->total_bytes > num_bytes) {
num_bytes = sinfo->total_bytes - num_bytes;
num_bytes = min(num_bytes,
@@ -5753,7 +5763,7 @@ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
u64 num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
trace_btrfs_space_reservation(fs_info, "orphan",
- btrfs_ino(inode), num_bytes, 1);
+ btrfs_ino(BTRFS_I(inode)), num_bytes, 1);
return btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
}
@@ -5764,7 +5774,7 @@ void btrfs_orphan_release_metadata(struct inode *inode)
u64 num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
trace_btrfs_space_reservation(fs_info, "orphan",
- btrfs_ino(inode), num_bytes, 0);
+ btrfs_ino(BTRFS_I(inode)), num_bytes, 0);
btrfs_block_rsv_release(fs_info, root->orphan_block_rsv, num_bytes);
}
@@ -5796,7 +5806,7 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
/* One for parent inode, two for dir entries */
num_bytes = 3 * fs_info->nodesize;
- ret = btrfs_qgroup_reserve_meta(root, num_bytes);
+ ret = btrfs_qgroup_reserve_meta(root, num_bytes, true);
if (ret)
return ret;
} else {
@@ -5821,8 +5831,7 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
}
void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info,
- struct btrfs_block_rsv *rsv,
- u64 qgroup_reserved)
+ struct btrfs_block_rsv *rsv)
{
btrfs_block_rsv_release(fs_info, rsv, (u64)-1);
}
@@ -5841,11 +5850,9 @@ static unsigned drop_outstanding_extent(struct inode *inode, u64 num_bytes)
{
unsigned drop_inode_space = 0;
unsigned dropped_extents = 0;
- unsigned num_extents = 0;
+ unsigned num_extents;
- num_extents = (unsigned)div64_u64(num_bytes +
- BTRFS_MAX_EXTENT_SIZE - 1,
- BTRFS_MAX_EXTENT_SIZE);
+ num_extents = count_max_extents(num_bytes);
ASSERT(num_extents);
ASSERT(BTRFS_I(inode)->outstanding_extents >= num_extents);
BTRFS_I(inode)->outstanding_extents -= num_extents;
@@ -5924,7 +5931,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
struct btrfs_block_rsv *block_rsv = &fs_info->delalloc_block_rsv;
u64 to_reserve = 0;
u64 csum_bytes;
- unsigned nr_extents = 0;
+ unsigned nr_extents;
enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
int ret = 0;
bool delalloc_lock = true;
@@ -5957,9 +5964,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
spin_lock(&BTRFS_I(inode)->lock);
- nr_extents = (unsigned)div64_u64(num_bytes +
- BTRFS_MAX_EXTENT_SIZE - 1,
- BTRFS_MAX_EXTENT_SIZE);
+ nr_extents = count_max_extents(num_bytes);
BTRFS_I(inode)->outstanding_extents += nr_extents;
nr_extents = 0;
@@ -5976,7 +5981,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
ret = btrfs_qgroup_reserve_meta(root,
- nr_extents * fs_info->nodesize);
+ nr_extents * fs_info->nodesize, true);
if (ret)
goto out_fail;
}
@@ -6002,7 +6007,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
if (to_reserve)
trace_btrfs_space_reservation(fs_info, "delalloc",
- btrfs_ino(inode), to_reserve, 1);
+ btrfs_ino(BTRFS_I(inode)), to_reserve, 1);
if (release_extra)
btrfs_block_rsv_release(fs_info, block_rsv,
btrfs_calc_trans_metadata_size(fs_info, 1));
@@ -6065,7 +6070,7 @@ out_fail:
if (to_free) {
btrfs_block_rsv_release(fs_info, block_rsv, to_free);
trace_btrfs_space_reservation(fs_info, "delalloc",
- btrfs_ino(inode), to_free, 0);
+ btrfs_ino(BTRFS_I(inode)), to_free, 0);
}
if (delalloc_lock)
mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
@@ -6101,7 +6106,7 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
return;
trace_btrfs_space_reservation(fs_info, "delalloc",
- btrfs_ino(inode), to_free, 0);
+ btrfs_ino(BTRFS_I(inode)), to_free, 0);
btrfs_block_rsv_release(fs_info, &fs_info->delalloc_block_rsv, to_free);
}
@@ -6558,8 +6563,7 @@ static int btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache,
spin_unlock(&space_info->lock);
return ret;
}
-void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info)
+void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info)
{
struct btrfs_caching_control *next;
struct btrfs_caching_control *caching_ctl;
@@ -6842,7 +6846,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
if (is_data)
skinny_metadata = 0;
- ret = lookup_extent_backref(trans, extent_root, path, &iref,
+ ret = lookup_extent_backref(trans, info, path, &iref,
bytenr, num_bytes, parent,
root_objectid, owner_objectid,
owner_offset);
@@ -6874,8 +6878,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
#endif
if (!found_extent) {
BUG_ON(iref);
- ret = remove_extent_backref(trans, extent_root, path,
- NULL, refs_to_drop,
+ ret = remove_extent_backref(trans, info, path, NULL,
+ refs_to_drop,
is_data, &last_ref);
if (ret) {
btrfs_abort_transaction(trans, ret);
@@ -6950,8 +6954,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
if (item_size < sizeof(*ei)) {
BUG_ON(found_extent || extent_slot != path->slots[0]);
- ret = convert_extent_item_v0(trans, extent_root, path,
- owner_objectid, 0);
+ ret = convert_extent_item_v0(trans, info, path, owner_objectid,
+ 0);
if (ret < 0) {
btrfs_abort_transaction(trans, ret);
goto out;
@@ -7018,7 +7022,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(leaf);
}
if (found_extent) {
- ret = remove_extent_backref(trans, extent_root, path,
+ ret = remove_extent_backref(trans, info, path,
iref, refs_to_drop,
is_data, &last_ref);
if (ret) {
@@ -7092,7 +7096,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
delayed_refs = &trans->transaction->delayed_refs;
spin_lock(&delayed_refs->lock);
- head = btrfs_find_delayed_ref_head(trans, bytenr);
+ head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
if (!head)
goto out_delayed_unlock;
@@ -7241,7 +7245,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
num_bytes,
parent, root_objectid, owner,
offset, 0,
- BTRFS_DROP_DELAYED_REF, NULL);
+ BTRFS_DROP_DELAYED_REF);
}
return ret;
}
@@ -7384,7 +7388,8 @@ btrfs_lock_cluster(struct btrfs_block_group_cache *block_group,
spin_unlock(&cluster->refill_lock);
- down_read(&used_bg->data_rwsem);
+ /* We should only have one-level nested. */
+ down_read_nested(&used_bg->data_rwsem, SINGLE_DEPTH_NESTING);
spin_lock(&cluster->refill_lock);
if (used_bg == cluster->block_group)
@@ -7415,12 +7420,11 @@ btrfs_release_block_group(struct btrfs_block_group_cache *cache,
* If there is no suitable free space, we will record the max size of
* the free space extent currently.
*/
-static noinline int find_free_extent(struct btrfs_root *orig_root,
+static noinline int find_free_extent(struct btrfs_fs_info *fs_info,
u64 ram_bytes, u64 num_bytes, u64 empty_size,
u64 hint_byte, struct btrfs_key *ins,
u64 flags, int delalloc)
{
- struct btrfs_fs_info *fs_info = orig_root->fs_info;
int ret = 0;
struct btrfs_root *root = fs_info->extent_root;
struct btrfs_free_cluster *last_ptr = NULL;
@@ -7712,18 +7716,20 @@ unclustered_alloc:
last_ptr->fragmented = 1;
spin_unlock(&last_ptr->lock);
}
- spin_lock(&block_group->free_space_ctl->tree_lock);
- if (cached &&
- block_group->free_space_ctl->free_space <
- num_bytes + empty_cluster + empty_size) {
- if (block_group->free_space_ctl->free_space >
- max_extent_size)
- max_extent_size =
- block_group->free_space_ctl->free_space;
- spin_unlock(&block_group->free_space_ctl->tree_lock);
- goto loop;
+ if (cached) {
+ struct btrfs_free_space_ctl *ctl =
+ block_group->free_space_ctl;
+
+ spin_lock(&ctl->tree_lock);
+ if (ctl->free_space <
+ num_bytes + empty_cluster + empty_size) {
+ if (ctl->free_space > max_extent_size)
+ max_extent_size = ctl->free_space;
+ spin_unlock(&ctl->tree_lock);
+ goto loop;
+ }
+ spin_unlock(&ctl->tree_lock);
}
- spin_unlock(&block_group->free_space_ctl->tree_lock);
offset = btrfs_find_space_for_alloc(block_group, search_start,
num_bytes, empty_size,
@@ -7904,9 +7910,8 @@ static void dump_space_info(struct btrfs_fs_info *fs_info,
spin_lock(&info->lock);
btrfs_info(fs_info, "space_info %llu has %llu free, is %sfull",
info->flags,
- info->total_bytes - info->bytes_used - info->bytes_pinned -
- info->bytes_reserved - info->bytes_readonly -
- info->bytes_may_use, (info->full) ? "" : "not ");
+ info->total_bytes - btrfs_space_info_used(info, true),
+ info->full ? "" : "not ");
btrfs_info(fs_info,
"space_info total=%llu, used=%llu, pinned=%llu, reserved=%llu, may_use=%llu, readonly=%llu",
info->total_bytes, info->bytes_used, info->bytes_pinned,
@@ -7947,7 +7952,7 @@ int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes,
flags = btrfs_get_alloc_profile(root, is_data);
again:
WARN_ON(num_bytes < fs_info->sectorsize);
- ret = find_free_extent(root, ram_bytes, num_bytes, empty_size,
+ ret = find_free_extent(fs_info, ram_bytes, num_bytes, empty_size,
hint_byte, ins, flags, delalloc);
if (!ret && !is_data) {
btrfs_dec_block_group_reservations(fs_info, ins->objectid);
@@ -8190,8 +8195,7 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
ret = btrfs_add_delayed_data_ref(fs_info, trans, ins->objectid,
ins->offset, 0,
root_objectid, owner, offset,
- ram_bytes, BTRFS_ADD_DELAYED_EXTENT,
- NULL);
+ ram_bytes, BTRFS_ADD_DELAYED_EXTENT);
return ret;
}
@@ -8252,7 +8256,7 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
btrfs_set_header_generation(buf, trans->transid);
btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
btrfs_tree_lock(buf);
- clean_tree_block(trans, fs_info, buf);
+ clean_tree_block(fs_info, buf);
clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
btrfs_set_lock_blocking(buf);
@@ -8347,10 +8351,11 @@ static void unuse_block_rsv(struct btrfs_fs_info *fs_info,
* returns the tree buffer or an ERR_PTR on error.
*/
struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 parent, u64 root_objectid,
- struct btrfs_disk_key *key, int level,
- u64 hint, u64 empty_size)
+ struct btrfs_root *root,
+ u64 parent, u64 root_objectid,
+ const struct btrfs_disk_key *key,
+ int level, u64 hint,
+ u64 empty_size)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_key ins;
@@ -8872,7 +8877,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
btrfs_set_lock_blocking(eb);
path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
}
- clean_tree_block(trans, fs_info, eb);
+ clean_tree_block(fs_info, eb);
}
if (eb == root->node) {
@@ -9342,8 +9347,7 @@ static int inc_block_group_ro(struct btrfs_block_group_cache *cache, int force)
num_bytes = cache->key.offset - cache->reserved - cache->pinned -
cache->bytes_super - btrfs_block_group_used(&cache->item);
- if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned +
- sinfo->bytes_may_use + sinfo->bytes_readonly + num_bytes +
+ if (btrfs_space_info_used(sinfo, true) + num_bytes +
min_allocable_bytes <= sinfo->total_bytes) {
sinfo->bytes_readonly += num_bytes;
cache->ro++;
@@ -9356,17 +9360,16 @@ out:
return ret;
}
-int btrfs_inc_block_group_ro(struct btrfs_root *root,
+int btrfs_inc_block_group_ro(struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *cache)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_trans_handle *trans;
u64 alloc_flags;
int ret;
again:
- trans = btrfs_join_transaction(root);
+ trans = btrfs_join_transaction(fs_info->extent_root);
if (IS_ERR(trans))
return PTR_ERR(trans);
@@ -9553,9 +9556,8 @@ int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr)
* all of the extents from this block group. If we can, we're good
*/
if ((space_info->total_bytes != block_group->key.offset) &&
- (space_info->bytes_used + space_info->bytes_reserved +
- space_info->bytes_pinned + space_info->bytes_readonly +
- min_free < space_info->total_bytes)) {
+ (btrfs_space_info_used(space_info, false) + min_free <
+ space_info->total_bytes)) {
spin_unlock(&space_info->lock);
goto out;
}
@@ -9738,6 +9740,11 @@ void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
}
}
+/*
+ * Must be called only after stopping all workers, since we could have block
+ * group caching kthreads running, and therefore they could race with us if we
+ * freed the block groups before stopping them.
+ */
int btrfs_free_block_groups(struct btrfs_fs_info *info)
{
struct btrfs_block_group_cache *block_group;
@@ -9777,9 +9784,6 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
list_del(&block_group->list);
up_write(&block_group->space_info->groups_sem);
- if (block_group->cached == BTRFS_CACHE_STARTED)
- wait_block_group_cache_done(block_group);
-
/*
* We haven't cached this block group, which means we could
* possibly have excluded extents on this block group.
@@ -9789,6 +9793,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
free_excluded_extents(info, block_group);
btrfs_remove_free_space_cache(block_group);
+ ASSERT(block_group->cached != BTRFS_CACHE_STARTED);
ASSERT(list_empty(&block_group->dirty_list));
ASSERT(list_empty(&block_group->io_list));
ASSERT(list_empty(&block_group->bg_list));
@@ -10313,7 +10318,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
* get the inode first so any iput calls done for the io_list
* aren't the final iput (no unlinks allowed now)
*/
- inode = lookup_free_space_inode(tree_root, block_group, path);
+ inode = lookup_free_space_inode(fs_info, block_group, path);
mutex_lock(&trans->transaction->cache_write_mutex);
/*
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 4ac383a3a649..d15b5ddb6732 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -98,7 +98,7 @@ static inline void __btrfs_debug_check_extent_io_range(const char *caller,
if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) {
btrfs_debug_rl(BTRFS_I(inode)->root->fs_info,
"%s: ino %llu isize %llu odd range [%llu,%llu]",
- caller, btrfs_ino(inode), isize, start, end);
+ caller, btrfs_ino(BTRFS_I(inode)), isize, start, end);
}
}
#else
@@ -144,7 +144,7 @@ static void add_extent_changeset(struct extent_state *state, unsigned bits,
if (!set && (state->state & bits) == 0)
return;
changeset->bytes_changed += state->end - state->start + 1;
- ret = ulist_add(changeset->range_changed, state->start, state->end,
+ ret = ulist_add(&changeset->range_changed, state->start, state->end,
GFP_ATOMIC);
/* ENOMEM */
BUG_ON(ret < 0);
@@ -226,6 +226,11 @@ static struct extent_state *alloc_extent_state(gfp_t mask)
{
struct extent_state *state;
+ /*
+ * The given mask might be not appropriate for the slab allocator,
+ * drop the unsupported bits
+ */
+ mask &= ~(__GFP_DMA32|__GFP_HIGHMEM);
state = kmem_cache_alloc(extent_state_cache, mask);
if (!state)
return state;
@@ -1549,33 +1554,24 @@ out:
return found;
}
+static int __process_pages_contig(struct address_space *mapping,
+ struct page *locked_page,
+ pgoff_t start_index, pgoff_t end_index,
+ unsigned long page_ops, pgoff_t *index_ret);
+
static noinline void __unlock_for_delalloc(struct inode *inode,
struct page *locked_page,
u64 start, u64 end)
{
- int ret;
- struct page *pages[16];
unsigned long index = start >> PAGE_SHIFT;
unsigned long end_index = end >> PAGE_SHIFT;
- unsigned long nr_pages = end_index - index + 1;
- int i;
+ ASSERT(locked_page);
if (index == locked_page->index && end_index == index)
return;
- while (nr_pages > 0) {
- ret = find_get_pages_contig(inode->i_mapping, index,
- min_t(unsigned long, nr_pages,
- ARRAY_SIZE(pages)), pages);
- for (i = 0; i < ret; i++) {
- if (pages[i] != locked_page)
- unlock_page(pages[i]);
- put_page(pages[i]);
- }
- nr_pages -= ret;
- index += ret;
- cond_resched();
- }
+ __process_pages_contig(inode->i_mapping, locked_page, index, end_index,
+ PAGE_UNLOCK, NULL);
}
static noinline int lock_delalloc_pages(struct inode *inode,
@@ -1584,59 +1580,19 @@ static noinline int lock_delalloc_pages(struct inode *inode,
u64 delalloc_end)
{
unsigned long index = delalloc_start >> PAGE_SHIFT;
- unsigned long start_index = index;
+ unsigned long index_ret = index;
unsigned long end_index = delalloc_end >> PAGE_SHIFT;
- unsigned long pages_locked = 0;
- struct page *pages[16];
- unsigned long nrpages;
int ret;
- int i;
- /* the caller is responsible for locking the start index */
+ ASSERT(locked_page);
if (index == locked_page->index && index == end_index)
return 0;
- /* skip the page at the start index */
- nrpages = end_index - index + 1;
- while (nrpages > 0) {
- ret = find_get_pages_contig(inode->i_mapping, index,
- min_t(unsigned long,
- nrpages, ARRAY_SIZE(pages)), pages);
- if (ret == 0) {
- ret = -EAGAIN;
- goto done;
- }
- /* now we have an array of pages, lock them all */
- for (i = 0; i < ret; i++) {
- /*
- * the caller is taking responsibility for
- * locked_page
- */
- if (pages[i] != locked_page) {
- lock_page(pages[i]);
- if (!PageDirty(pages[i]) ||
- pages[i]->mapping != inode->i_mapping) {
- ret = -EAGAIN;
- unlock_page(pages[i]);
- put_page(pages[i]);
- goto done;
- }
- }
- put_page(pages[i]);
- pages_locked++;
- }
- nrpages -= ret;
- index += ret;
- cond_resched();
- }
- ret = 0;
-done:
- if (ret && pages_locked) {
- __unlock_for_delalloc(inode, locked_page,
- delalloc_start,
- ((u64)(start_index + pages_locked - 1)) <<
- PAGE_SHIFT);
- }
+ ret = __process_pages_contig(inode->i_mapping, locked_page, index,
+ end_index, PAGE_LOCK, &index_ret);
+ if (ret == -EAGAIN)
+ __unlock_for_delalloc(inode, locked_page, delalloc_start,
+ (u64)index_ret << PAGE_SHIFT);
return ret;
}
@@ -1726,37 +1682,47 @@ out_failed:
return found;
}
-void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
- u64 delalloc_end, struct page *locked_page,
- unsigned clear_bits,
- unsigned long page_ops)
+static int __process_pages_contig(struct address_space *mapping,
+ struct page *locked_page,
+ pgoff_t start_index, pgoff_t end_index,
+ unsigned long page_ops, pgoff_t *index_ret)
{
- struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
- int ret;
+ unsigned long nr_pages = end_index - start_index + 1;
+ unsigned long pages_locked = 0;
+ pgoff_t index = start_index;
struct page *pages[16];
- unsigned long index = start >> PAGE_SHIFT;
- unsigned long end_index = end >> PAGE_SHIFT;
- unsigned long nr_pages = end_index - index + 1;
+ unsigned ret;
+ int err = 0;
int i;
- clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS);
- if (page_ops == 0)
- return;
+ if (page_ops & PAGE_LOCK) {
+ ASSERT(page_ops == PAGE_LOCK);
+ ASSERT(index_ret && *index_ret == start_index);
+ }
if ((page_ops & PAGE_SET_ERROR) && nr_pages > 0)
- mapping_set_error(inode->i_mapping, -EIO);
+ mapping_set_error(mapping, -EIO);
while (nr_pages > 0) {
- ret = find_get_pages_contig(inode->i_mapping, index,
+ ret = find_get_pages_contig(mapping, index,
min_t(unsigned long,
nr_pages, ARRAY_SIZE(pages)), pages);
- for (i = 0; i < ret; i++) {
+ if (ret == 0) {
+ /*
+ * Only if we're going to lock these pages,
+ * can we find nothing at @index.
+ */
+ ASSERT(page_ops & PAGE_LOCK);
+ return ret;
+ }
+ for (i = 0; i < ret; i++) {
if (page_ops & PAGE_SET_PRIVATE2)
SetPagePrivate2(pages[i]);
if (pages[i] == locked_page) {
put_page(pages[i]);
+ pages_locked++;
continue;
}
if (page_ops & PAGE_CLEAR_DIRTY)
@@ -1769,12 +1735,40 @@ void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
end_page_writeback(pages[i]);
if (page_ops & PAGE_UNLOCK)
unlock_page(pages[i]);
+ if (page_ops & PAGE_LOCK) {
+ lock_page(pages[i]);
+ if (!PageDirty(pages[i]) ||
+ pages[i]->mapping != mapping) {
+ unlock_page(pages[i]);
+ put_page(pages[i]);
+ err = -EAGAIN;
+ goto out;
+ }
+ }
put_page(pages[i]);
+ pages_locked++;
}
nr_pages -= ret;
index += ret;
cond_resched();
}
+out:
+ if (err && index_ret)
+ *index_ret = start_index + pages_locked - 1;
+ return err;
+}
+
+void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
+ u64 delalloc_end, struct page *locked_page,
+ unsigned clear_bits,
+ unsigned long page_ops)
+{
+ clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, clear_bits, 1, 0,
+ NULL, GFP_NOFS);
+
+ __process_pages_contig(inode->i_mapping, locked_page,
+ start >> PAGE_SHIFT, end >> PAGE_SHIFT,
+ page_ops, NULL);
}
/*
@@ -2060,7 +2054,7 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical,
btrfs_info_rl_in_rcu(fs_info,
"read error corrected: ino %llu off %llu (dev %s sector %llu)",
- btrfs_ino(inode), start,
+ btrfs_ino(BTRFS_I(inode)), start,
rcu_str_deref(dev->name), sector);
btrfs_bio_counter_dec(fs_info);
bio_put(bio);
@@ -2765,7 +2759,6 @@ static int submit_extent_page(int op, int op_flags, struct extent_io_tree *tree,
size_t size, unsigned long offset,
struct block_device *bdev,
struct bio **bio_ret,
- unsigned long max_pages,
bio_end_io_t end_io_func,
int mirror_num,
unsigned long prev_bio_flags,
@@ -2931,7 +2924,6 @@ static int __do_readpage(struct extent_io_tree *tree,
}
}
while (cur <= end) {
- unsigned long pnr = (last_byte >> PAGE_SHIFT) + 1;
bool force_bio_submit = false;
if (cur >= last_byte) {
@@ -3066,10 +3058,9 @@ static int __do_readpage(struct extent_io_tree *tree,
continue;
}
- pnr -= page->index;
ret = submit_extent_page(REQ_OP_READ, read_flags, tree, NULL,
page, sector, disk_io_size, pg_offset,
- bdev, bio, pnr,
+ bdev, bio,
end_bio_extent_readpage, mirror_num,
*bio_flags,
this_bio_flag,
@@ -3210,7 +3201,7 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
return ret;
}
-static void update_nr_written(struct page *page, struct writeback_control *wbc,
+static void update_nr_written(struct writeback_control *wbc,
unsigned long nr_written)
{
wbc->nr_to_write -= nr_written;
@@ -3330,7 +3321,6 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
u64 block_start;
u64 iosize;
sector_t sector;
- struct extent_state *cached_state = NULL;
struct extent_map *em;
struct block_device *bdev;
size_t pg_offset = 0;
@@ -3349,10 +3339,9 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
else
redirty_page_for_writepage(wbc, page);
- update_nr_written(page, wbc, nr_written);
+ update_nr_written(wbc, nr_written);
unlock_page(page);
- ret = 1;
- goto done_unlocked;
+ return 1;
}
}
@@ -3360,7 +3349,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
* we don't want to touch the inode after unlocking the page,
* so we update the mapping writeback index now
*/
- update_nr_written(page, wbc, nr_written + 1);
+ update_nr_written(wbc, nr_written + 1);
end = page_end;
if (i_size <= start) {
@@ -3374,7 +3363,6 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
while (cur <= end) {
u64 em_end;
- unsigned long max_nr;
if (cur >= i_size) {
if (tree->ops && tree->ops->writepage_end_io_hook)
@@ -3431,8 +3419,6 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
continue;
}
- max_nr = (i_size >> PAGE_SHIFT) + 1;
-
set_range_writeback(tree, cur, cur + iosize - 1);
if (!PageWriteback(page)) {
btrfs_err(BTRFS_I(inode)->root->fs_info,
@@ -3442,11 +3428,14 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
ret = submit_extent_page(REQ_OP_WRITE, write_flags, tree, wbc,
page, sector, iosize, pg_offset,
- bdev, &epd->bio, max_nr,
+ bdev, &epd->bio,
end_bio_extent_writepage,
0, 0, 0, false);
- if (ret)
+ if (ret) {
SetPageError(page);
+ if (PageWriteback(page))
+ end_page_writeback(page);
+ }
cur = cur + iosize;
pg_offset += iosize;
@@ -3454,11 +3443,6 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
}
done:
*nr_ret = nr;
-
-done_unlocked:
-
- /* drop our reference on any cached states */
- free_extent_state(cached_state);
return ret;
}
@@ -3761,20 +3745,21 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
set_page_writeback(p);
ret = submit_extent_page(REQ_OP_WRITE, write_flags, tree, wbc,
p, offset >> 9, PAGE_SIZE, 0, bdev,
- &epd->bio, -1,
+ &epd->bio,
end_bio_extent_buffer_writepage,
0, epd->bio_flags, bio_flags, false);
epd->bio_flags = bio_flags;
if (ret) {
set_btree_ioerr(p);
- end_page_writeback(p);
+ if (PageWriteback(p))
+ end_page_writeback(p);
if (atomic_sub_and_test(num_pages - i, &eb->io_pages))
end_extent_buffer_writeback(eb);
ret = -EIO;
break;
}
offset += PAGE_SIZE;
- update_nr_written(p, wbc, 1);
+ update_nr_written(wbc, 1);
unlock_page(p);
}
@@ -3926,8 +3911,7 @@ retry:
* WB_SYNC_ALL then we were called for data integrity and we must wait for
* existing IO to complete.
*/
-static int extent_write_cache_pages(struct extent_io_tree *tree,
- struct address_space *mapping,
+static int extent_write_cache_pages(struct address_space *mapping,
struct writeback_control *wbc,
writepage_t writepage, void *data,
void (*flush_fn)(void *))
@@ -4168,8 +4152,7 @@ int extent_writepages(struct extent_io_tree *tree,
.bio_flags = 0,
};
- ret = extent_write_cache_pages(tree, mapping, wbc,
- __extent_writepage, &epd,
+ ret = extent_write_cache_pages(mapping, wbc, __extent_writepage, &epd,
flush_write_bio);
flush_epd_write_bio(&epd);
return ret;
@@ -4264,8 +4247,6 @@ static int try_release_extent_state(struct extent_map_tree *map,
EXTENT_IOBITS, 0, NULL))
ret = 0;
else {
- if ((mask & GFP_NOFS) == GFP_NOFS)
- mask = GFP_NOFS;
/*
* at this point we can safely clear everything except the
* locked bit and the nodatasum bit
@@ -4410,8 +4391,8 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
* lookup the last file extent. We're not using i_size here
* because there might be preallocation past i_size
*/
- ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(inode), -1,
- 0);
+ ret = btrfs_lookup_file_extent(NULL, root, path,
+ btrfs_ino(BTRFS_I(inode)), -1, 0);
if (ret < 0) {
btrfs_free_path(path);
return ret;
@@ -4426,7 +4407,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
found_type = found_key.type;
/* No extents, but there might be delalloc bits */
- if (found_key.objectid != btrfs_ino(inode) ||
+ if (found_key.objectid != btrfs_ino(BTRFS_I(inode)) ||
found_type != BTRFS_EXTENT_DATA_KEY) {
/* have to trust i_size as the end */
last = (u64)-1;
@@ -4535,8 +4516,8 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
* lookup stuff.
*/
ret = btrfs_check_shared(trans, root->fs_info,
- root->objectid,
- btrfs_ino(inode), bytenr);
+ root->objectid,
+ btrfs_ino(BTRFS_I(inode)), bytenr);
if (trans)
btrfs_end_transaction(trans);
if (ret < 0)
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 17f9ce479ed7..270d03be290e 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -45,13 +45,14 @@
#define EXTENT_BUFFER_IN_TREE 10
#define EXTENT_BUFFER_WRITE_ERR 11 /* write IO error */
-/* these are flags for extent_clear_unlock_delalloc */
+/* these are flags for __process_pages_contig */
#define PAGE_UNLOCK (1 << 0)
#define PAGE_CLEAR_DIRTY (1 << 1)
#define PAGE_SET_WRITEBACK (1 << 2)
#define PAGE_END_WRITEBACK (1 << 3)
#define PAGE_SET_PRIVATE2 (1 << 4)
#define PAGE_SET_ERROR (1 << 5)
+#define PAGE_LOCK (1 << 6)
/*
* page->private values. Every page that is controlled by the extent
@@ -192,7 +193,7 @@ struct extent_changeset {
u64 bytes_changed;
/* Changed ranges */
- struct ulist *range_changed;
+ struct ulist range_changed;
};
static inline void extent_set_compress_type(unsigned long *bio_flags,
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index e97e322c28f0..e35df48b4383 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -255,7 +255,7 @@ static int __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
} else {
btrfs_info_rl(fs_info,
"no csum found for inode %llu start %llu",
- btrfs_ino(inode), offset);
+ btrfs_ino(BTRFS_I(inode)), offset);
}
item = NULL;
btrfs_release_path(path);
@@ -643,7 +643,33 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
/* delete the entire item, it is inside our range */
if (key.offset >= bytenr && csum_end <= end_byte) {
- ret = btrfs_del_item(trans, root, path);
+ int del_nr = 1;
+
+ /*
+ * Check how many csum items preceding this one in this
+ * leaf correspond to our range and then delete them all
+ * at once.
+ */
+ if (key.offset > bytenr && path->slots[0] > 0) {
+ int slot = path->slots[0] - 1;
+
+ while (slot >= 0) {
+ struct btrfs_key pk;
+
+ btrfs_item_key_to_cpu(leaf, &pk, slot);
+ if (pk.offset < bytenr ||
+ pk.type != BTRFS_EXTENT_CSUM_KEY ||
+ pk.objectid !=
+ BTRFS_EXTENT_CSUM_OBJECTID)
+ break;
+ path->slots[0] = slot;
+ del_nr++;
+ key.offset = pk.offset;
+ slot--;
+ }
+ }
+ ret = btrfs_del_items(trans, root, path,
+ path->slots[0], del_nr);
if (ret)
goto out;
if (key.offset == bytenr)
@@ -856,8 +882,8 @@ insert:
tmp = min(tmp, (next_offset - file_key.offset) >>
fs_info->sb->s_blocksize_bits);
- tmp = max((u64)1, tmp);
- tmp = min(tmp, (u64)MAX_CSUM_ITEMS(fs_info, csum_size));
+ tmp = max_t(u64, 1, tmp);
+ tmp = min_t(u64, tmp, MAX_CSUM_ITEMS(fs_info, csum_size));
ins_size = csum_size * tmp;
} else {
ins_size = csum_size;
@@ -977,7 +1003,7 @@ void btrfs_extent_item_to_extent_map(struct inode *inode,
} else {
btrfs_err(fs_info,
"unknown file extent item type %d, inode %llu, offset %llu, root %llu",
- type, btrfs_ino(inode), extent_start,
+ type, btrfs_ino(BTRFS_I(inode)), extent_start,
root->root_key.objectid);
}
}
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index b5c5da215d05..18e5146df864 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -168,7 +168,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
if (!defrag)
return -ENOMEM;
- defrag->ino = btrfs_ino(inode);
+ defrag->ino = btrfs_ino(BTRFS_I(inode));
defrag->transid = transid;
defrag->root = root->root_key.objectid;
@@ -702,7 +702,7 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
struct btrfs_file_extent_item *fi;
struct btrfs_key key;
struct btrfs_key new_key;
- u64 ino = btrfs_ino(inode);
+ u64 ino = btrfs_ino(BTRFS_I(inode));
u64 search_start = start;
u64 disk_bytenr = 0;
u64 num_bytes = 0;
@@ -1102,7 +1102,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
int del_slot = 0;
int recow;
int ret;
- u64 ino = btrfs_ino(inode);
+ u64 ino = btrfs_ino(BTRFS_I(inode));
path = btrfs_alloc_path();
if (!path)
@@ -2062,7 +2062,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
* commit does not start nor waits for ordered extents to complete.
*/
smp_mb();
- if (btrfs_inode_in_log(inode, fs_info->generation) ||
+ if (btrfs_inode_in_log(BTRFS_I(inode), fs_info->generation) ||
(full_sync && BTRFS_I(inode)->last_trans <=
fs_info->last_trans_committed) ||
(!btrfs_have_ordered_extents_in_range(inode, start, len) &&
@@ -2203,7 +2203,7 @@ static int hole_mergeable(struct inode *inode, struct extent_buffer *leaf,
return 0;
btrfs_item_key_to_cpu(leaf, &key, slot);
- if (key.objectid != btrfs_ino(inode) ||
+ if (key.objectid != btrfs_ino(BTRFS_I(inode)) ||
key.type != BTRFS_EXTENT_DATA_KEY)
return 0;
@@ -2237,7 +2237,7 @@ static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode,
if (btrfs_fs_incompat(fs_info, NO_HOLES))
goto out;
- key.objectid = btrfs_ino(inode);
+ key.objectid = btrfs_ino(BTRFS_I(inode));
key.type = BTRFS_EXTENT_DATA_KEY;
key.offset = offset;
@@ -2285,9 +2285,8 @@ static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode,
}
btrfs_release_path(path);
- ret = btrfs_insert_file_extent(trans, root, btrfs_ino(inode), offset,
- 0, 0, end - offset, 0, end - offset,
- 0, 0, 0);
+ ret = btrfs_insert_file_extent(trans, root, btrfs_ino(BTRFS_I(inode)),
+ offset, 0, 0, end - offset, 0, end - offset, 0, 0, 0);
if (ret)
return ret;
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 7015892c9ee8..1a131f7d6c1b 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -94,12 +94,11 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
return inode;
}
-struct inode *lookup_free_space_inode(struct btrfs_root *root,
+struct inode *lookup_free_space_inode(struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache
*block_group, struct btrfs_path *path)
{
struct inode *inode = NULL;
- struct btrfs_fs_info *fs_info = root->fs_info;
u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;
spin_lock(&block_group->lock);
@@ -109,7 +108,7 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root,
if (inode)
return inode;
- inode = __lookup_free_space_inode(root, path,
+ inode = __lookup_free_space_inode(fs_info->tree_root, path,
block_group->key.objectid);
if (IS_ERR(inode))
return inode;
@@ -192,7 +191,7 @@ static int __create_free_space_inode(struct btrfs_root *root,
return 0;
}
-int create_free_space_inode(struct btrfs_root *root,
+int create_free_space_inode(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path)
@@ -200,11 +199,11 @@ int create_free_space_inode(struct btrfs_root *root,
int ret;
u64 ino;
- ret = btrfs_find_free_objectid(root, &ino);
+ ret = btrfs_find_free_objectid(fs_info->tree_root, &ino);
if (ret < 0)
return ret;
- return __create_free_space_inode(root, trans, path, ino,
+ return __create_free_space_inode(fs_info->tree_root, trans, path, ino,
block_group->key.objectid);
}
@@ -227,21 +226,21 @@ int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info,
return ret;
}
-int btrfs_truncate_free_space_cache(struct btrfs_root *root,
- struct btrfs_trans_handle *trans,
+int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans,
struct btrfs_block_group_cache *block_group,
struct inode *inode)
{
+ struct btrfs_root *root = BTRFS_I(inode)->root;
int ret = 0;
- struct btrfs_path *path = btrfs_alloc_path();
bool locked = false;
- if (!path) {
- ret = -ENOMEM;
- goto fail;
- }
-
if (block_group) {
+ struct btrfs_path *path = btrfs_alloc_path();
+
+ if (!path) {
+ ret = -ENOMEM;
+ goto fail;
+ }
locked = true;
mutex_lock(&trans->transaction->cache_write_mutex);
if (!list_empty(&block_group->io_list)) {
@@ -258,8 +257,8 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root,
spin_lock(&block_group->lock);
block_group->disk_cache_state = BTRFS_DC_CLEAR;
spin_unlock(&block_group->lock);
+ btrfs_free_path(path);
}
- btrfs_free_path(path);
btrfs_i_size_write(inode, 0);
truncate_pagecache(inode, 0);
@@ -286,14 +285,14 @@ fail:
return ret;
}
-static int readahead_cache(struct inode *inode)
+static void readahead_cache(struct inode *inode)
{
struct file_ra_state *ra;
unsigned long last_index;
ra = kzalloc(sizeof(*ra), GFP_NOFS);
if (!ra)
- return -ENOMEM;
+ return;
file_ra_state_init(ra, inode->i_mapping);
last_index = (i_size_read(inode) - 1) >> PAGE_SHIFT;
@@ -301,8 +300,6 @@ static int readahead_cache(struct inode *inode)
page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);
kfree(ra);
-
- return 0;
}
static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode,
@@ -313,7 +310,7 @@ static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode,
num_pages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
- if (btrfs_ino(inode) != BTRFS_FREE_INO_OBJECTID)
+ if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FREE_INO_OBJECTID)
check_crcs = 1;
/* Make sure we can fit our crcs into the first page */
@@ -730,9 +727,7 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
if (ret)
return ret;
- ret = readahead_cache(inode);
- if (ret)
- goto out;
+ readahead_cache(inode);
ret = io_ctl_prepare_pages(&io_ctl, inode, 1);
if (ret)
@@ -828,7 +823,6 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group)
{
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
- struct btrfs_root *root = fs_info->tree_root;
struct inode *inode;
struct btrfs_path *path;
int ret = 0;
@@ -852,7 +846,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
path->search_commit_root = 1;
path->skip_locking = 1;
- inode = lookup_free_space_inode(root, block_group, path);
+ inode = lookup_free_space_inode(fs_info, block_group, path);
if (IS_ERR(inode)) {
btrfs_free_path(path);
return 0;
@@ -1128,8 +1122,7 @@ cleanup_bitmap_list(struct list_head *bitmap_list)
static void noinline_for_stack
cleanup_write_cache_enospc(struct inode *inode,
struct btrfs_io_ctl *io_ctl,
- struct extent_state **cached_state,
- struct list_head *bitmap_list)
+ struct extent_state **cached_state)
{
io_ctl_drop_pages(io_ctl);
unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
@@ -1225,8 +1218,6 @@ int btrfs_wait_cache_io(struct btrfs_trans_handle *trans,
* @ctl - the free space cache we are going to write out
* @block_group - the block_group for this cache if it belongs to a block_group
* @trans - the trans handle
- * @path - the path to use
- * @offset - the offset for the key we'll insert
*
* This function writes out a free space cache struct to disk for quick recovery
* on mount. This will return 0 if it was successful in writing the cache out,
@@ -1236,8 +1227,7 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
struct btrfs_free_space_ctl *ctl,
struct btrfs_block_group_cache *block_group,
struct btrfs_io_ctl *io_ctl,
- struct btrfs_trans_handle *trans,
- struct btrfs_path *path, u64 offset)
+ struct btrfs_trans_handle *trans)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct extent_state *cached_state = NULL;
@@ -1365,7 +1355,7 @@ out_nospc_locked:
mutex_unlock(&ctl->cache_writeout_mutex);
out_nospc:
- cleanup_write_cache_enospc(inode, io_ctl, &cached_state, &bitmap_list);
+ cleanup_write_cache_enospc(inode, io_ctl, &cached_state);
if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA))
up_write(&block_group->data_rwsem);
@@ -1378,7 +1368,6 @@ int btrfs_write_out_cache(struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path)
{
- struct btrfs_root *root = fs_info->tree_root;
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
struct inode *inode;
int ret = 0;
@@ -1390,13 +1379,12 @@ int btrfs_write_out_cache(struct btrfs_fs_info *fs_info,
}
spin_unlock(&block_group->lock);
- inode = lookup_free_space_inode(root, block_group, path);
+ inode = lookup_free_space_inode(fs_info, block_group, path);
if (IS_ERR(inode))
return 0;
- ret = __btrfs_write_out_cache(root, inode, ctl, block_group,
- &block_group->io_ctl, trans,
- path, block_group->key.objectid);
+ ret = __btrfs_write_out_cache(fs_info->tree_root, inode, ctl,
+ block_group, &block_group->io_ctl, trans);
if (ret) {
#ifdef DEBUG
btrfs_err(fs_info,
@@ -3543,8 +3531,7 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
return 0;
memset(&io_ctl, 0, sizeof(io_ctl));
- ret = __btrfs_write_out_cache(root, inode, ctl, NULL, &io_ctl,
- trans, path, 0);
+ ret = __btrfs_write_out_cache(root, inode, ctl, NULL, &io_ctl, trans);
if (!ret) {
/*
* At this point writepages() didn't error out, so our metadata
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index 6f3c025a2c6c..79eca4cabb1c 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -51,18 +51,17 @@ struct btrfs_free_space_op {
struct btrfs_io_ctl;
-struct inode *lookup_free_space_inode(struct btrfs_root *root,
+struct inode *lookup_free_space_inode(struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache
*block_group, struct btrfs_path *path);
-int create_free_space_inode(struct btrfs_root *root,
+int create_free_space_inode(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path);
int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *rsv);
-int btrfs_truncate_free_space_cache(struct btrfs_root *root,
- struct btrfs_trans_handle *trans,
+int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans,
struct btrfs_block_group_cache *block_group,
struct inode *inode);
int load_free_space_cache(struct btrfs_fs_info *fs_info,
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index ff0c55337c2e..dd7fb22a955a 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -1269,7 +1269,7 @@ int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info)
list_del(&free_space_root->dirty_list);
btrfs_tree_lock(free_space_root->node);
- clean_tree_block(trans, fs_info, free_space_root->node);
+ clean_tree_block(fs_info, free_space_root->node);
btrfs_tree_unlock(free_space_root->node);
btrfs_free_tree_block(trans, free_space_root, free_space_root->node,
0, 1);
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index 144b119ff43f..3bbb8f095953 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -467,7 +467,7 @@ again:
}
if (i_size_read(inode) > 0) {
- ret = btrfs_truncate_free_space_cache(root, trans, NULL, inode);
+ ret = btrfs_truncate_free_space_cache(trans, NULL, inode);
if (ret) {
if (ret != -ENOSPC)
btrfs_abort_transaction(trans, ret);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index f2b281ad7af6..956df76dc235 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -71,6 +71,7 @@ struct btrfs_dio_data {
u64 reserve;
u64 unsubmitted_oe_range_start;
u64 unsubmitted_oe_range_end;
+ int overwrite;
};
static const struct inode_operations btrfs_dir_inode_operations;
@@ -108,11 +109,11 @@ static noinline int cow_file_range(struct inode *inode,
u64 start, u64 end, u64 delalloc_end,
int *page_started, unsigned long *nr_written,
int unlock, struct btrfs_dedupe_hash *hash);
-static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
- u64 len, u64 orig_start,
- u64 block_start, u64 block_len,
- u64 orig_block_len, u64 ram_bytes,
- int type);
+static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 len,
+ u64 orig_start, u64 block_start,
+ u64 block_len, u64 orig_block_len,
+ u64 ram_bytes, int compress_type,
+ int type);
static int btrfs_dirty_inode(struct inode *inode);
@@ -166,7 +167,7 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans,
struct btrfs_key key;
size_t datasize;
- key.objectid = btrfs_ino(inode);
+ key.objectid = btrfs_ino(BTRFS_I(inode));
key.offset = start;
key.type = BTRFS_EXTENT_DATA_KEY;
@@ -388,6 +389,15 @@ static inline int inode_need_compress(struct inode *inode)
return 0;
}
+static inline void inode_should_defrag(struct inode *inode,
+ u64 start, u64 end, u64 num_bytes, u64 small_write)
+{
+ /* If this is a small write inside eof, kick off a defrag */
+ if (num_bytes < small_write &&
+ (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
+ btrfs_add_inode_defrag(NULL, inode);
+}
+
/*
* we create compressed extents in two phases. The first
* phase compresses a range of pages that have already been
@@ -430,10 +440,7 @@ static noinline void compress_file_range(struct inode *inode,
int compress_type = fs_info->compress_type;
int redirty = 0;
- /* if this is a small write inside eof, kick off a defrag */
- if ((end - start + 1) < SZ_16K &&
- (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
- btrfs_add_inode_defrag(NULL, inode);
+ inode_should_defrag(inode, start, end, end - start + 1, SZ_16K);
actual_end = min_t(u64, isize, end + 1);
again:
@@ -541,7 +548,7 @@ cont:
* to make an uncompressed inline extent.
*/
ret = cow_file_range_inline(root, inode, start, end,
- 0, 0, NULL);
+ 0, BTRFS_COMPRESS_NONE, NULL);
} else {
/* try making a compressed inline extent */
ret = cow_file_range_inline(root, inode, start, end,
@@ -690,7 +697,6 @@ static noinline void submit_compressed_extents(struct inode *inode,
struct btrfs_key ins;
struct extent_map *em;
struct btrfs_root *root = BTRFS_I(inode)->root;
- struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
struct extent_io_tree *io_tree;
int ret = 0;
@@ -778,46 +784,19 @@ retry:
* here we're doing allocation and writeback of the
* compressed pages
*/
- btrfs_drop_extent_cache(inode, async_extent->start,
- async_extent->start +
- async_extent->ram_size - 1, 0);
-
- em = alloc_extent_map();
- if (!em) {
- ret = -ENOMEM;
- goto out_free_reserve;
- }
- em->start = async_extent->start;
- em->len = async_extent->ram_size;
- em->orig_start = em->start;
- em->mod_start = em->start;
- em->mod_len = em->len;
-
- em->block_start = ins.objectid;
- em->block_len = ins.offset;
- em->orig_block_len = ins.offset;
- em->ram_bytes = async_extent->ram_size;
- em->bdev = fs_info->fs_devices->latest_bdev;
- em->compress_type = async_extent->compress_type;
- set_bit(EXTENT_FLAG_PINNED, &em->flags);
- set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
- em->generation = -1;
-
- while (1) {
- write_lock(&em_tree->lock);
- ret = add_extent_mapping(em_tree, em, 1);
- write_unlock(&em_tree->lock);
- if (ret != -EEXIST) {
- free_extent_map(em);
- break;
- }
- btrfs_drop_extent_cache(inode, async_extent->start,
- async_extent->start +
- async_extent->ram_size - 1, 0);
- }
-
- if (ret)
+ em = create_io_em(inode, async_extent->start,
+ async_extent->ram_size, /* len */
+ async_extent->start, /* orig_start */
+ ins.objectid, /* block_start */
+ ins.offset, /* block_len */
+ ins.offset, /* orig_block_len */
+ async_extent->ram_size, /* ram_bytes */
+ async_extent->compress_type,
+ BTRFS_ORDERED_COMPRESSED);
+ if (IS_ERR(em))
+ /* ret value is not necessary due to void function */
goto out_free_reserve;
+ free_extent_map(em);
ret = btrfs_add_ordered_extent_compress(inode,
async_extent->start,
@@ -952,7 +931,6 @@ static noinline int cow_file_range(struct inode *inode,
u64 blocksize = fs_info->sectorsize;
struct btrfs_key ins;
struct extent_map *em;
- struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
int ret = 0;
if (btrfs_is_free_space_inode(inode)) {
@@ -965,15 +943,12 @@ static noinline int cow_file_range(struct inode *inode,
num_bytes = max(blocksize, num_bytes);
disk_num_bytes = num_bytes;
- /* if this is a small write inside eof, kick off defrag */
- if (num_bytes < SZ_64K &&
- (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
- btrfs_add_inode_defrag(NULL, inode);
+ inode_should_defrag(inode, start, end, num_bytes, SZ_64K);
if (start == 0) {
/* lets try to make an inline extent */
- ret = cow_file_range_inline(root, inode, start, end, 0, 0,
- NULL);
+ ret = cow_file_range_inline(root, inode, start, end, 0,
+ BTRFS_COMPRESS_NONE, NULL);
if (ret == 0) {
extent_clear_unlock_delalloc(inode, start, end,
delalloc_end, NULL,
@@ -1008,39 +983,18 @@ static noinline int cow_file_range(struct inode *inode,
if (ret < 0)
goto out_unlock;
- em = alloc_extent_map();
- if (!em) {
- ret = -ENOMEM;
- goto out_reserve;
- }
- em->start = start;
- em->orig_start = em->start;
ram_size = ins.offset;
- em->len = ins.offset;
- em->mod_start = em->start;
- em->mod_len = em->len;
-
- em->block_start = ins.objectid;
- em->block_len = ins.offset;
- em->orig_block_len = ins.offset;
- em->ram_bytes = ram_size;
- em->bdev = fs_info->fs_devices->latest_bdev;
- set_bit(EXTENT_FLAG_PINNED, &em->flags);
- em->generation = -1;
-
- while (1) {
- write_lock(&em_tree->lock);
- ret = add_extent_mapping(em_tree, em, 1);
- write_unlock(&em_tree->lock);
- if (ret != -EEXIST) {
- free_extent_map(em);
- break;
- }
- btrfs_drop_extent_cache(inode, start,
- start + ram_size - 1, 0);
- }
- if (ret)
+ em = create_io_em(inode, start, ins.offset, /* len */
+ start, /* orig_start */
+ ins.objectid, /* block_start */
+ ins.offset, /* block_len */
+ ins.offset, /* orig_block_len */
+ ram_size, /* ram_bytes */
+ BTRFS_COMPRESS_NONE, /* compress_type */
+ BTRFS_ORDERED_REGULAR /* type */);
+ if (IS_ERR(em))
goto out_reserve;
+ free_extent_map(em);
cur_alloc_size = ins.offset;
ret = btrfs_add_ordered_extent(inode, start, ins.objectid,
@@ -1164,7 +1118,6 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
struct btrfs_root *root = BTRFS_I(inode)->root;
unsigned long nr_pages;
u64 cur_end;
- int limit = 10 * SZ_1M;
clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED,
1, 0, NULL, GFP_NOFS);
@@ -1196,12 +1149,6 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
btrfs_queue_work(fs_info->delalloc_workers, &async_cow->work);
- if (atomic_read(&fs_info->async_delalloc_pages) > limit) {
- wait_event(fs_info->async_submit_wait,
- (atomic_read(&fs_info->async_delalloc_pages) <
- limit));
- }
-
while (atomic_read(&fs_info->async_submit_draining) &&
atomic_read(&fs_info->async_delalloc_pages)) {
wait_event(fs_info->async_submit_wait,
@@ -1250,11 +1197,11 @@ static noinline int run_delalloc_nocow(struct inode *inode,
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_trans_handle *trans;
struct extent_buffer *leaf;
struct btrfs_path *path;
struct btrfs_file_extent_item *fi;
struct btrfs_key found_key;
+ struct extent_map *em;
u64 cow_start;
u64 cur_offset;
u64 extent_end;
@@ -1269,7 +1216,7 @@ static noinline int run_delalloc_nocow(struct inode *inode,
int nocow;
int check_prev = 1;
bool nolock;
- u64 ino = btrfs_ino(inode);
+ u64 ino = btrfs_ino(BTRFS_I(inode));
path = btrfs_alloc_path();
if (!path) {
@@ -1286,30 +1233,10 @@ static noinline int run_delalloc_nocow(struct inode *inode,
nolock = btrfs_is_free_space_inode(inode);
- if (nolock)
- trans = btrfs_join_transaction_nolock(root);
- else
- trans = btrfs_join_transaction(root);
-
- if (IS_ERR(trans)) {
- extent_clear_unlock_delalloc(inode, start, end, end,
- locked_page,
- EXTENT_LOCKED | EXTENT_DELALLOC |
- EXTENT_DO_ACCOUNTING |
- EXTENT_DEFRAG, PAGE_UNLOCK |
- PAGE_CLEAR_DIRTY |
- PAGE_SET_WRITEBACK |
- PAGE_END_WRITEBACK);
- btrfs_free_path(path);
- return PTR_ERR(trans);
- }
-
- trans->block_rsv = &fs_info->delalloc_block_rsv;
-
cow_start = (u64)-1;
cur_offset = start;
while (1) {
- ret = btrfs_lookup_file_extent(trans, root, path, ino,
+ ret = btrfs_lookup_file_extent(NULL, root, path, ino,
cur_offset, 0);
if (ret < 0)
goto error;
@@ -1382,7 +1309,7 @@ next_slot:
goto out_check;
if (btrfs_extent_readonly(fs_info, disk_bytenr))
goto out_check;
- if (btrfs_cross_ref_exist(trans, root, ino,
+ if (btrfs_cross_ref_exist(root, ino,
found_key.offset -
extent_offset, disk_bytenr))
goto out_check;
@@ -1404,10 +1331,16 @@ next_slot:
* either valid or do not exist.
*/
if (csum_exist_in_range(fs_info, disk_bytenr,
- num_bytes))
+ num_bytes)) {
+ if (!nolock)
+ btrfs_end_write_no_snapshoting(root);
goto out_check;
- if (!btrfs_inc_nocow_writers(fs_info, disk_bytenr))
+ }
+ if (!btrfs_inc_nocow_writers(fs_info, disk_bytenr)) {
+ if (!nolock)
+ btrfs_end_write_no_snapshoting(root);
goto out_check;
+ }
nocow = 1;
} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
extent_end = found_key.offset +
@@ -1455,35 +1388,28 @@ out_check:
}
if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
- struct extent_map *em;
- struct extent_map_tree *em_tree;
- em_tree = &BTRFS_I(inode)->extent_tree;
- em = alloc_extent_map();
- BUG_ON(!em); /* -ENOMEM */
- em->start = cur_offset;
- em->orig_start = found_key.offset - extent_offset;
- em->len = num_bytes;
- em->block_len = num_bytes;
- em->block_start = disk_bytenr;
- em->orig_block_len = disk_num_bytes;
- em->ram_bytes = ram_bytes;
- em->bdev = fs_info->fs_devices->latest_bdev;
- em->mod_start = em->start;
- em->mod_len = em->len;
- set_bit(EXTENT_FLAG_PINNED, &em->flags);
- set_bit(EXTENT_FLAG_FILLING, &em->flags);
- em->generation = -1;
- while (1) {
- write_lock(&em_tree->lock);
- ret = add_extent_mapping(em_tree, em, 1);
- write_unlock(&em_tree->lock);
- if (ret != -EEXIST) {
- free_extent_map(em);
- break;
- }
- btrfs_drop_extent_cache(inode, em->start,
- em->start + em->len - 1, 0);
+ u64 orig_start = found_key.offset - extent_offset;
+
+ em = create_io_em(inode, cur_offset, num_bytes,
+ orig_start,
+ disk_bytenr, /* block_start */
+ num_bytes, /* block_len */
+ disk_num_bytes, /* orig_block_len */
+ ram_bytes, BTRFS_COMPRESS_NONE,
+ BTRFS_ORDERED_PREALLOC);
+ if (IS_ERR(em)) {
+ if (!nolock && nocow)
+ btrfs_end_write_no_snapshoting(root);
+ if (nocow)
+ btrfs_dec_nocow_writers(fs_info,
+ disk_bytenr);
+ ret = PTR_ERR(em);
+ goto error;
}
+ free_extent_map(em);
+ }
+
+ if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
type = BTRFS_ORDERED_PREALLOC;
} else {
type = BTRFS_ORDERED_NOCOW;
@@ -1534,10 +1460,6 @@ out_check:
}
error:
- err = btrfs_end_transaction(trans);
- if (!ret)
- ret = err;
-
if (ret && cur_offset < end)
extent_clear_unlock_delalloc(inode, cur_offset, end, end,
locked_page, EXTENT_LOCKED |
@@ -1609,7 +1531,7 @@ static void btrfs_split_extent_hook(struct inode *inode,
size = orig->end - orig->start + 1;
if (size > BTRFS_MAX_EXTENT_SIZE) {
- u64 num_extents;
+ u32 num_extents;
u64 new_size;
/*
@@ -1617,13 +1539,10 @@ static void btrfs_split_extent_hook(struct inode *inode,
* applies here, just in reverse.
*/
new_size = orig->end - split + 1;
- num_extents = div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
- BTRFS_MAX_EXTENT_SIZE);
+ num_extents = count_max_extents(new_size);
new_size = split - orig->start;
- num_extents += div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
- BTRFS_MAX_EXTENT_SIZE);
- if (div64_u64(size + BTRFS_MAX_EXTENT_SIZE - 1,
- BTRFS_MAX_EXTENT_SIZE) >= num_extents)
+ num_extents += count_max_extents(new_size);
+ if (count_max_extents(size) >= num_extents)
return;
}
@@ -1643,7 +1562,7 @@ static void btrfs_merge_extent_hook(struct inode *inode,
struct extent_state *other)
{
u64 new_size, old_size;
- u64 num_extents;
+ u32 num_extents;
/* not delalloc, ignore it */
if (!(other->state & EXTENT_DELALLOC))
@@ -1681,14 +1600,10 @@ static void btrfs_merge_extent_hook(struct inode *inode,
* this case.
*/
old_size = other->end - other->start + 1;
- num_extents = div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1,
- BTRFS_MAX_EXTENT_SIZE);
+ num_extents = count_max_extents(old_size);
old_size = new->end - new->start + 1;
- num_extents += div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1,
- BTRFS_MAX_EXTENT_SIZE);
-
- if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
- BTRFS_MAX_EXTENT_SIZE) >= num_extents)
+ num_extents += count_max_extents(old_size);
+ if (count_max_extents(new_size) >= num_extents)
return;
spin_lock(&BTRFS_I(inode)->lock);
@@ -1797,8 +1712,7 @@ static void btrfs_clear_bit_hook(struct inode *inode,
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
u64 len = state->end + 1 - state->start;
- u64 num_extents = div64_u64(len + BTRFS_MAX_EXTENT_SIZE -1,
- BTRFS_MAX_EXTENT_SIZE);
+ u32 num_extents = count_max_extents(len);
spin_lock(&BTRFS_I(inode)->lock);
if ((state->state & EXTENT_DEFRAG) && (*bits & EXTENT_DEFRAG))
@@ -1997,8 +1911,7 @@ out:
* at IO completion time based on sums calculated at bio submission time.
*/
static noinline int add_pending_csums(struct btrfs_trans_handle *trans,
- struct inode *inode, u64 file_offset,
- struct list_head *list)
+ struct inode *inode, struct list_head *list)
{
struct btrfs_ordered_sum *sum;
@@ -2161,7 +2074,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
goto out;
if (!extent_inserted) {
- ins.objectid = btrfs_ino(inode);
+ ins.objectid = btrfs_ino(BTRFS_I(inode));
ins.offset = file_pos;
ins.type = BTRFS_EXTENT_DATA_KEY;
@@ -2194,8 +2107,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
ins.offset = disk_num_bytes;
ins.type = BTRFS_EXTENT_ITEM_KEY;
ret = btrfs_alloc_reserved_file_extent(trans, root->root_key.objectid,
- btrfs_ino(inode), file_pos,
- ram_bytes, &ins);
+ btrfs_ino(BTRFS_I(inode)), file_pos, ram_bytes, &ins);
/*
* Release the reserved range from inode dirty range map, as it is
* already moved into delayed_ref_head
@@ -2320,7 +2232,7 @@ static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id,
u64 num_bytes;
if (BTRFS_I(inode)->root->root_key.objectid == root_id &&
- inum == btrfs_ino(inode))
+ inum == btrfs_ino(BTRFS_I(inode)))
return 0;
key.objectid = root_id;
@@ -2589,7 +2501,7 @@ static noinline int relink_extent_backref(struct btrfs_path *path,
if (ret)
goto out_free_path;
again:
- key.objectid = btrfs_ino(inode);
+ key.objectid = btrfs_ino(BTRFS_I(inode));
key.type = BTRFS_EXTENT_DATA_KEY;
key.offset = start;
@@ -2768,7 +2680,7 @@ record_old_file_extents(struct inode *inode,
if (!path)
goto out_kfree;
- key.objectid = btrfs_ino(inode);
+ key.objectid = btrfs_ino(BTRFS_I(inode));
key.type = BTRFS_EXTENT_DATA_KEY;
key.offset = new->file_pos;
@@ -2803,7 +2715,7 @@ record_old_file_extents(struct inode *inode,
btrfs_item_key_to_cpu(l, &key, slot);
- if (key.objectid != btrfs_ino(inode))
+ if (key.objectid != btrfs_ino(BTRFS_I(inode)))
break;
if (key.type != BTRFS_EXTENT_DATA_KEY)
break;
@@ -2993,8 +2905,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
goto out_unlock;
}
- add_pending_csums(trans, inode, ordered_extent->file_offset,
- &ordered_extent->list);
+ add_pending_csums(trans, inode, &ordered_extent->list);
btrfs_ordered_update_i_size(inode, 0, ordered_extent);
ret = btrfs_update_inode_fallback(trans, root, inode);
@@ -3123,9 +3034,8 @@ static int __readpage_endio_check(struct inode *inode,
kunmap_atomic(kaddr);
return 0;
zeroit:
- btrfs_warn_rl(BTRFS_I(inode)->root->fs_info,
- "csum failed ino %llu off %llu csum %u expected csum %u",
- btrfs_ino(inode), start, csum, csum_expected);
+ btrfs_print_data_csum_error(inode, start, csum, csum_expected,
+ io_bio->mirror_num);
memset(kaddr + pgoff, 1, len);
flush_dcache_page(page);
kunmap_atomic(kaddr);
@@ -3326,7 +3236,8 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
/* insert an orphan item to track this unlinked/truncated file */
if (insert >= 1) {
- ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode));
+ ret = btrfs_insert_orphan_item(trans, root,
+ btrfs_ino(BTRFS_I(inode)));
if (ret) {
atomic_dec(&root->orphan_inodes);
if (reserve) {
@@ -3382,7 +3293,7 @@ static int btrfs_orphan_del(struct btrfs_trans_handle *trans,
atomic_dec(&root->orphan_inodes);
if (trans)
ret = btrfs_del_orphan_item(trans, root,
- btrfs_ino(inode));
+ btrfs_ino(BTRFS_I(inode)));
}
if (release_rsv)
@@ -3789,7 +3700,7 @@ cache_index:
goto cache_acl;
btrfs_item_key_to_cpu(leaf, &location, path->slots[0]);
- if (location.objectid != btrfs_ino(inode))
+ if (location.objectid != btrfs_ino(BTRFS_I(inode)))
goto cache_acl;
ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
@@ -3811,14 +3722,14 @@ cache_acl:
* any xattrs or acls
*/
maybe_acls = acls_after_inode_item(leaf, path->slots[0],
- btrfs_ino(inode), &first_xattr_slot);
+ btrfs_ino(BTRFS_I(inode)), &first_xattr_slot);
if (first_xattr_slot != -1) {
path->slots[0] = first_xattr_slot;
ret = btrfs_load_inode_props(inode, path);
if (ret)
btrfs_err(fs_info,
"error loading props for ino %llu (root %llu): %d",
- btrfs_ino(inode),
+ btrfs_ino(BTRFS_I(inode)),
root->root_key.objectid, ret);
}
btrfs_free_path(path);
@@ -3835,10 +3746,7 @@ cache_acl:
break;
case S_IFDIR:
inode->i_fop = &btrfs_dir_file_operations;
- if (root == fs_info->tree_root)
- inode->i_op = &btrfs_dir_ro_inode_operations;
- else
- inode->i_op = &btrfs_dir_inode_operations;
+ inode->i_op = &btrfs_dir_inode_operations;
break;
case S_IFLNK:
inode->i_op = &btrfs_symlink_inode_operations;
@@ -3996,7 +3904,8 @@ noinline int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
*/
static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- struct inode *dir, struct inode *inode,
+ struct btrfs_inode *dir,
+ struct btrfs_inode *inode,
const char *name, int name_len)
{
struct btrfs_fs_info *fs_info = root->fs_info;
@@ -4043,10 +3952,10 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
* that we delay to delete it, and just do this deletion when
* we update the inode item.
*/
- if (BTRFS_I(inode)->dir_index) {
+ if (inode->dir_index) {
ret = btrfs_delayed_delete_inode_ref(inode);
if (!ret) {
- index = BTRFS_I(inode)->dir_index;
+ index = inode->dir_index;
goto skip_backref;
}
}
@@ -4067,15 +3976,15 @@ skip_backref:
goto err;
}
- ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len,
- inode, dir_ino);
+ ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len, inode,
+ dir_ino);
if (ret != 0 && ret != -ENOENT) {
btrfs_abort_transaction(trans, ret);
goto err;
}
- ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len,
- dir, index);
+ ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len, dir,
+ index);
if (ret == -ENOENT)
ret = 0;
else if (ret)
@@ -4085,26 +3994,27 @@ err:
if (ret)
goto out;
- btrfs_i_size_write(dir, dir->i_size - name_len * 2);
- inode_inc_iversion(inode);
- inode_inc_iversion(dir);
- inode->i_ctime = dir->i_mtime =
- dir->i_ctime = current_time(inode);
- ret = btrfs_update_inode(trans, root, dir);
+ btrfs_i_size_write(&dir->vfs_inode,
+ dir->vfs_inode.i_size - name_len * 2);
+ inode_inc_iversion(&inode->vfs_inode);
+ inode_inc_iversion(&dir->vfs_inode);
+ inode->vfs_inode.i_ctime = dir->vfs_inode.i_mtime =
+ dir->vfs_inode.i_ctime = current_time(&inode->vfs_inode);
+ ret = btrfs_update_inode(trans, root, &dir->vfs_inode);
out:
return ret;
}
int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- struct inode *dir, struct inode *inode,
+ struct btrfs_inode *dir, struct btrfs_inode *inode,
const char *name, int name_len)
{
int ret;
ret = __btrfs_unlink_inode(trans, root, dir, inode, name, name_len);
if (!ret) {
- drop_nlink(inode);
- ret = btrfs_update_inode(trans, root, inode);
+ drop_nlink(&inode->vfs_inode);
+ ret = btrfs_update_inode(trans, root, &inode->vfs_inode);
}
return ret;
}
@@ -4142,10 +4052,12 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
if (IS_ERR(trans))
return PTR_ERR(trans);
- btrfs_record_unlink_dir(trans, dir, d_inode(dentry), 0);
+ btrfs_record_unlink_dir(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)),
+ 0);
- ret = btrfs_unlink_inode(trans, root, dir, d_inode(dentry),
- dentry->d_name.name, dentry->d_name.len);
+ ret = btrfs_unlink_inode(trans, root, BTRFS_I(dir),
+ BTRFS_I(d_inode(dentry)), dentry->d_name.name,
+ dentry->d_name.len);
if (ret)
goto out;
@@ -4173,7 +4085,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
struct btrfs_key key;
u64 index;
int ret;
- u64 dir_ino = btrfs_ino(dir);
+ u64 dir_ino = btrfs_ino(BTRFS_I(dir));
path = btrfs_alloc_path();
if (!path)
@@ -4225,7 +4137,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
}
btrfs_release_path(path);
- ret = btrfs_delete_delayed_dir_index(trans, fs_info, dir, index);
+ ret = btrfs_delete_delayed_dir_index(trans, fs_info, BTRFS_I(dir), index);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out;
@@ -4252,14 +4164,14 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
return -ENOTEMPTY;
- if (btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID)
+ if (btrfs_ino(BTRFS_I(inode)) == BTRFS_FIRST_FREE_OBJECTID)
return -EPERM;
trans = __unlink_start_trans(dir);
if (IS_ERR(trans))
return PTR_ERR(trans);
- if (unlikely(btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
+ if (unlikely(btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
err = btrfs_unlink_subvol(trans, root, dir,
BTRFS_I(inode)->location.objectid,
dentry->d_name.name,
@@ -4274,8 +4186,9 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
last_unlink_trans = BTRFS_I(inode)->last_unlink_trans;
/* now the directory is empty */
- err = btrfs_unlink_inode(trans, root, dir, d_inode(dentry),
- dentry->d_name.name, dentry->d_name.len);
+ err = btrfs_unlink_inode(trans, root, BTRFS_I(dir),
+ BTRFS_I(d_inode(dentry)), dentry->d_name.name,
+ dentry->d_name.len);
if (!err) {
btrfs_i_size_write(inode, 0);
/*
@@ -4401,7 +4314,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
int extent_type = -1;
int ret;
int err = 0;
- u64 ino = btrfs_ino(inode);
+ u64 ino = btrfs_ino(BTRFS_I(inode));
u64 bytes_deleted = 0;
bool be_nice = 0;
bool should_throttle = 0;
@@ -4440,7 +4353,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
* items.
*/
if (min_type == 0 && root == BTRFS_I(inode)->root)
- btrfs_kill_delayed_inode_items(inode);
+ btrfs_kill_delayed_inode_items(BTRFS_I(inode));
key.objectid = ino;
key.offset = (u64)-1;
@@ -4689,11 +4602,22 @@ out:
btrfs_abort_transaction(trans, ret);
}
error:
- if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
+ if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
+ ASSERT(last_size >= new_size);
+ if (!err && last_size > new_size)
+ last_size = new_size;
btrfs_ordered_update_i_size(inode, last_size, NULL);
+ }
btrfs_free_path(path);
+ if (err == 0) {
+ /* only inline file may have last_size != new_size */
+ if (new_size >= fs_info->sectorsize ||
+ new_size > fs_info->max_inline)
+ ASSERT(last_size == new_size);
+ }
+
if (be_nice && bytes_deleted > SZ_32M) {
unsigned long updates = trans->delayed_ref_updates;
if (updates) {
@@ -4862,8 +4786,8 @@ static int maybe_insert_hole(struct btrfs_root *root, struct inode *inode,
return ret;
}
- ret = btrfs_insert_file_extent(trans, root, btrfs_ino(inode), offset,
- 0, 0, len, 0, len, 0, 0, 0);
+ ret = btrfs_insert_file_extent(trans, root, btrfs_ino(BTRFS_I(inode)),
+ offset, 0, 0, len, 0, len, 0, 0, 0);
if (ret)
btrfs_abort_transaction(trans, ret);
else
@@ -5079,6 +5003,13 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
if (ret && inode->i_nlink) {
int err;
+ /* To get a stable disk_i_size */
+ err = btrfs_wait_ordered_range(inode, 0, (u64)-1);
+ if (err) {
+ btrfs_orphan_del(NULL, inode);
+ return err;
+ }
+
/*
* failed to truncate, disk_i_size is only adjusted down
* as we remove extents, so it should represent the true
@@ -5274,7 +5205,7 @@ void btrfs_evict_inode(struct inode *inode)
goto no_delete;
}
- ret = btrfs_commit_inode_delayed_inode(inode);
+ ret = btrfs_commit_inode_delayed_inode(BTRFS_I(inode));
if (ret) {
btrfs_orphan_del(NULL, inode);
goto no_delete;
@@ -5394,12 +5325,12 @@ void btrfs_evict_inode(struct inode *inode)
trans->block_rsv = &fs_info->trans_block_rsv;
if (!(root == fs_info->tree_root ||
root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID))
- btrfs_return_ino(root, btrfs_ino(inode));
+ btrfs_return_ino(root, btrfs_ino(BTRFS_I(inode)));
btrfs_end_transaction(trans);
btrfs_btree_balance_dirty(fs_info);
no_delete:
- btrfs_remove_delayed_node(inode);
+ btrfs_remove_delayed_node(BTRFS_I(inode));
clear_inode(inode);
}
@@ -5421,8 +5352,8 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
if (!path)
return -ENOMEM;
- di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(dir), name,
- namelen, 0);
+ di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(BTRFS_I(dir)),
+ name, namelen, 0);
if (IS_ERR(di))
ret = PTR_ERR(di);
@@ -5477,7 +5408,7 @@ static int fixup_tree_root_location(struct btrfs_fs_info *fs_info,
leaf = path->nodes[0];
ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
- if (btrfs_root_ref_dirid(leaf, ref) != btrfs_ino(dir) ||
+ if (btrfs_root_ref_dirid(leaf, ref) != btrfs_ino(BTRFS_I(dir)) ||
btrfs_root_ref_name_len(leaf, ref) != dentry->d_name.len)
goto out;
@@ -5512,7 +5443,7 @@ static void inode_tree_add(struct inode *inode)
struct rb_node **p;
struct rb_node *parent;
struct rb_node *new = &BTRFS_I(inode)->rb_node;
- u64 ino = btrfs_ino(inode);
+ u64 ino = btrfs_ino(BTRFS_I(inode));
if (inode_unhashed(inode))
return;
@@ -5523,9 +5454,9 @@ static void inode_tree_add(struct inode *inode)
parent = *p;
entry = rb_entry(parent, struct btrfs_inode, rb_node);
- if (ino < btrfs_ino(&entry->vfs_inode))
+ if (ino < btrfs_ino(BTRFS_I(&entry->vfs_inode)))
p = &parent->rb_left;
- else if (ino > btrfs_ino(&entry->vfs_inode))
+ else if (ino > btrfs_ino(BTRFS_I(&entry->vfs_inode)))
p = &parent->rb_right;
else {
WARN_ON(!(entry->vfs_inode.i_state &
@@ -5585,9 +5516,9 @@ again:
prev = node;
entry = rb_entry(node, struct btrfs_inode, rb_node);
- if (objectid < btrfs_ino(&entry->vfs_inode))
+ if (objectid < btrfs_ino(BTRFS_I(&entry->vfs_inode)))
node = node->rb_left;
- else if (objectid > btrfs_ino(&entry->vfs_inode))
+ else if (objectid > btrfs_ino(BTRFS_I(&entry->vfs_inode)))
node = node->rb_right;
else
break;
@@ -5595,7 +5526,7 @@ again:
if (!node) {
while (prev) {
entry = rb_entry(prev, struct btrfs_inode, rb_node);
- if (objectid <= btrfs_ino(&entry->vfs_inode)) {
+ if (objectid <= btrfs_ino(BTRFS_I(&entry->vfs_inode))) {
node = prev;
break;
}
@@ -5604,7 +5535,7 @@ again:
}
while (node) {
entry = rb_entry(node, struct btrfs_inode, rb_node);
- objectid = btrfs_ino(&entry->vfs_inode) + 1;
+ objectid = btrfs_ino(BTRFS_I(&entry->vfs_inode)) + 1;
inode = igrab(&entry->vfs_inode);
if (inode) {
spin_unlock(&root->inode_lock);
@@ -5710,6 +5641,7 @@ static struct inode *new_simple_dir(struct super_block *s,
inode->i_ino = BTRFS_EMPTY_SUBVOL_DIR_OBJECTID;
inode->i_op = &btrfs_dir_ro_inode_operations;
+ inode->i_opflags &= ~IOP_XATTR;
inode->i_fop = &simple_dir_operations;
inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO;
inode->i_mtime = current_time(inode);
@@ -5787,7 +5719,7 @@ static int btrfs_dentry_delete(const struct dentry *dentry)
if (btrfs_root_refs(&root->root_item) == 0)
return 1;
- if (btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
+ if (btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
return 1;
}
return 0;
@@ -5856,7 +5788,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
key.type = BTRFS_DIR_INDEX_KEY;
key.offset = ctx->pos;
- key.objectid = btrfs_ino(inode);
+ key.objectid = btrfs_ino(BTRFS_I(inode));
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
@@ -6053,7 +5985,7 @@ static int btrfs_set_inode_index_count(struct inode *inode)
struct extent_buffer *leaf;
int ret;
- key.objectid = btrfs_ino(inode);
+ key.objectid = btrfs_ino(BTRFS_I(inode));
key.type = BTRFS_DIR_INDEX_KEY;
key.offset = (u64)-1;
@@ -6085,7 +6017,7 @@ static int btrfs_set_inode_index_count(struct inode *inode)
leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
- if (found_key.objectid != btrfs_ino(inode) ||
+ if (found_key.objectid != btrfs_ino(BTRFS_I(inode)) ||
found_key.type != BTRFS_DIR_INDEX_KEY) {
BTRFS_I(inode)->index_cnt = 2;
goto out;
@@ -6106,7 +6038,7 @@ int btrfs_set_inode_index(struct inode *dir, u64 *index)
int ret = 0;
if (BTRFS_I(dir)->index_cnt == (u64)-1) {
- ret = btrfs_inode_delayed_dir_index_count(dir);
+ ret = btrfs_inode_delayed_dir_index_count(BTRFS_I(dir));
if (ret) {
ret = btrfs_set_inode_index_count(dir);
if (ret)
@@ -6285,7 +6217,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
if (ret)
btrfs_err(fs_info,
"error inheriting props for ino %llu (root %llu): %d",
- btrfs_ino(inode), root->root_key.objectid, ret);
+ btrfs_ino(BTRFS_I(inode)), root->root_key.objectid, ret);
return inode;
@@ -6318,8 +6250,8 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
int ret = 0;
struct btrfs_key key;
struct btrfs_root *root = BTRFS_I(parent_inode)->root;
- u64 ino = btrfs_ino(inode);
- u64 parent_ino = btrfs_ino(parent_inode);
+ u64 ino = btrfs_ino(BTRFS_I(inode));
+ u64 parent_ino = btrfs_ino(BTRFS_I(parent_inode));
if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
memcpy(&key, &BTRFS_I(inode)->root->root_key, sizeof(key));
@@ -6418,8 +6350,8 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
goto out_unlock;
inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
- dentry->d_name.len, btrfs_ino(dir), objectid,
- mode, &index);
+ dentry->d_name.len, btrfs_ino(BTRFS_I(dir)), objectid,
+ mode, &index);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
goto out_unlock;
@@ -6490,8 +6422,8 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
goto out_unlock;
inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
- dentry->d_name.len, btrfs_ino(dir), objectid,
- mode, &index);
+ dentry->d_name.len, btrfs_ino(BTRFS_I(dir)), objectid,
+ mode, &index);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
goto out_unlock;
@@ -6600,7 +6532,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
goto fail;
}
d_instantiate(dentry, inode);
- btrfs_log_new_name(trans, inode, NULL, parent);
+ btrfs_log_new_name(trans, BTRFS_I(inode), NULL, parent);
}
btrfs_balance_delayed_items(fs_info);
@@ -6640,8 +6572,8 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
goto out_fail;
inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
- dentry->d_name.len, btrfs_ino(dir), objectid,
- S_IFDIR | mode, &index);
+ dentry->d_name.len, btrfs_ino(BTRFS_I(dir)), objectid,
+ S_IFDIR | mode, &index);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
goto out_fail;
@@ -6801,7 +6733,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
int err = 0;
u64 extent_start = 0;
u64 extent_end = 0;
- u64 objectid = btrfs_ino(inode);
+ u64 objectid = btrfs_ino(BTRFS_I(inode));
u32 found_type;
struct btrfs_path *path = NULL;
struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -7059,7 +6991,7 @@ insert:
write_unlock(&em_tree->lock);
out:
- trace_btrfs_get_extent(root, em);
+ trace_btrfs_get_extent(root, BTRFS_I(inode), em);
btrfs_free_path(path);
if (trans) {
@@ -7215,11 +7147,12 @@ static struct extent_map *btrfs_create_dio_extent(struct inode *inode,
struct extent_map *em = NULL;
int ret;
- down_read(&BTRFS_I(inode)->dio_sem);
if (type != BTRFS_ORDERED_NOCOW) {
- em = create_pinned_em(inode, start, len, orig_start,
- block_start, block_len, orig_block_len,
- ram_bytes, type);
+ em = create_io_em(inode, start, len, orig_start,
+ block_start, block_len, orig_block_len,
+ ram_bytes,
+ BTRFS_COMPRESS_NONE, /* compress_type */
+ type);
if (IS_ERR(em))
goto out;
}
@@ -7234,7 +7167,6 @@ static struct extent_map *btrfs_create_dio_extent(struct inode *inode,
em = ERR_PTR(ret);
}
out:
- up_read(&BTRFS_I(inode)->dio_sem);
return em;
}
@@ -7275,7 +7207,6 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
u64 *ram_bytes)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- struct btrfs_trans_handle *trans;
struct btrfs_path *path;
int ret;
struct extent_buffer *leaf;
@@ -7295,8 +7226,8 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
if (!path)
return -ENOMEM;
- ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(inode),
- offset, 0);
+ ret = btrfs_lookup_file_extent(NULL, root, path,
+ btrfs_ino(BTRFS_I(inode)), offset, 0);
if (ret < 0)
goto out;
@@ -7312,7 +7243,7 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
ret = 0;
leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &key, slot);
- if (key.objectid != btrfs_ino(inode) ||
+ if (key.objectid != btrfs_ino(BTRFS_I(inode)) ||
key.type != BTRFS_EXTENT_DATA_KEY) {
/* not our file or wrong item type, must cow */
goto out;
@@ -7378,15 +7309,9 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
* look for other files referencing this extent, if we
* find any we must cow
*/
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans)) {
- ret = 0;
- goto out;
- }
- ret = btrfs_cross_ref_exist(trans, root, btrfs_ino(inode),
+ ret = btrfs_cross_ref_exist(root, btrfs_ino(BTRFS_I(inode)),
key.offset - backref_offset, disk_bytenr);
- btrfs_end_transaction(trans);
if (ret) {
ret = 0;
goto out;
@@ -7563,17 +7488,23 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
return ret;
}
-static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
- u64 len, u64 orig_start,
- u64 block_start, u64 block_len,
- u64 orig_block_len, u64 ram_bytes,
- int type)
+/* The callers of this must take lock_extent() */
+static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 len,
+ u64 orig_start, u64 block_start,
+ u64 block_len, u64 orig_block_len,
+ u64 ram_bytes, int compress_type,
+ int type)
{
struct extent_map_tree *em_tree;
struct extent_map *em;
struct btrfs_root *root = BTRFS_I(inode)->root;
int ret;
+ ASSERT(type == BTRFS_ORDERED_PREALLOC ||
+ type == BTRFS_ORDERED_COMPRESSED ||
+ type == BTRFS_ORDERED_NOCOW ||
+ type == BTRFS_ORDERED_REGULAR);
+
em_tree = &BTRFS_I(inode)->extent_tree;
em = alloc_extent_map();
if (!em)
@@ -7581,8 +7512,6 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
em->start = start;
em->orig_start = orig_start;
- em->mod_start = start;
- em->mod_len = len;
em->len = len;
em->block_len = block_len;
em->block_start = block_start;
@@ -7591,8 +7520,12 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
em->ram_bytes = ram_bytes;
em->generation = -1;
set_bit(EXTENT_FLAG_PINNED, &em->flags);
- if (type == BTRFS_ORDERED_PREALLOC)
+ if (type == BTRFS_ORDERED_PREALLOC) {
set_bit(EXTENT_FLAG_FILLING, &em->flags);
+ } else if (type == BTRFS_ORDERED_COMPRESSED) {
+ set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
+ em->compress_type = compress_type;
+ }
do {
btrfs_drop_extent_cache(inode, em->start,
@@ -7600,6 +7533,10 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em, 1);
write_unlock(&em_tree->lock);
+ /*
+ * The caller has taken lock_extent(), who could race with us
+ * to add em?
+ */
} while (ret == -EEXIST);
if (ret) {
@@ -7607,6 +7544,7 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
return ERR_PTR(ret);
}
+ /* em got 2 refs now, callers needs to do free_extent_map once. */
return em;
}
@@ -7614,20 +7552,25 @@ static void adjust_dio_outstanding_extents(struct inode *inode,
struct btrfs_dio_data *dio_data,
const u64 len)
{
- unsigned num_extents;
+ unsigned num_extents = count_max_extents(len);
- num_extents = (unsigned) div64_u64(len + BTRFS_MAX_EXTENT_SIZE - 1,
- BTRFS_MAX_EXTENT_SIZE);
/*
* If we have an outstanding_extents count still set then we're
* within our reservation, otherwise we need to adjust our inode
* counter appropriately.
*/
- if (dio_data->outstanding_extents) {
+ if (dio_data->outstanding_extents >= num_extents) {
dio_data->outstanding_extents -= num_extents;
} else {
+ /*
+ * If dio write length has been split due to no large enough
+ * contiguous space, we need to compensate our inode counter
+ * appropriately.
+ */
+ u64 num_needed = num_extents - dio_data->outstanding_extents;
+
spin_lock(&BTRFS_I(inode)->lock);
- BTRFS_I(inode)->outstanding_extents += num_extents;
+ BTRFS_I(inode)->outstanding_extents += num_needed;
spin_unlock(&BTRFS_I(inode)->lock);
}
}
@@ -7790,7 +7733,7 @@ unlock:
* Need to update the i_size under the extent lock so buffered
* readers will get the updated i_size when we unlock.
*/
- if (start + len > i_size_read(inode))
+ if (!dio_data->overwrite && start + len > i_size_read(inode))
i_size_write(inode, start + len);
adjust_dio_outstanding_extents(inode, dio_data, len);
@@ -8240,7 +8183,8 @@ static void btrfs_end_dio_bio(struct bio *bio)
if (err)
btrfs_warn(BTRFS_I(dip->inode)->root->fs_info,
"direct IO failed ino %llu rw %d,%u sector %#Lx len %u err no %d",
- btrfs_ino(dip->inode), bio_op(bio), bio->bi_opf,
+ btrfs_ino(BTRFS_I(dip->inode)), bio_op(bio),
+ bio->bi_opf,
(unsigned long long)bio->bi_iter.bi_sector,
bio->bi_iter.bi_size, err);
@@ -8665,15 +8609,14 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
* not unlock the i_mutex at this case.
*/
if (offset + count <= inode->i_size) {
+ dio_data.overwrite = 1;
inode_unlock(inode);
relock = true;
}
ret = btrfs_delalloc_reserve_space(inode, offset, count);
if (ret)
goto out;
- dio_data.outstanding_extents = div64_u64(count +
- BTRFS_MAX_EXTENT_SIZE - 1,
- BTRFS_MAX_EXTENT_SIZE);
+ dio_data.outstanding_extents = count_max_extents(count);
/*
* We need to know how many extents we reserved so that we can
@@ -8685,6 +8628,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
dio_data.unsubmitted_oe_range_start = (u64)offset;
dio_data.unsubmitted_oe_range_end = (u64)offset;
current->journal_info = &dio_data;
+ down_read(&BTRFS_I(inode)->dio_sem);
} else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK,
&BTRFS_I(inode)->runtime_flags)) {
inode_dio_end(inode);
@@ -8697,6 +8641,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
iter, btrfs_get_blocks_direct, NULL,
btrfs_submit_direct, flags);
if (iov_iter_rw(iter) == WRITE) {
+ up_read(&BTRFS_I(inode)->dio_sem);
current->journal_info = NULL;
if (ret < 0 && ret != -EIOCBQUEUED) {
if (dio_data.reserve)
@@ -8815,7 +8760,7 @@ static int btrfs_releasepage(struct page *page, gfp_t gfp_flags)
{
if (PageWriteback(page) || PageDirty(page))
return 0;
- return __btrfs_releasepage(page, gfp_flags & GFP_NOFS);
+ return __btrfs_releasepage(page, gfp_flags);
}
static void btrfs_invalidatepage(struct page *page, unsigned int offset,
@@ -9016,7 +8961,7 @@ again:
* we can't set the delalloc bits if there are pending ordered
* extents. Drop our locks and wait for them to finish
*/
- ordered = btrfs_lookup_ordered_range(inode, page_start, page_end);
+ ordered = btrfs_lookup_ordered_range(inode, page_start, PAGE_SIZE);
if (ordered) {
unlock_extent_cached(io_tree, page_start, page_end,
&cached_state, GFP_NOFS);
@@ -9040,11 +8985,11 @@ again:
}
/*
- * XXX - page_mkwrite gets called every time the page is dirtied, even
- * if it was already dirty, so for space accounting reasons we need to
- * clear any delalloc bits for the range we are fixing to save. There
- * is probably a better way to do this, but for now keep consistent with
- * prepare_pages in the normal write path.
+ * page_mkwrite gets called when the page is firstly dirtied after it's
+ * faulted in, but write(2) could also dirty a page and set delalloc
+ * bits, thus in this case for space account reason, we still need to
+ * clear any delalloc bits within this page range since we have to
+ * reserve data&meta space before lock_page() (see above comments).
*/
clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, end,
EXTENT_DIRTY | EXTENT_DELALLOC |
@@ -9205,6 +9150,7 @@ static int btrfs_truncate(struct inode *inode)
break;
}
+ btrfs_block_rsv_release(fs_info, rsv, -1);
ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv,
rsv, min_size, 0);
BUG_ON(ret); /* shouldn't happen */
@@ -9367,7 +9313,7 @@ void btrfs_destroy_inode(struct inode *inode)
if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
&BTRFS_I(inode)->runtime_flags)) {
btrfs_info(fs_info, "inode %llu still on the orphan list",
- btrfs_ino(inode));
+ btrfs_ino(BTRFS_I(inode)));
atomic_dec(&root->orphan_inodes);
}
@@ -9496,8 +9442,8 @@ static int btrfs_rename_exchange(struct inode *old_dir,
struct inode *old_inode = old_dentry->d_inode;
struct timespec ctime = current_time(old_inode);
struct dentry *parent;
- u64 old_ino = btrfs_ino(old_inode);
- u64 new_ino = btrfs_ino(new_inode);
+ u64 old_ino = btrfs_ino(BTRFS_I(old_inode));
+ u64 new_ino = btrfs_ino(BTRFS_I(new_inode));
u64 old_idx = 0;
u64 new_idx = 0;
u64 root_objectid;
@@ -9554,7 +9500,8 @@ static int btrfs_rename_exchange(struct inode *old_dir,
new_dentry->d_name.name,
new_dentry->d_name.len,
old_ino,
- btrfs_ino(new_dir), old_idx);
+ btrfs_ino(BTRFS_I(new_dir)),
+ old_idx);
if (ret)
goto out_fail;
}
@@ -9570,7 +9517,8 @@ static int btrfs_rename_exchange(struct inode *old_dir,
old_dentry->d_name.name,
old_dentry->d_name.len,
new_ino,
- btrfs_ino(old_dir), new_idx);
+ btrfs_ino(BTRFS_I(old_dir)),
+ new_idx);
if (ret)
goto out_fail;
}
@@ -9586,8 +9534,10 @@ static int btrfs_rename_exchange(struct inode *old_dir,
new_inode->i_ctime = ctime;
if (old_dentry->d_parent != new_dentry->d_parent) {
- btrfs_record_unlink_dir(trans, old_dir, old_inode, 1);
- btrfs_record_unlink_dir(trans, new_dir, new_inode, 1);
+ btrfs_record_unlink_dir(trans, BTRFS_I(old_dir),
+ BTRFS_I(old_inode), 1);
+ btrfs_record_unlink_dir(trans, BTRFS_I(new_dir),
+ BTRFS_I(new_inode), 1);
}
/* src is a subvolume */
@@ -9598,8 +9548,8 @@ static int btrfs_rename_exchange(struct inode *old_dir,
old_dentry->d_name.name,
old_dentry->d_name.len);
} else { /* src is an inode */
- ret = __btrfs_unlink_inode(trans, root, old_dir,
- old_dentry->d_inode,
+ ret = __btrfs_unlink_inode(trans, root, BTRFS_I(old_dir),
+ BTRFS_I(old_dentry->d_inode),
old_dentry->d_name.name,
old_dentry->d_name.len);
if (!ret)
@@ -9618,8 +9568,8 @@ static int btrfs_rename_exchange(struct inode *old_dir,
new_dentry->d_name.name,
new_dentry->d_name.len);
} else { /* dest is an inode */
- ret = __btrfs_unlink_inode(trans, dest, new_dir,
- new_dentry->d_inode,
+ ret = __btrfs_unlink_inode(trans, dest, BTRFS_I(new_dir),
+ BTRFS_I(new_dentry->d_inode),
new_dentry->d_name.name,
new_dentry->d_name.len);
if (!ret)
@@ -9653,13 +9603,15 @@ static int btrfs_rename_exchange(struct inode *old_dir,
if (root_log_pinned) {
parent = new_dentry->d_parent;
- btrfs_log_new_name(trans, old_inode, old_dir, parent);
+ btrfs_log_new_name(trans, BTRFS_I(old_inode), BTRFS_I(old_dir),
+ parent);
btrfs_end_log_trans(root);
root_log_pinned = false;
}
if (dest_log_pinned) {
parent = old_dentry->d_parent;
- btrfs_log_new_name(trans, new_inode, new_dir, parent);
+ btrfs_log_new_name(trans, BTRFS_I(new_inode), BTRFS_I(new_dir),
+ parent);
btrfs_end_log_trans(dest);
dest_log_pinned = false;
}
@@ -9676,11 +9628,11 @@ out_fail:
* allow the tasks to sync it.
*/
if (ret && (root_log_pinned || dest_log_pinned)) {
- if (btrfs_inode_in_log(old_dir, fs_info->generation) ||
- btrfs_inode_in_log(new_dir, fs_info->generation) ||
- btrfs_inode_in_log(old_inode, fs_info->generation) ||
+ if (btrfs_inode_in_log(BTRFS_I(old_dir), fs_info->generation) ||
+ btrfs_inode_in_log(BTRFS_I(new_dir), fs_info->generation) ||
+ btrfs_inode_in_log(BTRFS_I(old_inode), fs_info->generation) ||
(new_inode &&
- btrfs_inode_in_log(new_inode, fs_info->generation)))
+ btrfs_inode_in_log(BTRFS_I(new_inode), fs_info->generation)))
btrfs_set_log_full_commit(fs_info, trans);
if (root_log_pinned) {
@@ -9719,7 +9671,7 @@ static int btrfs_whiteout_for_rename(struct btrfs_trans_handle *trans,
inode = btrfs_new_inode(trans, root, dir,
dentry->d_name.name,
dentry->d_name.len,
- btrfs_ino(dir),
+ btrfs_ino(BTRFS_I(dir)),
objectid,
S_IFCHR | WHITEOUT_MODE,
&index);
@@ -9767,10 +9719,10 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
u64 index = 0;
u64 root_objectid;
int ret;
- u64 old_ino = btrfs_ino(old_inode);
+ u64 old_ino = btrfs_ino(BTRFS_I(old_inode));
bool log_pinned = false;
- if (btrfs_ino(new_dir) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
+ if (btrfs_ino(BTRFS_I(new_dir)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
return -EPERM;
/* we only allow rename subvolume link between subvolumes */
@@ -9778,7 +9730,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
return -EXDEV;
if (old_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID ||
- (new_inode && btrfs_ino(new_inode) == BTRFS_FIRST_FREE_OBJECTID))
+ (new_inode && btrfs_ino(BTRFS_I(new_inode)) == BTRFS_FIRST_FREE_OBJECTID))
return -ENOTEMPTY;
if (S_ISDIR(old_inode->i_mode) && new_inode &&
@@ -9853,7 +9805,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
new_dentry->d_name.name,
new_dentry->d_name.len,
old_ino,
- btrfs_ino(new_dir), index);
+ btrfs_ino(BTRFS_I(new_dir)), index);
if (ret)
goto out_fail;
}
@@ -9866,7 +9818,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
old_inode->i_ctime = current_time(old_dir);
if (old_dentry->d_parent != new_dentry->d_parent)
- btrfs_record_unlink_dir(trans, old_dir, old_inode, 1);
+ btrfs_record_unlink_dir(trans, BTRFS_I(old_dir),
+ BTRFS_I(old_inode), 1);
if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
root_objectid = BTRFS_I(old_inode)->root->root_key.objectid;
@@ -9874,8 +9827,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
old_dentry->d_name.name,
old_dentry->d_name.len);
} else {
- ret = __btrfs_unlink_inode(trans, root, old_dir,
- d_inode(old_dentry),
+ ret = __btrfs_unlink_inode(trans, root, BTRFS_I(old_dir),
+ BTRFS_I(d_inode(old_dentry)),
old_dentry->d_name.name,
old_dentry->d_name.len);
if (!ret)
@@ -9889,7 +9842,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (new_inode) {
inode_inc_iversion(new_inode);
new_inode->i_ctime = current_time(new_inode);
- if (unlikely(btrfs_ino(new_inode) ==
+ if (unlikely(btrfs_ino(BTRFS_I(new_inode)) ==
BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
root_objectid = BTRFS_I(new_inode)->location.objectid;
ret = btrfs_unlink_subvol(trans, dest, new_dir,
@@ -9898,8 +9851,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
new_dentry->d_name.len);
BUG_ON(new_inode->i_nlink == 0);
} else {
- ret = btrfs_unlink_inode(trans, dest, new_dir,
- d_inode(new_dentry),
+ ret = btrfs_unlink_inode(trans, dest, BTRFS_I(new_dir),
+ BTRFS_I(d_inode(new_dentry)),
new_dentry->d_name.name,
new_dentry->d_name.len);
}
@@ -9925,7 +9878,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (log_pinned) {
struct dentry *parent = new_dentry->d_parent;
- btrfs_log_new_name(trans, old_inode, old_dir, parent);
+ btrfs_log_new_name(trans, BTRFS_I(old_inode), BTRFS_I(old_dir),
+ parent);
btrfs_end_log_trans(root);
log_pinned = false;
}
@@ -9952,11 +9906,11 @@ out_fail:
* allow the tasks to sync it.
*/
if (ret && log_pinned) {
- if (btrfs_inode_in_log(old_dir, fs_info->generation) ||
- btrfs_inode_in_log(new_dir, fs_info->generation) ||
- btrfs_inode_in_log(old_inode, fs_info->generation) ||
+ if (btrfs_inode_in_log(BTRFS_I(old_dir), fs_info->generation) ||
+ btrfs_inode_in_log(BTRFS_I(new_dir), fs_info->generation) ||
+ btrfs_inode_in_log(BTRFS_I(old_inode), fs_info->generation) ||
(new_inode &&
- btrfs_inode_in_log(new_inode, fs_info->generation)))
+ btrfs_inode_in_log(BTRFS_I(new_inode), fs_info->generation)))
btrfs_set_log_full_commit(fs_info, trans);
btrfs_end_log_trans(root);
@@ -10220,8 +10174,8 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
goto out_unlock;
inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
- dentry->d_name.len, btrfs_ino(dir), objectid,
- S_IFLNK|S_IRWXUGO, &index);
+ dentry->d_name.len, btrfs_ino(BTRFS_I(dir)),
+ objectid, S_IFLNK|S_IRWXUGO, &index);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
goto out_unlock;
@@ -10247,7 +10201,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
err = -ENOMEM;
goto out_unlock_inode;
}
- key.objectid = btrfs_ino(inode);
+ key.objectid = btrfs_ino(BTRFS_I(inode));
key.offset = 0;
key.type = BTRFS_EXTENT_DATA_KEY;
datasize = btrfs_file_extent_calc_inline_size(name_len);
@@ -10500,7 +10454,7 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
goto out;
inode = btrfs_new_inode(trans, root, dir, NULL, 0,
- btrfs_ino(dir), objectid, mode, &index);
+ btrfs_ino(BTRFS_I(dir)), objectid, mode, &index);
if (IS_ERR(inode)) {
ret = PTR_ERR(inode);
inode = NULL;
@@ -10572,8 +10526,6 @@ static const struct inode_operations btrfs_dir_inode_operations = {
static const struct inode_operations btrfs_dir_ro_inode_operations = {
.lookup = btrfs_lookup,
.permission = btrfs_permission,
- .get_acl = btrfs_get_acl,
- .set_acl = btrfs_set_acl,
.update_time = btrfs_update_time,
};
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 33f967d30b2a..717f34f977d5 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -395,7 +395,7 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
q = bdev_get_queue(device->bdev);
if (blk_queue_discard(q)) {
num_devices++;
- minlen = min((u64)q->limits.discard_granularity,
+ minlen = min_t(u64, q->limits.discard_granularity,
minlen);
}
}
@@ -487,8 +487,7 @@ static noinline int create_subvol(struct inode *dir,
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
- btrfs_subvolume_release_metadata(fs_info, &block_rsv,
- qgroup_reserved);
+ btrfs_subvolume_release_metadata(fs_info, &block_rsv);
goto fail_free;
}
trans->block_rsv = &block_rsv;
@@ -601,7 +600,7 @@ static noinline int create_subvol(struct inode *dir,
ret = btrfs_add_root_ref(trans, fs_info,
objectid, root->root_key.objectid,
- btrfs_ino(dir), index, name, namelen);
+ btrfs_ino(BTRFS_I(dir)), index, name, namelen);
BUG_ON(ret);
ret = btrfs_uuid_tree_add(trans, fs_info, root_item->uuid,
@@ -613,7 +612,7 @@ fail:
kfree(root_item);
trans->block_rsv = NULL;
trans->bytes_reserved = 0;
- btrfs_subvolume_release_metadata(fs_info, &block_rsv, qgroup_reserved);
+ btrfs_subvolume_release_metadata(fs_info, &block_rsv);
if (async_transid) {
*async_transid = trans->transid;
@@ -657,7 +656,7 @@ static void btrfs_wait_for_no_snapshoting_writes(struct btrfs_root *root)
}
static int create_snapshot(struct btrfs_root *root, struct inode *dir,
- struct dentry *dentry, char *name, int namelen,
+ struct dentry *dentry,
u64 *async_transid, bool readonly,
struct btrfs_qgroup_inherit *inherit)
{
@@ -670,12 +669,12 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
return -EINVAL;
- pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS);
+ pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_KERNEL);
if (!pending_snapshot)
return -ENOMEM;
pending_snapshot->root_item = kzalloc(sizeof(struct btrfs_root_item),
- GFP_NOFS);
+ GFP_KERNEL);
pending_snapshot->path = btrfs_alloc_path();
if (!pending_snapshot->root_item || !pending_snapshot->path) {
ret = -ENOMEM;
@@ -753,9 +752,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
d_instantiate(dentry, inode);
ret = 0;
fail:
- btrfs_subvolume_release_metadata(fs_info,
- &pending_snapshot->block_rsv,
- pending_snapshot->qgroup_reserved);
+ btrfs_subvolume_release_metadata(fs_info, &pending_snapshot->block_rsv);
dec_and_free:
if (atomic_dec_and_test(&root->will_be_snapshoted))
wake_up_atomic_t(&root->will_be_snapshoted);
@@ -874,7 +871,7 @@ static noinline int btrfs_mksubvol(const struct path *parent,
goto out_up_read;
if (snap_src) {
- error = create_snapshot(snap_src, dir, dentry, name, namelen,
+ error = create_snapshot(snap_src, dir, dentry,
async_transid, readonly, inherit);
} else {
error = create_subvol(dir, dentry, name, namelen,
@@ -941,7 +938,7 @@ static int find_new_extents(struct btrfs_root *root,
struct btrfs_file_extent_item *extent;
int type;
int ret;
- u64 ino = btrfs_ino(inode);
+ u64 ino = btrfs_ino(BTRFS_I(inode));
path = btrfs_alloc_path();
if (!path)
@@ -1780,7 +1777,7 @@ static noinline int btrfs_ioctl_subvol_getflags(struct file *file,
int ret = 0;
u64 flags = 0;
- if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID)
+ if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID)
return -EINVAL;
down_read(&fs_info->subvol_sem);
@@ -1812,7 +1809,7 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
if (ret)
goto out;
- if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) {
+ if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID) {
ret = -EINVAL;
goto out_drop_write;
}
@@ -2446,7 +2443,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
if (err)
goto out_dput;
- if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) {
+ if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID) {
err = -EINVAL;
goto out_dput;
}
@@ -2497,7 +2494,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
trans->block_rsv = &block_rsv;
trans->bytes_reserved = block_rsv.size;
- btrfs_record_snapshot_destroy(trans, dir);
+ btrfs_record_snapshot_destroy(trans, BTRFS_I(dir));
ret = btrfs_unlink_subvol(trans, root, dir,
dest->root_key.objectid,
@@ -2555,7 +2552,7 @@ out_end_trans:
err = ret;
inode->i_flags |= S_DEAD;
out_release:
- btrfs_subvolume_release_metadata(fs_info, &block_rsv, qgroup_reserved);
+ btrfs_subvolume_release_metadata(fs_info, &block_rsv);
out_up_write:
up_write(&fs_info->subvol_sem);
if (err) {
@@ -2613,9 +2610,6 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
goto out;
}
ret = btrfs_defrag_root(root);
- if (ret)
- goto out;
- ret = btrfs_defrag_root(root->fs_info->extent_root);
break;
case S_IFREG:
if (!(file->f_mode & FMODE_WRITE)) {
@@ -3059,8 +3053,7 @@ out:
return 0;
}
-static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst,
- u64 dst_loff, u64 len, struct cmp_pages *cmp)
+static int btrfs_cmp_data(u64 len, struct cmp_pages *cmp)
{
int ret = 0;
int i;
@@ -3128,26 +3121,27 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
int ret;
u64 len = olen;
struct cmp_pages cmp;
- int same_inode = 0;
+ bool same_inode = (src == dst);
u64 same_lock_start = 0;
u64 same_lock_len = 0;
- if (src == dst)
- same_inode = 1;
-
if (len == 0)
return 0;
- if (same_inode) {
+ if (same_inode)
inode_lock(src);
+ else
+ btrfs_double_inode_lock(src, dst);
- ret = extent_same_check_offsets(src, loff, &len, olen);
- if (ret)
- goto out_unlock;
- ret = extent_same_check_offsets(src, dst_loff, &len, olen);
- if (ret)
- goto out_unlock;
+ ret = extent_same_check_offsets(src, loff, &len, olen);
+ if (ret)
+ goto out_unlock;
+ ret = extent_same_check_offsets(dst, dst_loff, &len, olen);
+ if (ret)
+ goto out_unlock;
+
+ if (same_inode) {
/*
* Single inode case wants the same checks, except we
* don't want our length pushed out past i_size as
@@ -3175,16 +3169,6 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
same_lock_start = min_t(u64, loff, dst_loff);
same_lock_len = max_t(u64, loff, dst_loff) + len - same_lock_start;
- } else {
- btrfs_double_inode_lock(src, dst);
-
- ret = extent_same_check_offsets(src, loff, &len, olen);
- if (ret)
- goto out_unlock;
-
- ret = extent_same_check_offsets(dst, dst_loff, &len, olen);
- if (ret)
- goto out_unlock;
}
/* don't make the dst file partly checksummed */
@@ -3236,7 +3220,7 @@ again:
}
/* pass original length for comparison so we stay within i_size */
- ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen, &cmp);
+ ret = btrfs_cmp_data(olen, &cmp);
if (ret == 0)
ret = btrfs_clone(src, dst, loff, olen, len, dst_loff, 1);
@@ -3399,8 +3383,7 @@ static void clone_update_extent_map(struct inode *inode,
* data into the destination inode's inline extent if the later is greater then
* the former.
*/
-static int clone_copy_inline_extent(struct inode *src,
- struct inode *dst,
+static int clone_copy_inline_extent(struct inode *dst,
struct btrfs_trans_handle *trans,
struct btrfs_path *path,
struct btrfs_key *new_key,
@@ -3420,7 +3403,7 @@ static int clone_copy_inline_extent(struct inode *src,
if (new_key->offset > 0)
return -EOPNOTSUPP;
- key.objectid = btrfs_ino(dst);
+ key.objectid = btrfs_ino(BTRFS_I(dst));
key.type = BTRFS_EXTENT_DATA_KEY;
key.offset = 0;
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
@@ -3435,7 +3418,7 @@ static int clone_copy_inline_extent(struct inode *src,
goto copy_inline_extent;
}
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
- if (key.objectid == btrfs_ino(dst) &&
+ if (key.objectid == btrfs_ino(BTRFS_I(dst)) &&
key.type == BTRFS_EXTENT_DATA_KEY) {
ASSERT(key.offset > 0);
return -EOPNOTSUPP;
@@ -3469,7 +3452,7 @@ static int clone_copy_inline_extent(struct inode *src,
} else if (ret == 0) {
btrfs_item_key_to_cpu(path->nodes[0], &key,
path->slots[0]);
- if (key.objectid == btrfs_ino(dst) &&
+ if (key.objectid == btrfs_ino(BTRFS_I(dst)) &&
key.type == BTRFS_EXTENT_DATA_KEY)
return -EOPNOTSUPP;
}
@@ -3563,7 +3546,7 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
path->reada = READA_FORWARD;
/* clone data */
- key.objectid = btrfs_ino(src);
+ key.objectid = btrfs_ino(BTRFS_I(src));
key.type = BTRFS_EXTENT_DATA_KEY;
key.offset = off;
@@ -3606,7 +3589,7 @@ process_slot:
btrfs_item_key_to_cpu(leaf, &key, slot);
if (key.type > BTRFS_EXTENT_DATA_KEY ||
- key.objectid != btrfs_ino(src))
+ key.objectid != btrfs_ino(BTRFS_I(src)))
break;
if (key.type == BTRFS_EXTENT_DATA_KEY) {
@@ -3659,7 +3642,7 @@ process_slot:
path->leave_spinning = 0;
memcpy(&new_key, &key, sizeof(new_key));
- new_key.objectid = btrfs_ino(inode);
+ new_key.objectid = btrfs_ino(BTRFS_I(inode));
if (off <= key.offset)
new_key.offset = key.offset + destoff - off;
else
@@ -3749,7 +3732,7 @@ process_slot:
fs_info,
disko, diskl, 0,
root->root_key.objectid,
- btrfs_ino(inode),
+ btrfs_ino(BTRFS_I(inode)),
new_key.offset - datao);
if (ret) {
btrfs_abort_transaction(trans,
@@ -3779,7 +3762,7 @@ process_slot:
size -= skip + trim;
datal -= skip + trim;
- ret = clone_copy_inline_extent(src, inode,
+ ret = clone_copy_inline_extent(inode,
trans, path,
&new_key,
drop_start,
@@ -5129,7 +5112,7 @@ static long _btrfs_ioctl_set_received_subvol(struct file *file,
down_write(&fs_info->subvol_sem);
- if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) {
+ if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID) {
ret = -EINVAL;
goto out;
}
@@ -5653,6 +5636,10 @@ long btrfs_ioctl(struct file *file, unsigned int
#ifdef CONFIG_COMPAT
long btrfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
+ /*
+ * These all access 32-bit values anyway so no further
+ * handling is necessary.
+ */
switch (cmd) {
case FS_IOC32_GETFLAGS:
cmd = FS_IOC_GETFLAGS;
@@ -5663,8 +5650,6 @@ long btrfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
case FS_IOC32_GETVERSION:
cmd = FS_IOC_GETVERSION;
break;
- default:
- return -ENOIOCTLCMD;
}
return btrfs_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 041c3326d109..bc2aba810629 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -432,7 +432,7 @@ out:
}
/* Needs to either be called under a log transaction or the log_mutex */
-void btrfs_get_logged_extents(struct inode *inode,
+void btrfs_get_logged_extents(struct btrfs_inode *inode,
struct list_head *logged_list,
const loff_t start,
const loff_t end)
@@ -442,7 +442,7 @@ void btrfs_get_logged_extents(struct inode *inode,
struct rb_node *n;
struct rb_node *prev;
- tree = &BTRFS_I(inode)->ordered_tree;
+ tree = &inode->ordered_tree;
spin_lock_irq(&tree->lock);
n = __tree_search(&tree->tree, end, &prev);
if (!n)
@@ -984,8 +984,18 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
}
disk_i_size = BTRFS_I(inode)->disk_i_size;
- /* truncate file */
- if (disk_i_size > i_size) {
+ /*
+ * truncate file.
+ * If ordered is not NULL, then this is called from endio and
+ * disk_i_size will be updated by either truncate itself or any
+ * in-flight IOs which are inside the disk_i_size.
+ *
+ * Because btrfs_setsize() may set i_size with disk_i_size if truncate
+ * fails somehow, we need to make sure we have a precise disk_i_size by
+ * updating it as usual.
+ *
+ */
+ if (!ordered && disk_i_size > i_size) {
BTRFS_I(inode)->disk_i_size = orig_offset;
ret = 0;
goto out;
@@ -1032,25 +1042,22 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
/* We treat this entry as if it doesn't exist */
if (test_bit(BTRFS_ORDERED_UPDATED_ISIZE, &test->flags))
continue;
- if (test->file_offset + test->len <= disk_i_size)
+
+ if (entry_end(test) <= disk_i_size)
break;
if (test->file_offset >= i_size)
break;
- if (entry_end(test) > disk_i_size) {
- /*
- * we don't update disk_i_size now, so record this
- * undealt i_size. Or we will not know the real
- * i_size.
- */
- if (test->outstanding_isize < offset)
- test->outstanding_isize = offset;
- if (ordered &&
- ordered->outstanding_isize >
- test->outstanding_isize)
- test->outstanding_isize =
- ordered->outstanding_isize;
- goto out;
- }
+
+ /*
+ * We don't update disk_i_size now, so record this undealt
+ * i_size. Or we will not know the real i_size.
+ */
+ if (test->outstanding_isize < offset)
+ test->outstanding_isize = offset;
+ if (ordered &&
+ ordered->outstanding_isize > test->outstanding_isize)
+ test->outstanding_isize = ordered->outstanding_isize;
+ goto out;
}
new_i_size = min_t(u64, offset, i_size);
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 5f2b0ca28705..a8cb8efe6fae 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -75,6 +75,8 @@ struct btrfs_ordered_sum {
* in the logging code. */
#define BTRFS_ORDERED_PENDING 11 /* We are waiting for this ordered extent to
* complete in the current transaction. */
+#define BTRFS_ORDERED_REGULAR 12 /* Regular IO for COW */
+
struct btrfs_ordered_extent {
/* logical offset in the file */
u64 file_offset;
@@ -201,7 +203,7 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr,
const u64 range_start, const u64 range_len);
int btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
const u64 range_start, const u64 range_len);
-void btrfs_get_logged_extents(struct inode *inode,
+void btrfs_get_logged_extents(struct btrfs_inode *inode,
struct list_head *logged_list,
const loff_t start,
const loff_t end);
diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c
index f2621e330954..d6cb155ef7a1 100644
--- a/fs/btrfs/props.c
+++ b/fs/btrfs/props.c
@@ -279,7 +279,7 @@ static void inode_prop_iterator(void *ctx,
if (unlikely(ret))
btrfs_warn(root->fs_info,
"error applying prop %s to ino %llu (root %llu): %d",
- handler->xattr_name, btrfs_ino(inode),
+ handler->xattr_name, btrfs_ino(BTRFS_I(inode)),
root->root_key.objectid, ret);
else
set_bit(BTRFS_INODE_HAS_PROPS, &BTRFS_I(inode)->runtime_flags);
@@ -288,7 +288,7 @@ static void inode_prop_iterator(void *ctx,
int btrfs_load_inode_props(struct inode *inode, struct btrfs_path *path)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
- u64 ino = btrfs_ino(inode);
+ u64 ino = btrfs_ino(BTRFS_I(inode));
int ret;
ret = iterate_object_props(root, path, ino, inode_prop_iterator, inode);
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 662821f1252c..a5da750c1087 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -319,7 +319,7 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
return 0;
- fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS);
+ fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL);
if (!fs_info->qgroup_ulist) {
ret = -ENOMEM;
goto out;
@@ -876,7 +876,7 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
goto out;
}
- fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS);
+ fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL);
if (!fs_info->qgroup_ulist) {
ret = -ENOMEM;
goto out;
@@ -1019,7 +1019,7 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
list_del(&quota_root->dirty_list);
btrfs_tree_lock(quota_root->node);
- clean_tree_block(trans, fs_info, quota_root->node);
+ clean_tree_block(fs_info, quota_root->node);
btrfs_tree_unlock(quota_root->node);
btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1);
@@ -1038,6 +1038,15 @@ static void qgroup_dirty(struct btrfs_fs_info *fs_info,
list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
}
+static void report_reserved_underflow(struct btrfs_fs_info *fs_info,
+ struct btrfs_qgroup *qgroup,
+ u64 num_bytes)
+{
+ btrfs_warn(fs_info,
+ "qgroup %llu reserved space underflow, have: %llu, to free: %llu",
+ qgroup->qgroupid, qgroup->reserved, num_bytes);
+ qgroup->reserved = 0;
+}
/*
* The easy accounting, if we are adding/removing the only ref for an extent
* then this qgroup and all of the parent qgroups get their reference and
@@ -1065,8 +1074,12 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
WARN_ON(sign < 0 && qgroup->excl < num_bytes);
qgroup->excl += sign * num_bytes;
qgroup->excl_cmpr += sign * num_bytes;
- if (sign > 0)
- qgroup->reserved -= num_bytes;
+ if (sign > 0) {
+ if (WARN_ON(qgroup->reserved < num_bytes))
+ report_reserved_underflow(fs_info, qgroup, num_bytes);
+ else
+ qgroup->reserved -= num_bytes;
+ }
qgroup_dirty(fs_info, qgroup);
@@ -1086,8 +1099,13 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
qgroup->rfer_cmpr += sign * num_bytes;
WARN_ON(sign < 0 && qgroup->excl < num_bytes);
qgroup->excl += sign * num_bytes;
- if (sign > 0)
- qgroup->reserved -= num_bytes;
+ if (sign > 0) {
+ if (WARN_ON(qgroup->reserved < num_bytes))
+ report_reserved_underflow(fs_info, qgroup,
+ num_bytes);
+ else
+ qgroup->reserved -= num_bytes;
+ }
qgroup->excl_cmpr += sign * num_bytes;
qgroup_dirty(fs_info, qgroup);
@@ -1156,7 +1174,7 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst))
return -EINVAL;
- tmp = ulist_alloc(GFP_NOFS);
+ tmp = ulist_alloc(GFP_KERNEL);
if (!tmp)
return -ENOMEM;
@@ -1205,7 +1223,7 @@ out:
return ret;
}
-int __del_qgroup_relation(struct btrfs_trans_handle *trans,
+static int __del_qgroup_relation(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 src, u64 dst)
{
struct btrfs_root *quota_root;
@@ -1216,7 +1234,7 @@ int __del_qgroup_relation(struct btrfs_trans_handle *trans,
int ret = 0;
int err;
- tmp = ulist_alloc(GFP_NOFS);
+ tmp = ulist_alloc(GFP_KERNEL);
if (!tmp)
return -ENOMEM;
@@ -1446,8 +1464,9 @@ int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans,
while (node) {
record = rb_entry(node, struct btrfs_qgroup_extent_record,
node);
- ret = btrfs_find_all_roots(NULL, fs_info, record->bytenr, 0,
- &record->old_roots);
+ if (WARN_ON(!record->old_roots))
+ ret = btrfs_find_all_roots(NULL, fs_info,
+ record->bytenr, 0, &record->old_roots);
if (ret < 0)
break;
if (qgroup_to_skip)
@@ -1486,6 +1505,28 @@ int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info,
return 0;
}
+int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
+ struct btrfs_qgroup_extent_record *qrecord)
+{
+ struct ulist *old_root;
+ u64 bytenr = qrecord->bytenr;
+ int ret;
+
+ ret = btrfs_find_all_roots(NULL, fs_info, bytenr, 0, &old_root);
+ if (ret < 0)
+ return ret;
+
+ /*
+ * Here we don't need to get the lock of
+ * trans->transaction->delayed_refs, since inserted qrecord won't
+ * be deleted, only qrecord->node may be modified (new qrecord insert)
+ *
+ * So modifying qrecord->old_roots is safe here
+ */
+ qrecord->old_roots = old_root;
+ return 0;
+}
+
int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes,
gfp_t gfp_flag)
@@ -1511,9 +1552,11 @@ int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans,
spin_lock(&delayed_refs->lock);
ret = btrfs_qgroup_trace_extent_nolock(fs_info, delayed_refs, record);
spin_unlock(&delayed_refs->lock);
- if (ret > 0)
+ if (ret > 0) {
kfree(record);
- return 0;
+ return 0;
+ }
+ return btrfs_qgroup_trace_extent_post(fs_info, record);
}
int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
@@ -1571,8 +1614,7 @@ int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
* If we increment the root nodes slot counter past the number of
* elements, 1 is returned to signal completion of the search.
*/
-static int adjust_slots_upwards(struct btrfs_root *root,
- struct btrfs_path *path, int root_level)
+static int adjust_slots_upwards(struct btrfs_path *path, int root_level)
{
int level = 0;
int nr, slot;
@@ -1713,7 +1755,7 @@ walk_down:
goto out;
/* Nonzero return here means we completed our search */
- ret = adjust_slots_upwards(root, path, root_level);
+ ret = adjust_slots_upwards(path, root_level);
if (ret)
break;
@@ -1927,13 +1969,14 @@ btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans,
u64 nr_old_roots = 0;
int ret = 0;
+ if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
+ return 0;
+
if (new_roots)
nr_new_roots = new_roots->nnodes;
if (old_roots)
nr_old_roots = old_roots->nnodes;
- if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
- goto out_free;
BUG_ON(!fs_info->quota_root);
trace_btrfs_qgroup_account_extent(fs_info, bytenr, num_bytes,
@@ -2170,9 +2213,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
goto out;
}
- rcu_read_lock();
level_size = fs_info->nodesize;
- rcu_read_unlock();
}
/*
@@ -2306,7 +2347,20 @@ out:
return ret;
}
-static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
+static bool qgroup_check_limits(const struct btrfs_qgroup *qg, u64 num_bytes)
+{
+ if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
+ qg->reserved + (s64)qg->rfer + num_bytes > qg->max_rfer)
+ return false;
+
+ if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
+ qg->reserved + (s64)qg->excl + num_bytes > qg->max_excl)
+ return false;
+
+ return true;
+}
+
+static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce)
{
struct btrfs_root *quota_root;
struct btrfs_qgroup *qgroup;
@@ -2347,16 +2401,7 @@ static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
qg = unode_aux_to_qgroup(unode);
- if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
- qg->reserved + (s64)qg->rfer + num_bytes >
- qg->max_rfer) {
- ret = -EDQUOT;
- goto out;
- }
-
- if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
- qg->reserved + (s64)qg->excl + num_bytes >
- qg->max_excl) {
+ if (enforce && !qgroup_check_limits(qg, num_bytes)) {
ret = -EDQUOT;
goto out;
}
@@ -2424,7 +2469,10 @@ void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
qg = unode_aux_to_qgroup(unode);
- qg->reserved -= num_bytes;
+ if (WARN_ON(qg->reserved < num_bytes))
+ report_reserved_underflow(fs_info, qg, num_bytes);
+ else
+ qg->reserved -= num_bytes;
list_for_each_entry(glist, &qg->groups, next_group) {
ret = ulist_add(fs_info->qgroup_ulist,
@@ -2439,11 +2487,6 @@ out:
spin_unlock(&fs_info->qgroup_lock);
}
-static inline void qgroup_free(struct btrfs_root *root, u64 num_bytes)
-{
- return btrfs_qgroup_free_refroot(root->fs_info, root->objectid,
- num_bytes);
-}
void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
{
if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq)
@@ -2803,7 +2846,7 @@ int btrfs_qgroup_reserve_data(struct inode *inode, u64 start, u64 len)
return 0;
changeset.bytes_changed = 0;
- changeset.range_changed = ulist_alloc(GFP_NOFS);
+ ulist_init(&changeset.range_changed);
ret = set_record_extent_bits(&BTRFS_I(inode)->io_tree, start,
start + len -1, EXTENT_QGROUP_RESERVED, &changeset);
trace_btrfs_qgroup_reserve_data(inode, start, len,
@@ -2811,21 +2854,21 @@ int btrfs_qgroup_reserve_data(struct inode *inode, u64 start, u64 len)
QGROUP_RESERVE);
if (ret < 0)
goto cleanup;
- ret = qgroup_reserve(root, changeset.bytes_changed);
+ ret = qgroup_reserve(root, changeset.bytes_changed, true);
if (ret < 0)
goto cleanup;
- ulist_free(changeset.range_changed);
+ ulist_release(&changeset.range_changed);
return ret;
cleanup:
/* cleanup already reserved ranges */
ULIST_ITER_INIT(&uiter);
- while ((unode = ulist_next(changeset.range_changed, &uiter)))
+ while ((unode = ulist_next(&changeset.range_changed, &uiter)))
clear_extent_bit(&BTRFS_I(inode)->io_tree, unode->val,
unode->aux, EXTENT_QGROUP_RESERVED, 0, 0, NULL,
GFP_NOFS);
- ulist_free(changeset.range_changed);
+ ulist_release(&changeset.range_changed);
return ret;
}
@@ -2837,23 +2880,22 @@ static int __btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len,
int ret;
changeset.bytes_changed = 0;
- changeset.range_changed = ulist_alloc(GFP_NOFS);
- if (!changeset.range_changed)
- return -ENOMEM;
-
+ ulist_init(&changeset.range_changed);
ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, start,
start + len -1, EXTENT_QGROUP_RESERVED, &changeset);
if (ret < 0)
goto out;
if (free) {
- qgroup_free(BTRFS_I(inode)->root, changeset.bytes_changed);
+ btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info,
+ BTRFS_I(inode)->root->objectid,
+ changeset.bytes_changed);
trace_op = QGROUP_FREE;
}
trace_btrfs_qgroup_release_data(inode, start, len,
changeset.bytes_changed, trace_op);
out:
- ulist_free(changeset.range_changed);
+ ulist_release(&changeset.range_changed);
return ret;
}
@@ -2892,7 +2934,8 @@ int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len)
return __btrfs_qgroup_release_data(inode, start, len, 0);
}
-int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes)
+int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
+ bool enforce)
{
struct btrfs_fs_info *fs_info = root->fs_info;
int ret;
@@ -2902,7 +2945,7 @@ int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes)
return 0;
BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize));
- ret = qgroup_reserve(root, num_bytes);
+ ret = qgroup_reserve(root, num_bytes, enforce);
if (ret < 0)
return ret;
atomic_add(num_bytes, &root->qgroup_meta_rsv);
@@ -2921,7 +2964,7 @@ void btrfs_qgroup_free_meta_all(struct btrfs_root *root)
reserved = atomic_xchg(&root->qgroup_meta_rsv, 0);
if (reserved == 0)
return;
- qgroup_free(root, reserved);
+ btrfs_qgroup_free_refroot(fs_info, root->objectid, reserved);
}
void btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes)
@@ -2935,7 +2978,7 @@ void btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes)
BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize));
WARN_ON(atomic_read(&root->qgroup_meta_rsv) < num_bytes);
atomic_sub(num_bytes, &root->qgroup_meta_rsv);
- qgroup_free(root, num_bytes);
+ btrfs_qgroup_free_refroot(fs_info, root->objectid, num_bytes);
}
/*
@@ -2950,22 +2993,22 @@ void btrfs_qgroup_check_reserved_leak(struct inode *inode)
int ret;
changeset.bytes_changed = 0;
- changeset.range_changed = ulist_alloc(GFP_NOFS);
- if (WARN_ON(!changeset.range_changed))
- return;
-
+ ulist_init(&changeset.range_changed);
ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
EXTENT_QGROUP_RESERVED, &changeset);
WARN_ON(ret < 0);
if (WARN_ON(changeset.bytes_changed)) {
ULIST_ITER_INIT(&iter);
- while ((unode = ulist_next(changeset.range_changed, &iter))) {
+ while ((unode = ulist_next(&changeset.range_changed, &iter))) {
btrfs_warn(BTRFS_I(inode)->root->fs_info,
"leaking qgroup reserved space, ino: %lu, start: %llu, end: %llu",
inode->i_ino, unode->val, unode->aux);
}
- qgroup_free(BTRFS_I(inode)->root, changeset.bytes_changed);
+ btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info,
+ BTRFS_I(inode)->root->objectid,
+ changeset.bytes_changed);
+
}
- ulist_free(changeset.range_changed);
+ ulist_release(&changeset.range_changed);
}
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index 416ae8e1d23c..26932a8a1993 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -94,9 +94,10 @@ int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
/*
* Inform qgroup to trace one dirty extent, its info is recorded in @record.
- * So qgroup can account it at commit trans time.
+ * So qgroup can account it at transaction committing time.
*
- * No lock version, caller must acquire delayed ref lock and allocate memory.
+ * No lock version, caller must acquire delayed ref lock and allocated memory,
+ * then call btrfs_qgroup_trace_extent_post() after exiting lock context.
*
* Return 0 for success insert
* Return >0 for existing record, caller can free @record safely.
@@ -108,11 +109,37 @@ int btrfs_qgroup_trace_extent_nolock(
struct btrfs_qgroup_extent_record *record);
/*
+ * Post handler after qgroup_trace_extent_nolock().
+ *
+ * NOTE: Current qgroup does the expensive backref walk at transaction
+ * committing time with TRANS_STATE_COMMIT_DOING, this blocks incoming
+ * new transaction.
+ * This is designed to allow btrfs_find_all_roots() to get correct new_roots
+ * result.
+ *
+ * However for old_roots there is no need to do backref walk at that time,
+ * since we search commit roots to walk backref and result will always be
+ * correct.
+ *
+ * Due to the nature of no lock version, we can't do backref there.
+ * So we must call btrfs_qgroup_trace_extent_post() after exiting
+ * spinlock context.
+ *
+ * TODO: If we can fix and prove btrfs_find_all_roots() can get correct result
+ * using current root, then we can move all expensive backref walk out of
+ * transaction committing, but not now as qgroup accounting will be wrong again.
+ */
+int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
+ struct btrfs_qgroup_extent_record *qrecord);
+
+/*
* Inform qgroup to trace one dirty extent, specified by @bytenr and
* @num_bytes.
* So qgroup can account it at commit trans time.
*
- * Better encapsulated version.
+ * Better encapsulated version, with memory allocation and backref walk for
+ * commit roots.
+ * So this can sleep.
*
* Return 0 if the operation is done.
* Return <0 for error, like memory allocation failure or invalid parameter
@@ -181,7 +208,8 @@ int btrfs_qgroup_reserve_data(struct inode *inode, u64 start, u64 len);
int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len);
int btrfs_qgroup_free_data(struct inode *inode, u64 start, u64 len);
-int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes);
+int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
+ bool enforce);
void btrfs_qgroup_free_meta_all(struct btrfs_root *root);
void btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes);
void btrfs_qgroup_check_reserved_leak(struct inode *inode);
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index d2a9a1ee5361..1571bf26dc07 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -677,11 +677,9 @@ static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio)
struct btrfs_raid_bio *freeit = NULL;
struct btrfs_raid_bio *cache_drop = NULL;
int ret = 0;
- int walk = 0;
spin_lock_irqsave(&h->lock, flags);
list_for_each_entry(cur, &h->hash_list, hash_list) {
- walk++;
if (cur->bbio->raid_map[0] == rbio->bbio->raid_map[0]) {
spin_lock(&cur->bio_list_lock);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 379711048fb0..ddbde0f08365 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1548,9 +1548,9 @@ again:
prev = node;
entry = rb_entry(node, struct btrfs_inode, rb_node);
- if (objectid < btrfs_ino(&entry->vfs_inode))
+ if (objectid < btrfs_ino(entry))
node = node->rb_left;
- else if (objectid > btrfs_ino(&entry->vfs_inode))
+ else if (objectid > btrfs_ino(entry))
node = node->rb_right;
else
break;
@@ -1558,7 +1558,7 @@ again:
if (!node) {
while (prev) {
entry = rb_entry(prev, struct btrfs_inode, rb_node);
- if (objectid <= btrfs_ino(&entry->vfs_inode)) {
+ if (objectid <= btrfs_ino(entry)) {
node = prev;
break;
}
@@ -1573,7 +1573,7 @@ again:
return inode;
}
- objectid = btrfs_ino(&entry->vfs_inode) + 1;
+ objectid = btrfs_ino(entry) + 1;
if (cond_resched_lock(&root->inode_lock))
goto again;
@@ -1609,8 +1609,8 @@ static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr,
return -ENOMEM;
bytenr -= BTRFS_I(reloc_inode)->index_cnt;
- ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(reloc_inode),
- bytenr, 0);
+ ret = btrfs_lookup_file_extent(NULL, root, path,
+ btrfs_ino(BTRFS_I(reloc_inode)), bytenr, 0);
if (ret < 0)
goto out;
if (ret > 0) {
@@ -1698,11 +1698,11 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
if (first) {
inode = find_next_inode(root, key.objectid);
first = 0;
- } else if (inode && btrfs_ino(inode) < key.objectid) {
+ } else if (inode && btrfs_ino(BTRFS_I(inode)) < key.objectid) {
btrfs_add_delayed_iput(inode);
inode = find_next_inode(root, key.objectid);
}
- if (inode && btrfs_ino(inode) == key.objectid) {
+ if (inode && btrfs_ino(BTRFS_I(inode)) == key.objectid) {
end = key.offset +
btrfs_file_extent_num_bytes(leaf, fi);
WARN_ON(!IS_ALIGNED(key.offset,
@@ -2088,7 +2088,7 @@ static int invalidate_extent_cache(struct btrfs_root *root,
inode = find_next_inode(root, objectid);
if (!inode)
break;
- ino = btrfs_ino(inode);
+ ino = btrfs_ino(BTRFS_I(inode));
if (ino > max_key->objectid) {
iput(inode);
@@ -3543,7 +3543,7 @@ truncate:
goto out;
}
- ret = btrfs_truncate_free_space_cache(root, trans, block_group, inode);
+ ret = btrfs_truncate_free_space_cache(trans, block_group, inode);
btrfs_end_transaction(trans);
btrfs_btree_balance_dirty(fs_info);
@@ -4334,7 +4334,7 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
rc->block_group = btrfs_lookup_block_group(fs_info, group_start);
BUG_ON(!rc->block_group);
- ret = btrfs_inc_block_group_ro(extent_root, rc->block_group);
+ ret = btrfs_inc_block_group_ro(fs_info, rc->block_group);
if (ret) {
err = ret;
goto out;
@@ -4347,8 +4347,7 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
goto out;
}
- inode = lookup_free_space_inode(fs_info->tree_root, rc->block_group,
- path);
+ inode = lookup_free_space_inode(fs_info, rc->block_group, path);
btrfs_free_path(path);
if (!IS_ERR(inode))
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 4c6735491ee0..a08224eab8b4 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -74,7 +74,7 @@ static void btrfs_read_root_item(struct extent_buffer *eb, int slot,
*
* If we find something return 0, otherwise > 0, < 0 on error.
*/
-int btrfs_find_root(struct btrfs_root *root, struct btrfs_key *search_key,
+int btrfs_find_root(struct btrfs_root *root, const struct btrfs_key *search_key,
struct btrfs_path *path, struct btrfs_root_item *root_item,
struct btrfs_key *root_key)
{
@@ -207,7 +207,7 @@ out:
}
int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
- struct btrfs_key *key, struct btrfs_root_item *item)
+ const struct btrfs_key *key, struct btrfs_root_item *item)
{
/*
* Make sure generation v1 and v2 match. See update_root for details.
@@ -337,7 +337,7 @@ int btrfs_find_orphan_roots(struct btrfs_fs_info *fs_info)
/* drop the root item for 'key' from 'root' */
int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
- struct btrfs_key *key)
+ const struct btrfs_key *key)
{
struct btrfs_path *path;
int ret;
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 9a94670536a6..ff9a11c39f5e 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -282,9 +282,7 @@ static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
u64 *extent_physical,
struct btrfs_device **extent_dev,
int *extent_mirror_num);
-static int scrub_setup_wr_ctx(struct scrub_ctx *sctx,
- struct scrub_wr_ctx *wr_ctx,
- struct btrfs_fs_info *fs_info,
+static int scrub_setup_wr_ctx(struct scrub_wr_ctx *wr_ctx,
struct btrfs_device *dev,
int is_dev_replace);
static void scrub_free_wr_ctx(struct scrub_wr_ctx *wr_ctx);
@@ -501,7 +499,7 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)
spin_lock_init(&sctx->stat_lock);
init_waitqueue_head(&sctx->list_wait);
- ret = scrub_setup_wr_ctx(sctx, &sctx->wr_ctx, fs_info,
+ ret = scrub_setup_wr_ctx(&sctx->wr_ctx,
fs_info->dev_replace.tgtdev, is_dev_replace);
if (ret) {
scrub_free_ctx(sctx);
@@ -3584,7 +3582,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
* -> btrfs_scrub_pause()
*/
scrub_pause_on(fs_info);
- ret = btrfs_inc_block_group_ro(root, cache);
+ ret = btrfs_inc_block_group_ro(fs_info, cache);
if (!ret && is_dev_replace) {
/*
* If we are doing a device replace wait for any tasks
@@ -4084,9 +4082,7 @@ static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
btrfs_put_bbio(bbio);
}
-static int scrub_setup_wr_ctx(struct scrub_ctx *sctx,
- struct scrub_wr_ctx *wr_ctx,
- struct btrfs_fs_info *fs_info,
+static int scrub_setup_wr_ctx(struct scrub_wr_ctx *wr_ctx,
struct btrfs_device *dev,
int is_dev_replace)
{
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index d145ce804620..456c8901489b 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -1681,6 +1681,9 @@ static int is_inode_existent(struct send_ctx *sctx, u64 ino, u64 gen)
{
int ret;
+ if (ino == BTRFS_FIRST_FREE_OBJECTID)
+ return 1;
+
ret = get_cur_inode_state(sctx, ino, gen);
if (ret < 0)
goto out;
@@ -1866,7 +1869,7 @@ static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen,
* not deleted and then re-created, if it was then we have no overwrite
* and we can just unlink this entry.
*/
- if (sctx->parent_root) {
+ if (sctx->parent_root && dir != BTRFS_FIRST_FREE_OBJECTID) {
ret = get_inode_info(sctx->parent_root, dir, NULL, &gen, NULL,
NULL, NULL, NULL);
if (ret < 0 && ret != -ENOENT)
@@ -1934,6 +1937,19 @@ static int did_overwrite_ref(struct send_ctx *sctx,
if (ret <= 0)
goto out;
+ if (dir != BTRFS_FIRST_FREE_OBJECTID) {
+ ret = get_inode_info(sctx->send_root, dir, NULL, &gen, NULL,
+ NULL, NULL, NULL);
+ if (ret < 0 && ret != -ENOENT)
+ goto out;
+ if (ret) {
+ ret = 0;
+ goto out;
+ }
+ if (gen != dir_gen)
+ goto out;
+ }
+
/* check if the ref was overwritten by another ref */
ret = lookup_dir_item_inode(sctx->send_root, dir, name, name_len,
&ow_inode, &other_type);
@@ -3556,6 +3572,7 @@ static int wait_for_parent_move(struct send_ctx *sctx,
{
int ret = 0;
u64 ino = parent_ref->dir;
+ u64 ino_gen = parent_ref->dir_gen;
u64 parent_ino_before, parent_ino_after;
struct fs_path *path_before = NULL;
struct fs_path *path_after = NULL;
@@ -3576,6 +3593,8 @@ static int wait_for_parent_move(struct send_ctx *sctx,
* at get_cur_path()).
*/
while (ino > BTRFS_FIRST_FREE_OBJECTID) {
+ u64 parent_ino_after_gen;
+
if (is_waiting_for_move(sctx, ino)) {
/*
* If the current inode is an ancestor of ino in the
@@ -3598,7 +3617,7 @@ static int wait_for_parent_move(struct send_ctx *sctx,
fs_path_reset(path_after);
ret = get_first_ref(sctx->send_root, ino, &parent_ino_after,
- NULL, path_after);
+ &parent_ino_after_gen, path_after);
if (ret < 0)
goto out;
ret = get_first_ref(sctx->parent_root, ino, &parent_ino_before,
@@ -3615,10 +3634,20 @@ static int wait_for_parent_move(struct send_ctx *sctx,
if (ino > sctx->cur_ino &&
(parent_ino_before != parent_ino_after || len1 != len2 ||
memcmp(path_before->start, path_after->start, len1))) {
- ret = 1;
- break;
+ u64 parent_ino_gen;
+
+ ret = get_inode_info(sctx->parent_root, ino, NULL,
+ &parent_ino_gen, NULL, NULL, NULL,
+ NULL);
+ if (ret < 0)
+ goto out;
+ if (ino_gen == parent_ino_gen) {
+ ret = 1;
+ break;
+ }
}
ino = parent_ino_after;
+ ino_gen = parent_ino_after_gen;
}
out:
@@ -5277,6 +5306,81 @@ out:
return ret;
}
+static int range_is_hole_in_parent(struct send_ctx *sctx,
+ const u64 start,
+ const u64 end)
+{
+ struct btrfs_path *path;
+ struct btrfs_key key;
+ struct btrfs_root *root = sctx->parent_root;
+ u64 search_start = start;
+ int ret;
+
+ path = alloc_path_for_send();
+ if (!path)
+ return -ENOMEM;
+
+ key.objectid = sctx->cur_ino;
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ key.offset = search_start;
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0)
+ goto out;
+ if (ret > 0 && path->slots[0] > 0)
+ path->slots[0]--;
+
+ while (search_start < end) {
+ struct extent_buffer *leaf = path->nodes[0];
+ int slot = path->slots[0];
+ struct btrfs_file_extent_item *fi;
+ u64 extent_end;
+
+ if (slot >= btrfs_header_nritems(leaf)) {
+ ret = btrfs_next_leaf(root, path);
+ if (ret < 0)
+ goto out;
+ else if (ret > 0)
+ break;
+ continue;
+ }
+
+ btrfs_item_key_to_cpu(leaf, &key, slot);
+ if (key.objectid < sctx->cur_ino ||
+ key.type < BTRFS_EXTENT_DATA_KEY)
+ goto next;
+ if (key.objectid > sctx->cur_ino ||
+ key.type > BTRFS_EXTENT_DATA_KEY ||
+ key.offset >= end)
+ break;
+
+ fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
+ if (btrfs_file_extent_type(leaf, fi) ==
+ BTRFS_FILE_EXTENT_INLINE) {
+ u64 size = btrfs_file_extent_inline_len(leaf, slot, fi);
+
+ extent_end = ALIGN(key.offset + size,
+ root->fs_info->sectorsize);
+ } else {
+ extent_end = key.offset +
+ btrfs_file_extent_num_bytes(leaf, fi);
+ }
+ if (extent_end <= start)
+ goto next;
+ if (btrfs_file_extent_disk_bytenr(leaf, fi) == 0) {
+ search_start = extent_end;
+ goto next;
+ }
+ ret = 0;
+ goto out;
+next:
+ path->slots[0]++;
+ }
+ ret = 1;
+out:
+ btrfs_free_path(path);
+ return ret;
+}
+
static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path,
struct btrfs_key *key)
{
@@ -5321,8 +5425,17 @@ static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path,
return ret;
}
- if (sctx->cur_inode_last_extent < key->offset)
- ret = send_hole(sctx, key->offset);
+ if (sctx->cur_inode_last_extent < key->offset) {
+ ret = range_is_hole_in_parent(sctx,
+ sctx->cur_inode_last_extent,
+ key->offset);
+ if (ret < 0)
+ return ret;
+ else if (ret == 0)
+ ret = send_hole(sctx, key->offset);
+ else
+ ret = 0;
+ }
sctx->cur_inode_last_extent = extent_end;
return ret;
}
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index b5ae7d3d1896..da687dc79cce 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -265,7 +265,7 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
function, line, errstr);
return;
}
- ACCESS_ONCE(trans->transaction->aborted) = errno;
+ WRITE_ONCE(trans->transaction->aborted, errno);
/* Wake up anybody who may be waiting on this transaction */
wake_up(&fs_info->transaction_wait);
wake_up(&fs_info->transaction_blocked_wait);
@@ -1114,7 +1114,7 @@ static int get_default_subvol_objectid(struct btrfs_fs_info *fs_info, u64 *objec
static int btrfs_fill_super(struct super_block *sb,
struct btrfs_fs_devices *fs_devices,
- void *data, int silent)
+ void *data)
{
struct inode *inode;
struct btrfs_fs_info *fs_info = btrfs_sb(sb);
@@ -1611,8 +1611,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
} else {
snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
btrfs_sb(s)->bdev_holder = fs_type;
- error = btrfs_fill_super(s, fs_devices, data,
- flags & MS_SILENT ? 1 : 0);
+ error = btrfs_fill_super(s, fs_devices, data);
}
if (error) {
deactivate_locked_super(s);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 0e0508f488b2..6b3e0fc2fe7a 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -474,7 +474,8 @@ static inline bool need_reserve_reloc_root(struct btrfs_root *root)
static struct btrfs_trans_handle *
start_transaction(struct btrfs_root *root, unsigned int num_items,
- unsigned int type, enum btrfs_reserve_flush_enum flush)
+ unsigned int type, enum btrfs_reserve_flush_enum flush,
+ bool enforce_qgroups)
{
struct btrfs_fs_info *fs_info = root->fs_info;
@@ -505,9 +506,10 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
* Do the reservation before we join the transaction so we can do all
* the appropriate flushing if need be.
*/
- if (num_items > 0 && root != fs_info->chunk_root) {
+ if (num_items && root != fs_info->chunk_root) {
qgroup_reserved = num_items * fs_info->nodesize;
- ret = btrfs_qgroup_reserve_meta(root, qgroup_reserved);
+ ret = btrfs_qgroup_reserve_meta(root, qgroup_reserved,
+ enforce_qgroups);
if (ret)
return ERR_PTR(ret);
@@ -613,8 +615,9 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
unsigned int num_items)
{
return start_transaction(root, num_items, TRANS_START,
- BTRFS_RESERVE_FLUSH_ALL);
+ BTRFS_RESERVE_FLUSH_ALL, true);
}
+
struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv(
struct btrfs_root *root,
unsigned int num_items,
@@ -625,7 +628,14 @@ struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv(
u64 num_bytes;
int ret;
- trans = btrfs_start_transaction(root, num_items);
+ /*
+ * We have two callers: unlink and block group removal. The
+ * former should succeed even if we will temporarily exceed
+ * quota and the latter operates on the extent root so
+ * qgroup enforcement is ignored anyway.
+ */
+ trans = start_transaction(root, num_items, TRANS_START,
+ BTRFS_RESERVE_FLUSH_ALL, false);
if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC)
return trans;
@@ -654,25 +664,25 @@ struct btrfs_trans_handle *btrfs_start_transaction_lflush(
unsigned int num_items)
{
return start_transaction(root, num_items, TRANS_START,
- BTRFS_RESERVE_FLUSH_LIMIT);
+ BTRFS_RESERVE_FLUSH_LIMIT, true);
}
struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root)
{
- return start_transaction(root, 0, TRANS_JOIN,
- BTRFS_RESERVE_NO_FLUSH);
+ return start_transaction(root, 0, TRANS_JOIN, BTRFS_RESERVE_NO_FLUSH,
+ true);
}
struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root)
{
return start_transaction(root, 0, TRANS_JOIN_NOLOCK,
- BTRFS_RESERVE_NO_FLUSH);
+ BTRFS_RESERVE_NO_FLUSH, true);
}
struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root)
{
return start_transaction(root, 0, TRANS_USERSPACE,
- BTRFS_RESERVE_NO_FLUSH);
+ BTRFS_RESERVE_NO_FLUSH, true);
}
/*
@@ -691,7 +701,7 @@ struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root
struct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root)
{
return start_transaction(root, 0, TRANS_ATTACH,
- BTRFS_RESERVE_NO_FLUSH);
+ BTRFS_RESERVE_NO_FLUSH, true);
}
/*
@@ -707,7 +717,7 @@ btrfs_attach_transaction_barrier(struct btrfs_root *root)
struct btrfs_trans_handle *trans;
trans = start_transaction(root, 0, TRANS_ATTACH,
- BTRFS_RESERVE_NO_FLUSH);
+ BTRFS_RESERVE_NO_FLUSH, true);
if (IS_ERR(trans) && PTR_ERR(trans) == -ENOENT)
btrfs_wait_for_commit(root->fs_info, 0);
@@ -866,14 +876,14 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
if (lock && !atomic_read(&info->open_ioctl_trans) &&
should_end_transaction(trans) &&
- ACCESS_ONCE(cur_trans->state) == TRANS_STATE_RUNNING) {
+ READ_ONCE(cur_trans->state) == TRANS_STATE_RUNNING) {
spin_lock(&info->trans_lock);
if (cur_trans->state == TRANS_STATE_RUNNING)
cur_trans->state = TRANS_STATE_BLOCKED;
spin_unlock(&info->trans_lock);
}
- if (lock && ACCESS_ONCE(cur_trans->state) == TRANS_STATE_BLOCKED) {
+ if (lock && READ_ONCE(cur_trans->state) == TRANS_STATE_BLOCKED) {
if (throttle)
return btrfs_commit_transaction(trans);
else
@@ -1354,12 +1364,8 @@ static int qgroup_account_snapshot(struct btrfs_trans_handle *trans,
* enabled. If this check races with the ioctl, rescan will
* kick in anyway.
*/
- mutex_lock(&fs_info->qgroup_ioctl_lock);
- if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
- mutex_unlock(&fs_info->qgroup_ioctl_lock);
+ if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
return 0;
- }
- mutex_unlock(&fs_info->qgroup_ioctl_lock);
/*
* We are going to commit transaction, see btrfs_commit_transaction()
@@ -1504,7 +1510,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
/* check if there is a file/dir which has the same name. */
dir_item = btrfs_lookup_dir_item(NULL, parent_root, path,
- btrfs_ino(parent_inode),
+ btrfs_ino(BTRFS_I(parent_inode)),
dentry->d_name.name,
dentry->d_name.len, 0);
if (dir_item != NULL && !IS_ERR(dir_item)) {
@@ -1598,7 +1604,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
*/
ret = btrfs_add_root_ref(trans, fs_info, objectid,
parent_root->root_key.objectid,
- btrfs_ino(parent_inode), index,
+ btrfs_ino(BTRFS_I(parent_inode)), index,
dentry->d_name.name, dentry->d_name.len);
if (ret) {
btrfs_abort_transaction(trans, ret);
@@ -1940,7 +1946,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
int ret;
/* Stop the commit early if ->aborted is set */
- if (unlikely(ACCESS_ONCE(cur_trans->aborted))) {
+ if (unlikely(READ_ONCE(cur_trans->aborted))) {
ret = cur_trans->aborted;
btrfs_end_transaction(trans);
return ret;
@@ -2080,7 +2086,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
atomic_read(&cur_trans->num_writers) == 1);
/* ->aborted might be set after the previous check, so check it */
- if (unlikely(ACCESS_ONCE(cur_trans->aborted))) {
+ if (unlikely(READ_ONCE(cur_trans->aborted))) {
ret = cur_trans->aborted;
goto scrub_continue;
}
@@ -2194,14 +2200,14 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
* The tasks which save the space cache and inode cache may also
* update ->aborted, check it.
*/
- if (unlikely(ACCESS_ONCE(cur_trans->aborted))) {
+ if (unlikely(READ_ONCE(cur_trans->aborted))) {
ret = cur_trans->aborted;
mutex_unlock(&fs_info->tree_log_mutex);
mutex_unlock(&fs_info->reloc_mutex);
goto scrub_continue;
}
- btrfs_prepare_extent_commit(trans, fs_info);
+ btrfs_prepare_extent_commit(fs_info);
cur_trans = fs_info->running_transaction;
@@ -2251,7 +2257,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
goto scrub_continue;
}
- ret = write_ctree_super(trans, fs_info, 0);
+ ret = write_all_supers(fs_info, 0);
if (ret) {
mutex_unlock(&fs_info->tree_log_mutex);
goto scrub_continue;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index f10bf5213ed8..3cc972330ab3 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -37,6 +37,7 @@
*/
#define LOG_INODE_ALL 0
#define LOG_INODE_EXISTS 1
+#define LOG_OTHER_INODE 2
/*
* directory trouble cases
@@ -96,7 +97,7 @@
#define LOG_WALK_REPLAY_ALL 3
static int btrfs_log_inode(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct inode *inode,
+ struct btrfs_root *root, struct btrfs_inode *inode,
int inode_only,
const loff_t start,
const loff_t end,
@@ -630,8 +631,8 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
* file. This must be done before the btrfs_drop_extents run
* so we don't try to drop this extent.
*/
- ret = btrfs_lookup_file_extent(trans, root, path, btrfs_ino(inode),
- start, 0);
+ ret = btrfs_lookup_file_extent(trans, root, path,
+ btrfs_ino(BTRFS_I(inode)), start, 0);
if (ret == 0 &&
(found_type == BTRFS_FILE_EXTENT_REG ||
@@ -672,6 +673,10 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
unsigned long dest_offset;
struct btrfs_key ins;
+ if (btrfs_file_extent_disk_bytenr(eb, item) == 0 &&
+ btrfs_fs_incompat(fs_info, NO_HOLES))
+ goto update_inode;
+
ret = btrfs_insert_empty_item(trans, root, path, key,
sizeof(*item));
if (ret)
@@ -824,6 +829,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
}
inode_add_bytes(inode, nbytes);
+update_inode:
ret = btrfs_update_inode(trans, root, inode);
out:
if (inode)
@@ -842,7 +848,7 @@ out:
static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
- struct inode *dir,
+ struct btrfs_inode *dir,
struct btrfs_dir_item *di)
{
struct btrfs_fs_info *fs_info = root->fs_info;
@@ -874,7 +880,8 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
if (ret)
goto out;
- ret = btrfs_unlink_inode(trans, root, dir, inode, name, name_len);
+ ret = btrfs_unlink_inode(trans, root, dir, BTRFS_I(inode), name,
+ name_len);
if (ret)
goto out;
else
@@ -990,8 +997,8 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
struct btrfs_root *log_root,
- struct inode *dir, struct inode *inode,
- struct extent_buffer *eb,
+ struct btrfs_inode *dir,
+ struct btrfs_inode *inode,
u64 inode_objectid, u64 parent_objectid,
u64 ref_index, char *name, int namelen,
int *search_done)
@@ -1046,12 +1053,11 @@ again:
parent_objectid,
victim_name,
victim_name_len)) {
- inc_nlink(inode);
+ inc_nlink(&inode->vfs_inode);
btrfs_release_path(path);
- ret = btrfs_unlink_inode(trans, root, dir,
- inode, victim_name,
- victim_name_len);
+ ret = btrfs_unlink_inode(trans, root, dir, inode,
+ victim_name, victim_name_len);
kfree(victim_name);
if (ret)
return ret;
@@ -1114,16 +1120,16 @@ again:
victim_name_len)) {
ret = -ENOENT;
victim_parent = read_one_inode(root,
- parent_objectid);
+ parent_objectid);
if (victim_parent) {
- inc_nlink(inode);
+ inc_nlink(&inode->vfs_inode);
btrfs_release_path(path);
ret = btrfs_unlink_inode(trans, root,
- victim_parent,
- inode,
- victim_name,
- victim_name_len);
+ BTRFS_I(victim_parent),
+ inode,
+ victim_name,
+ victim_name_len);
if (!ret)
ret = btrfs_run_delayed_items(
trans,
@@ -1294,8 +1300,9 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
goto out;
/* if we already have a perfect match, we're done */
- if (!inode_in_dir(root, path, btrfs_ino(dir), btrfs_ino(inode),
- ref_index, name, namelen)) {
+ if (!inode_in_dir(root, path, btrfs_ino(BTRFS_I(dir)),
+ btrfs_ino(BTRFS_I(inode)), ref_index,
+ name, namelen)) {
/*
* look for a conflicting back reference in the
* metadata. if we find one we have to unlink that name
@@ -1306,7 +1313,8 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
if (!search_done) {
ret = __add_inode_ref(trans, root, path, log,
- dir, inode, eb,
+ BTRFS_I(dir),
+ BTRFS_I(inode),
inode_objectid,
parent_objectid,
ref_index, name, namelen,
@@ -1359,7 +1367,7 @@ static int insert_orphan_item(struct btrfs_trans_handle *trans,
}
static int count_inode_extrefs(struct btrfs_root *root,
- struct inode *inode, struct btrfs_path *path)
+ struct btrfs_inode *inode, struct btrfs_path *path)
{
int ret = 0;
int name_len;
@@ -1403,7 +1411,7 @@ static int count_inode_extrefs(struct btrfs_root *root,
}
static int count_inode_refs(struct btrfs_root *root,
- struct inode *inode, struct btrfs_path *path)
+ struct btrfs_inode *inode, struct btrfs_path *path)
{
int ret;
struct btrfs_key key;
@@ -1476,19 +1484,19 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
struct btrfs_path *path;
int ret;
u64 nlink = 0;
- u64 ino = btrfs_ino(inode);
+ u64 ino = btrfs_ino(BTRFS_I(inode));
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
- ret = count_inode_refs(root, inode, path);
+ ret = count_inode_refs(root, BTRFS_I(inode), path);
if (ret < 0)
goto out;
nlink = ret;
- ret = count_inode_extrefs(root, inode, path);
+ ret = count_inode_extrefs(root, BTRFS_I(inode), path);
if (ret < 0)
goto out;
@@ -1768,7 +1776,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
if (!exists)
goto out;
- ret = drop_one_dir_item(trans, root, path, dir, dst_di);
+ ret = drop_one_dir_item(trans, root, path, BTRFS_I(dir), dst_di);
if (ret)
goto out;
@@ -2051,8 +2059,8 @@ again:
}
inc_nlink(inode);
- ret = btrfs_unlink_inode(trans, root, dir, inode,
- name, name_len);
+ ret = btrfs_unlink_inode(trans, root, BTRFS_I(dir),
+ BTRFS_I(inode), name, name_len);
if (!ret)
ret = btrfs_run_delayed_items(trans, fs_info);
kfree(name);
@@ -2468,7 +2476,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
if (trans) {
btrfs_tree_lock(next);
btrfs_set_lock_blocking(next);
- clean_tree_block(trans, fs_info, next);
+ clean_tree_block(fs_info, next);
btrfs_wait_tree_block_writeback(next);
btrfs_tree_unlock(next);
}
@@ -2548,7 +2556,7 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
if (trans) {
btrfs_tree_lock(next);
btrfs_set_lock_blocking(next);
- clean_tree_block(trans, fs_info, next);
+ clean_tree_block(fs_info, next);
btrfs_wait_tree_block_writeback(next);
btrfs_tree_unlock(next);
}
@@ -2626,7 +2634,7 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
if (trans) {
btrfs_tree_lock(next);
btrfs_set_lock_blocking(next);
- clean_tree_block(trans, fs_info, next);
+ clean_tree_block(fs_info, next);
btrfs_wait_tree_block_writeback(next);
btrfs_tree_unlock(next);
}
@@ -2957,7 +2965,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
* the running transaction open, so a full commit can't hop
* in and cause problems either.
*/
- ret = write_ctree_super(trans, fs_info, 1);
+ ret = write_all_supers(fs_info, 1);
if (ret) {
btrfs_set_log_full_commit(fs_info, trans);
btrfs_abort_transaction(trans, ret);
@@ -3083,7 +3091,7 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
const char *name, int name_len,
- struct inode *dir, u64 index)
+ struct btrfs_inode *dir, u64 index)
{
struct btrfs_root *log;
struct btrfs_dir_item *di;
@@ -3093,14 +3101,14 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
int bytes_del = 0;
u64 dir_ino = btrfs_ino(dir);
- if (BTRFS_I(dir)->logged_trans < trans->transid)
+ if (dir->logged_trans < trans->transid)
return 0;
ret = join_running_log_trans(root);
if (ret)
return 0;
- mutex_lock(&BTRFS_I(dir)->log_mutex);
+ mutex_lock(&dir->log_mutex);
log = root->log_root;
path = btrfs_alloc_path();
@@ -3175,7 +3183,7 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
fail:
btrfs_free_path(path);
out_unlock:
- mutex_unlock(&BTRFS_I(dir)->log_mutex);
+ mutex_unlock(&dir->log_mutex);
if (ret == -ENOSPC) {
btrfs_set_log_full_commit(root->fs_info, trans);
ret = 0;
@@ -3191,25 +3199,25 @@ out_unlock:
int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
const char *name, int name_len,
- struct inode *inode, u64 dirid)
+ struct btrfs_inode *inode, u64 dirid)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_root *log;
u64 index;
int ret;
- if (BTRFS_I(inode)->logged_trans < trans->transid)
+ if (inode->logged_trans < trans->transid)
return 0;
ret = join_running_log_trans(root);
if (ret)
return 0;
log = root->log_root;
- mutex_lock(&BTRFS_I(inode)->log_mutex);
+ mutex_lock(&inode->log_mutex);
ret = btrfs_del_inode_ref(trans, log, name, name_len, btrfs_ino(inode),
dirid, &index);
- mutex_unlock(&BTRFS_I(inode)->log_mutex);
+ mutex_unlock(&inode->log_mutex);
if (ret == -ENOSPC) {
btrfs_set_log_full_commit(fs_info, trans);
ret = 0;
@@ -3259,7 +3267,7 @@ static noinline int insert_dir_log_key(struct btrfs_trans_handle *trans,
* to replay anything deleted before the fsync
*/
static noinline int log_dir_items(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct inode *inode,
+ struct btrfs_root *root, struct btrfs_inode *inode,
struct btrfs_path *path,
struct btrfs_path *dst_path, int key_type,
struct btrfs_log_ctx *ctx,
@@ -3449,7 +3457,7 @@ done:
* key logged by this transaction.
*/
static noinline int log_directory_changes(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct inode *inode,
+ struct btrfs_root *root, struct btrfs_inode *inode,
struct btrfs_path *path,
struct btrfs_path *dst_path,
struct btrfs_log_ctx *ctx)
@@ -3463,9 +3471,8 @@ again:
min_key = 0;
max_key = 0;
while (1) {
- ret = log_dir_items(trans, root, inode, path,
- dst_path, key_type, ctx, min_key,
- &max_key);
+ ret = log_dir_items(trans, root, inode, path, dst_path, key_type,
+ ctx, min_key, &max_key);
if (ret)
return ret;
if (max_key == (u64)-1)
@@ -3594,34 +3601,34 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
static int log_inode_item(struct btrfs_trans_handle *trans,
struct btrfs_root *log, struct btrfs_path *path,
- struct inode *inode)
+ struct btrfs_inode *inode)
{
struct btrfs_inode_item *inode_item;
int ret;
ret = btrfs_insert_empty_item(trans, log, path,
- &BTRFS_I(inode)->location,
- sizeof(*inode_item));
+ &inode->location, sizeof(*inode_item));
if (ret && ret != -EEXIST)
return ret;
inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_inode_item);
- fill_inode_item(trans, path->nodes[0], inode_item, inode, 0, 0);
+ fill_inode_item(trans, path->nodes[0], inode_item, &inode->vfs_inode,
+ 0, 0);
btrfs_release_path(path);
return 0;
}
static noinline int copy_items(struct btrfs_trans_handle *trans,
- struct inode *inode,
+ struct btrfs_inode *inode,
struct btrfs_path *dst_path,
struct btrfs_path *src_path, u64 *last_extent,
int start_slot, int nr, int inode_only,
u64 logged_isize)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
unsigned long src_offset;
unsigned long dst_offset;
- struct btrfs_root *log = BTRFS_I(inode)->root->log_root;
+ struct btrfs_root *log = inode->root->log_root;
struct btrfs_file_extent_item *extent;
struct btrfs_inode_item *inode_item;
struct extent_buffer *src = src_path->nodes[0];
@@ -3632,7 +3639,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
char *ins_data;
int i;
struct list_head ordered_sums;
- int skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
+ int skip_csum = inode->flags & BTRFS_INODE_NODATASUM;
bool has_extents = false;
bool need_find_last_extent = true;
bool done = false;
@@ -3674,7 +3681,8 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
dst_path->slots[0],
struct btrfs_inode_item);
fill_inode_item(trans, dst_path->nodes[0], inode_item,
- inode, inode_only == LOG_INODE_EXISTS,
+ &inode->vfs_inode,
+ inode_only == LOG_INODE_EXISTS,
logged_isize);
} else {
copy_extent_buffer(dst_path->nodes[0], src, dst_offset,
@@ -3782,7 +3790,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
if (need_find_last_extent) {
u64 len;
- ret = btrfs_prev_leaf(BTRFS_I(inode)->root, src_path);
+ ret = btrfs_prev_leaf(inode->root, src_path);
if (ret < 0)
return ret;
if (ret)
@@ -3824,8 +3832,8 @@ fill_holes:
if (need_find_last_extent) {
/* btrfs_prev_leaf could return 1 without releasing the path */
btrfs_release_path(src_path);
- ret = btrfs_search_slot(NULL, BTRFS_I(inode)->root, &first_key,
- src_path, 0, 0);
+ ret = btrfs_search_slot(NULL, inode->root, &first_key,
+ src_path, 0, 0);
if (ret < 0)
return ret;
ASSERT(ret == 0);
@@ -3845,7 +3853,7 @@ fill_holes:
u64 extent_end;
if (i >= btrfs_header_nritems(src_path->nodes[0])) {
- ret = btrfs_next_leaf(BTRFS_I(inode)->root, src_path);
+ ret = btrfs_next_leaf(inode->root, src_path);
if (ret < 0)
return ret;
ASSERT(ret == 0);
@@ -3880,8 +3888,7 @@ fill_holes:
offset = *last_extent;
len = key.offset - *last_extent;
ret = btrfs_insert_file_extent(trans, log, btrfs_ino(inode),
- offset, 0, 0, len, 0, len, 0,
- 0, 0);
+ offset, 0, 0, len, 0, len, 0, 0, 0);
if (ret)
break;
*last_extent = extent_end;
@@ -4054,7 +4061,7 @@ static int wait_ordered_extents(struct btrfs_trans_handle *trans,
}
static int log_one_extent(struct btrfs_trans_handle *trans,
- struct inode *inode, struct btrfs_root *root,
+ struct btrfs_inode *inode, struct btrfs_root *root,
const struct extent_map *em,
struct btrfs_path *path,
const struct list_head *logged_list,
@@ -4071,8 +4078,8 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
int extent_inserted = 0;
bool ordered_io_err = false;
- ret = wait_ordered_extents(trans, inode, root, em, logged_list,
- &ordered_io_err);
+ ret = wait_ordered_extents(trans, &inode->vfs_inode, root, em,
+ logged_list, &ordered_io_err);
if (ret)
return ret;
@@ -4083,7 +4090,7 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
btrfs_init_map_token(&token);
- ret = __btrfs_drop_extents(trans, log, inode, path, em->start,
+ ret = __btrfs_drop_extents(trans, log, &inode->vfs_inode, path, em->start,
em->start + em->len, NULL, 0, 1,
sizeof(*fi), &extent_inserted);
if (ret)
@@ -4149,7 +4156,7 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- struct inode *inode,
+ struct btrfs_inode *inode,
struct btrfs_path *path,
struct list_head *logged_list,
struct btrfs_log_ctx *ctx,
@@ -4158,14 +4165,14 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
{
struct extent_map *em, *n;
struct list_head extents;
- struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree;
+ struct extent_map_tree *tree = &inode->extent_tree;
u64 test_gen;
int ret = 0;
int num = 0;
INIT_LIST_HEAD(&extents);
- down_write(&BTRFS_I(inode)->dio_sem);
+ down_write(&inode->dio_sem);
write_lock(&tree->lock);
test_gen = root->fs_info->last_trans_committed;
@@ -4205,7 +4212,7 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
* without writing to the log tree and the fsync must report the
* file data write error and not commit the current transaction.
*/
- ret = filemap_check_errors(inode->i_mapping);
+ ret = filemap_check_errors(inode->vfs_inode.i_mapping);
if (ret)
ctx->io_err = ret;
process:
@@ -4234,13 +4241,13 @@ process:
}
WARN_ON(!list_empty(&extents));
write_unlock(&tree->lock);
- up_write(&BTRFS_I(inode)->dio_sem);
+ up_write(&inode->dio_sem);
btrfs_release_path(path);
return ret;
}
-static int logged_inode_size(struct btrfs_root *log, struct inode *inode,
+static int logged_inode_size(struct btrfs_root *log, struct btrfs_inode *inode,
struct btrfs_path *path, u64 *size_ret)
{
struct btrfs_key key;
@@ -4278,7 +4285,7 @@ static int logged_inode_size(struct btrfs_root *log, struct inode *inode,
*/
static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- struct inode *inode,
+ struct btrfs_inode *inode,
struct btrfs_path *path,
struct btrfs_path *dst_path)
{
@@ -4373,7 +4380,7 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans,
*/
static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- struct inode *inode,
+ struct btrfs_inode *inode,
struct btrfs_path *path)
{
struct btrfs_fs_info *fs_info = root->fs_info;
@@ -4384,7 +4391,7 @@ static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans,
struct extent_buffer *leaf;
struct btrfs_root *log = root->log_root;
const u64 ino = btrfs_ino(inode);
- const u64 i_size = i_size_read(inode);
+ const u64 i_size = i_size_read(&inode->vfs_inode);
if (!btrfs_fs_incompat(fs_info, NO_HOLES))
return 0;
@@ -4494,7 +4501,7 @@ static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans,
static int btrfs_check_ref_name_override(struct extent_buffer *eb,
const int slot,
const struct btrfs_key *key,
- struct inode *inode,
+ struct btrfs_inode *inode,
u64 *other_ino)
{
int ret;
@@ -4550,9 +4557,8 @@ static int btrfs_check_ref_name_override(struct extent_buffer *eb,
}
read_extent_buffer(eb, name, name_ptr, this_name_len);
- di = btrfs_lookup_dir_item(NULL, BTRFS_I(inode)->root,
- search_path, parent,
- name, this_name_len, 0);
+ di = btrfs_lookup_dir_item(NULL, inode->root, search_path,
+ parent, name, this_name_len, 0);
if (di && !IS_ERR(di)) {
struct btrfs_key di_key;
@@ -4595,7 +4601,7 @@ out:
* This handles both files and directories.
*/
static int btrfs_log_inode(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct inode *inode,
+ struct btrfs_root *root, struct btrfs_inode *inode,
int inode_only,
const loff_t start,
const loff_t end,
@@ -4617,7 +4623,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
int ins_nr;
bool fast_search = false;
u64 ino = btrfs_ino(inode);
- struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+ struct extent_map_tree *em_tree = &inode->extent_tree;
u64 logged_isize = 0;
bool need_log_inode_item = true;
@@ -4638,10 +4644,10 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
/* today the code can only do partial logging of directories */
- if (S_ISDIR(inode->i_mode) ||
+ if (S_ISDIR(inode->vfs_inode.i_mode) ||
(!test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
- &BTRFS_I(inode)->runtime_flags) &&
- inode_only == LOG_INODE_EXISTS))
+ &inode->runtime_flags) &&
+ inode_only >= LOG_INODE_EXISTS))
max_key.type = BTRFS_XATTR_ITEM_KEY;
else
max_key.type = (u8)-1;
@@ -4653,8 +4659,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
* order for the log replay code to mark inodes for link count
* fixup (create temporary BTRFS_TREE_LOG_FIXUP_OBJECTID items).
*/
- if (S_ISDIR(inode->i_mode) ||
- BTRFS_I(inode)->generation > fs_info->last_trans_committed)
+ if (S_ISDIR(inode->vfs_inode.i_mode) ||
+ inode->generation > fs_info->last_trans_committed)
ret = btrfs_commit_inode_delayed_items(trans, inode);
else
ret = btrfs_commit_inode_delayed_inode(inode);
@@ -4665,13 +4671,18 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
return ret;
}
- mutex_lock(&BTRFS_I(inode)->log_mutex);
+ if (inode_only == LOG_OTHER_INODE) {
+ inode_only = LOG_INODE_EXISTS;
+ mutex_lock_nested(&inode->log_mutex, SINGLE_DEPTH_NESTING);
+ } else {
+ mutex_lock(&inode->log_mutex);
+ }
/*
* a brute force approach to making sure we get the most uptodate
* copies of everything.
*/
- if (S_ISDIR(inode->i_mode)) {
+ if (S_ISDIR(inode->vfs_inode.i_mode)) {
int max_key_type = BTRFS_DIR_LOG_INDEX_KEY;
if (inode_only == LOG_INODE_EXISTS)
@@ -4692,31 +4703,30 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
* (zeroes), as if an expanding truncate happened,
* instead of getting a file of 4Kb only.
*/
- err = logged_inode_size(log, inode, path,
- &logged_isize);
+ err = logged_inode_size(log, inode, path, &logged_isize);
if (err)
goto out_unlock;
}
if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
- &BTRFS_I(inode)->runtime_flags)) {
+ &inode->runtime_flags)) {
if (inode_only == LOG_INODE_EXISTS) {
max_key.type = BTRFS_XATTR_ITEM_KEY;
ret = drop_objectid_items(trans, log, path, ino,
max_key.type);
} else {
clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
- &BTRFS_I(inode)->runtime_flags);
+ &inode->runtime_flags);
clear_bit(BTRFS_INODE_COPY_EVERYTHING,
- &BTRFS_I(inode)->runtime_flags);
+ &inode->runtime_flags);
while(1) {
ret = btrfs_truncate_inode_items(trans,
- log, inode, 0, 0);
+ log, &inode->vfs_inode, 0, 0);
if (ret != -EAGAIN)
break;
}
}
} else if (test_and_clear_bit(BTRFS_INODE_COPY_EVERYTHING,
- &BTRFS_I(inode)->runtime_flags) ||
+ &inode->runtime_flags) ||
inode_only == LOG_INODE_EXISTS) {
if (inode_only == LOG_INODE_ALL)
fast_search = true;
@@ -4757,18 +4767,17 @@ again:
if ((min_key.type == BTRFS_INODE_REF_KEY ||
min_key.type == BTRFS_INODE_EXTREF_KEY) &&
- BTRFS_I(inode)->generation == trans->transid) {
+ inode->generation == trans->transid) {
u64 other_ino = 0;
ret = btrfs_check_ref_name_override(path->nodes[0],
- path->slots[0],
- &min_key, inode,
- &other_ino);
+ path->slots[0], &min_key, inode,
+ &other_ino);
if (ret < 0) {
err = ret;
goto out_unlock;
} else if (ret > 0 && ctx &&
- other_ino != btrfs_ino(ctx->inode)) {
+ other_ino != btrfs_ino(BTRFS_I(ctx->inode))) {
struct btrfs_key inode_key;
struct inode *other_inode;
@@ -4816,9 +4825,10 @@ again:
* update the log with the new name before we
* unpin it.
*/
- err = btrfs_log_inode(trans, root, other_inode,
- LOG_INODE_EXISTS,
- 0, LLONG_MAX, ctx);
+ err = btrfs_log_inode(trans, root,
+ BTRFS_I(other_inode),
+ LOG_OTHER_INODE, 0, LLONG_MAX,
+ ctx);
iput(other_inode);
if (err)
goto out_unlock;
@@ -4972,25 +4982,25 @@ log_extents:
write_unlock(&em_tree->lock);
}
- if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) {
+ if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->vfs_inode.i_mode)) {
ret = log_directory_changes(trans, root, inode, path, dst_path,
- ctx);
+ ctx);
if (ret) {
err = ret;
goto out_unlock;
}
}
- spin_lock(&BTRFS_I(inode)->lock);
- BTRFS_I(inode)->logged_trans = trans->transid;
- BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans;
- spin_unlock(&BTRFS_I(inode)->lock);
+ spin_lock(&inode->lock);
+ inode->logged_trans = trans->transid;
+ inode->last_log_commit = inode->last_sub_trans;
+ spin_unlock(&inode->lock);
out_unlock:
if (unlikely(err))
btrfs_put_logged_extents(&logged_list);
else
btrfs_submit_logged_extents(&logged_list, log);
- mutex_unlock(&BTRFS_I(inode)->log_mutex);
+ mutex_unlock(&inode->log_mutex);
btrfs_free_path(path);
btrfs_free_path(dst_path);
@@ -5014,13 +5024,13 @@ out_unlock:
* we logged the inode or it might have also done the unlink).
*/
static bool btrfs_must_commit_transaction(struct btrfs_trans_handle *trans,
- struct inode *inode)
+ struct btrfs_inode *inode)
{
- struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
bool ret = false;
- mutex_lock(&BTRFS_I(inode)->log_mutex);
- if (BTRFS_I(inode)->last_unlink_trans > fs_info->last_trans_committed) {
+ mutex_lock(&inode->log_mutex);
+ if (inode->last_unlink_trans > fs_info->last_trans_committed) {
/*
* Make sure any commits to the log are forced to be full
* commits.
@@ -5028,7 +5038,7 @@ static bool btrfs_must_commit_transaction(struct btrfs_trans_handle *trans,
btrfs_set_log_full_commit(fs_info, trans);
ret = true;
}
- mutex_unlock(&BTRFS_I(inode)->log_mutex);
+ mutex_unlock(&inode->log_mutex);
return ret;
}
@@ -5077,7 +5087,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
BTRFS_I(inode)->logged_trans = trans->transid;
smp_mb();
- if (btrfs_must_commit_transaction(trans, inode)) {
+ if (btrfs_must_commit_transaction(trans, BTRFS_I(inode))) {
ret = 1;
break;
}
@@ -5087,7 +5097,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
if (IS_ROOT(parent)) {
inode = d_inode(parent);
- if (btrfs_must_commit_transaction(trans, inode))
+ if (btrfs_must_commit_transaction(trans, BTRFS_I(inode)))
ret = 1;
break;
}
@@ -5152,7 +5162,7 @@ struct btrfs_dir_list {
*/
static int log_new_dir_dentries(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- struct inode *start_inode,
+ struct btrfs_inode *start_inode,
struct btrfs_log_ctx *ctx)
{
struct btrfs_fs_info *fs_info = root->fs_info;
@@ -5230,7 +5240,7 @@ process_leaf:
goto next_dir_inode;
}
- if (btrfs_inode_in_log(di_inode, trans->transid)) {
+ if (btrfs_inode_in_log(BTRFS_I(di_inode), trans->transid)) {
iput(di_inode);
break;
}
@@ -5238,10 +5248,10 @@ process_leaf:
ctx->log_new_dentries = false;
if (type == BTRFS_FT_DIR || type == BTRFS_FT_SYMLINK)
log_mode = LOG_INODE_ALL;
- ret = btrfs_log_inode(trans, root, di_inode,
+ ret = btrfs_log_inode(trans, root, BTRFS_I(di_inode),
log_mode, 0, LLONG_MAX, ctx);
if (!ret &&
- btrfs_must_commit_transaction(trans, di_inode))
+ btrfs_must_commit_transaction(trans, BTRFS_I(di_inode)))
ret = 1;
iput(di_inode);
if (ret)
@@ -5290,7 +5300,7 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans,
struct btrfs_path *path;
struct btrfs_key key;
struct btrfs_root *root = BTRFS_I(inode)->root;
- const u64 ino = btrfs_ino(inode);
+ const u64 ino = btrfs_ino(BTRFS_I(inode));
path = btrfs_alloc_path();
if (!path)
@@ -5358,14 +5368,14 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans,
if (ctx)
ctx->log_new_dentries = false;
- ret = btrfs_log_inode(trans, root, dir_inode,
+ ret = btrfs_log_inode(trans, root, BTRFS_I(dir_inode),
LOG_INODE_ALL, 0, LLONG_MAX, ctx);
if (!ret &&
- btrfs_must_commit_transaction(trans, dir_inode))
+ btrfs_must_commit_transaction(trans, BTRFS_I(dir_inode)))
ret = 1;
if (!ret && ctx && ctx->log_new_dentries)
ret = log_new_dir_dentries(trans, root,
- dir_inode, ctx);
+ BTRFS_I(dir_inode), ctx);
iput(dir_inode);
if (ret)
goto out;
@@ -5429,7 +5439,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
if (ret)
goto end_no_trans;
- if (btrfs_inode_in_log(inode, trans->transid)) {
+ if (btrfs_inode_in_log(BTRFS_I(inode), trans->transid)) {
ret = BTRFS_NO_LOG_SYNC;
goto end_no_trans;
}
@@ -5438,7 +5448,8 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
if (ret)
goto end_no_trans;
- ret = btrfs_log_inode(trans, root, inode, inode_only, start, end, ctx);
+ ret = btrfs_log_inode(trans, root, BTRFS_I(inode), inode_only,
+ start, end, ctx);
if (ret)
goto end_trans;
@@ -5514,7 +5525,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
break;
if (BTRFS_I(inode)->generation > last_committed) {
- ret = btrfs_log_inode(trans, root, inode,
+ ret = btrfs_log_inode(trans, root, BTRFS_I(inode),
LOG_INODE_EXISTS,
0, LLONG_MAX, ctx);
if (ret)
@@ -5528,7 +5539,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
old_parent = parent;
}
if (log_dentries)
- ret = log_new_dir_dentries(trans, root, orig_inode, ctx);
+ ret = log_new_dir_dentries(trans, root, BTRFS_I(orig_inode), ctx);
else
ret = 0;
end_trans:
@@ -5723,7 +5734,7 @@ error:
* inodes, etc) are done.
*/
void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
- struct inode *dir, struct inode *inode,
+ struct btrfs_inode *dir, struct btrfs_inode *inode,
int for_rename)
{
/*
@@ -5736,23 +5747,23 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
* into the file. When the file is logged we check it and
* don't log the parents if the file is fully on disk.
*/
- mutex_lock(&BTRFS_I(inode)->log_mutex);
- BTRFS_I(inode)->last_unlink_trans = trans->transid;
- mutex_unlock(&BTRFS_I(inode)->log_mutex);
+ mutex_lock(&inode->log_mutex);
+ inode->last_unlink_trans = trans->transid;
+ mutex_unlock(&inode->log_mutex);
/*
* if this directory was already logged any new
* names for this file/dir will get recorded
*/
smp_mb();
- if (BTRFS_I(dir)->logged_trans == trans->transid)
+ if (dir->logged_trans == trans->transid)
return;
/*
* if the inode we're about to unlink was logged,
* the log will be properly updated for any new names
*/
- if (BTRFS_I(inode)->logged_trans == trans->transid)
+ if (inode->logged_trans == trans->transid)
return;
/*
@@ -5769,9 +5780,9 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
return;
record:
- mutex_lock(&BTRFS_I(dir)->log_mutex);
- BTRFS_I(dir)->last_unlink_trans = trans->transid;
- mutex_unlock(&BTRFS_I(dir)->log_mutex);
+ mutex_lock(&dir->log_mutex);
+ dir->last_unlink_trans = trans->transid;
+ mutex_unlock(&dir->log_mutex);
}
/*
@@ -5787,11 +5798,11 @@ record:
* parent root and tree of tree roots trees, etc) are done.
*/
void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,
- struct inode *dir)
+ struct btrfs_inode *dir)
{
- mutex_lock(&BTRFS_I(dir)->log_mutex);
- BTRFS_I(dir)->last_unlink_trans = trans->transid;
- mutex_unlock(&BTRFS_I(dir)->log_mutex);
+ mutex_lock(&dir->log_mutex);
+ dir->last_unlink_trans = trans->transid;
+ mutex_unlock(&dir->log_mutex);
}
/*
@@ -5802,30 +5813,28 @@ void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,
* full transaction commit is required.
*/
int btrfs_log_new_name(struct btrfs_trans_handle *trans,
- struct inode *inode, struct inode *old_dir,
+ struct btrfs_inode *inode, struct btrfs_inode *old_dir,
struct dentry *parent)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- struct btrfs_root * root = BTRFS_I(inode)->root;
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+ struct btrfs_root *root = inode->root;
/*
* this will force the logging code to walk the dentry chain
* up for the file
*/
- if (S_ISREG(inode->i_mode))
- BTRFS_I(inode)->last_unlink_trans = trans->transid;
+ if (S_ISREG(inode->vfs_inode.i_mode))
+ inode->last_unlink_trans = trans->transid;
/*
* if this inode hasn't been logged and directory we're renaming it
* from hasn't been logged, we don't need to log it
*/
- if (BTRFS_I(inode)->logged_trans <=
- fs_info->last_trans_committed &&
- (!old_dir || BTRFS_I(old_dir)->logged_trans <=
- fs_info->last_trans_committed))
+ if (inode->logged_trans <= fs_info->last_trans_committed &&
+ (!old_dir || old_dir->logged_trans <= fs_info->last_trans_committed))
return 0;
- return btrfs_log_inode_parent(trans, root, inode, parent, 0,
+ return btrfs_log_inode_parent(trans, root, &inode->vfs_inode, parent, 0,
LLONG_MAX, 1, NULL);
}
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
index ab858e31ccbc..483027f9a7f4 100644
--- a/fs/btrfs/tree-log.h
+++ b/fs/btrfs/tree-log.h
@@ -48,13 +48,13 @@ static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx,
static inline void btrfs_set_log_full_commit(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans)
{
- ACCESS_ONCE(fs_info->last_trans_log_full_commit) = trans->transid;
+ WRITE_ONCE(fs_info->last_trans_log_full_commit, trans->transid);
}
static inline int btrfs_need_log_full_commit(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans)
{
- return ACCESS_ONCE(fs_info->last_trans_log_full_commit) ==
+ return READ_ONCE(fs_info->last_trans_log_full_commit) ==
trans->transid;
}
@@ -72,19 +72,19 @@ int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
const char *name, int name_len,
- struct inode *dir, u64 index);
+ struct btrfs_inode *dir, u64 index);
int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
const char *name, int name_len,
- struct inode *inode, u64 dirid);
+ struct btrfs_inode *inode, u64 dirid);
void btrfs_end_log_trans(struct btrfs_root *root);
int btrfs_pin_log_trans(struct btrfs_root *root);
void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
- struct inode *dir, struct inode *inode,
+ struct btrfs_inode *dir, struct btrfs_inode *inode,
int for_rename);
void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,
- struct inode *dir);
+ struct btrfs_inode *dir);
int btrfs_log_new_name(struct btrfs_trans_handle *trans,
- struct inode *inode, struct inode *old_dir,
+ struct btrfs_inode *inode, struct btrfs_inode *old_dir,
struct dentry *parent);
#endif
diff --git a/fs/btrfs/ulist.c b/fs/btrfs/ulist.c
index b1434bb57e36..d8edf164f81c 100644
--- a/fs/btrfs/ulist.c
+++ b/fs/btrfs/ulist.c
@@ -52,13 +52,13 @@ void ulist_init(struct ulist *ulist)
}
/**
- * ulist_fini - free up additionally allocated memory for the ulist
+ * ulist_release - free up additionally allocated memory for the ulist
* @ulist: the ulist from which to free the additional memory
*
* This is useful in cases where the base 'struct ulist' has been statically
* allocated.
*/
-static void ulist_fini(struct ulist *ulist)
+void ulist_release(struct ulist *ulist)
{
struct ulist_node *node;
struct ulist_node *next;
@@ -79,7 +79,7 @@ static void ulist_fini(struct ulist *ulist)
*/
void ulist_reinit(struct ulist *ulist)
{
- ulist_fini(ulist);
+ ulist_release(ulist);
ulist_init(ulist);
}
@@ -105,13 +105,13 @@ struct ulist *ulist_alloc(gfp_t gfp_mask)
* ulist_free - free dynamically allocated ulist
* @ulist: ulist to free
*
- * It is not necessary to call ulist_fini before.
+ * It is not necessary to call ulist_release before.
*/
void ulist_free(struct ulist *ulist)
{
if (!ulist)
return;
- ulist_fini(ulist);
+ ulist_release(ulist);
kfree(ulist);
}
diff --git a/fs/btrfs/ulist.h b/fs/btrfs/ulist.h
index a01a2c45825f..53c913632733 100644
--- a/fs/btrfs/ulist.h
+++ b/fs/btrfs/ulist.h
@@ -19,9 +19,6 @@
*
*/
struct ulist_iterator {
-#ifdef CONFIG_BTRFS_DEBUG
- int i;
-#endif
struct list_head *cur_list; /* hint to start search */
};
@@ -32,10 +29,6 @@ struct ulist_node {
u64 val; /* value to store */
u64 aux; /* auxiliary value saved along with the val */
-#ifdef CONFIG_BTRFS_DEBUG
- int seqnum; /* sequence number this node is added */
-#endif
-
struct list_head list; /* used to link node */
struct rb_node rb_node; /* used to speed up search */
};
@@ -51,6 +44,7 @@ struct ulist {
};
void ulist_init(struct ulist *ulist);
+void ulist_release(struct ulist *ulist);
void ulist_reinit(struct ulist *ulist);
struct ulist *ulist_alloc(gfp_t gfp_mask);
void ulist_free(struct ulist *ulist);
diff --git a/fs/btrfs/uuid-tree.c b/fs/btrfs/uuid-tree.c
index 161342b73ce5..726f928238d0 100644
--- a/fs/btrfs/uuid-tree.c
+++ b/fs/btrfs/uuid-tree.c
@@ -352,7 +352,5 @@ skip:
out:
btrfs_free_path(path);
- if (ret)
- btrfs_warn(fs_info, "btrfs_uuid_tree_iterate failed %d", ret);
- return 0;
+ return ret;
}
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 3c3c69c0eee4..1fac98728814 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -134,8 +134,7 @@ const int btrfs_raid_mindev_error[BTRFS_NR_RAID_TYPES] = {
};
static int init_first_rw_device(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
- struct btrfs_device *device);
+ struct btrfs_fs_info *fs_info);
static int btrfs_relocate_sys_chunks(struct btrfs_fs_info *fs_info);
static void __btrfs_reset_dev_stats(struct btrfs_device *dev);
static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev);
@@ -2440,7 +2439,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, char *device_path)
if (seeding_dev) {
mutex_lock(&fs_info->chunk_mutex);
- ret = init_first_rw_device(trans, fs_info, device);
+ ret = init_first_rw_device(trans, fs_info);
mutex_unlock(&fs_info->chunk_mutex);
if (ret) {
btrfs_abort_transaction(trans, ret);
@@ -4584,8 +4583,7 @@ static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type)
/ sizeof(struct btrfs_stripe) + 1)
static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 start,
- u64 type)
+ u64 start, u64 type)
{
struct btrfs_fs_info *info = trans->fs_info;
struct btrfs_fs_devices *fs_devices = info->fs_devices;
@@ -5009,12 +5007,11 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
ASSERT(mutex_is_locked(&fs_info->chunk_mutex));
chunk_offset = find_next_chunk(fs_info);
- return __btrfs_alloc_chunk(trans, fs_info, chunk_offset, type);
+ return __btrfs_alloc_chunk(trans, chunk_offset, type);
}
static noinline int init_first_rw_device(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
- struct btrfs_device *device)
+ struct btrfs_fs_info *fs_info)
{
struct btrfs_root *extent_root = fs_info->extent_root;
u64 chunk_offset;
@@ -5024,14 +5021,13 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans,
chunk_offset = find_next_chunk(fs_info);
alloc_profile = btrfs_get_alloc_profile(extent_root, 0);
- ret = __btrfs_alloc_chunk(trans, fs_info, chunk_offset, alloc_profile);
+ ret = __btrfs_alloc_chunk(trans, chunk_offset, alloc_profile);
if (ret)
return ret;
sys_chunk_offset = find_next_chunk(fs_info);
alloc_profile = btrfs_get_alloc_profile(fs_info->chunk_root, 0);
- ret = __btrfs_alloc_chunk(trans, fs_info, sys_chunk_offset,
- alloc_profile);
+ ret = __btrfs_alloc_chunk(trans, sys_chunk_offset, alloc_profile);
return ret;
}
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 9621c7f2503e..b3cbf80c5acf 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -47,8 +47,8 @@ ssize_t __btrfs_getxattr(struct inode *inode, const char *name,
return -ENOMEM;
/* lookup the xattr by name */
- di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode), name,
- strlen(name), 0);
+ di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(BTRFS_I(inode)),
+ name, strlen(name), 0);
if (!di) {
ret = -ENODATA;
goto out;
@@ -108,8 +108,8 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
path->skip_release_on_error = 1;
if (!value) {
- di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode),
- name, name_len, -1);
+ di = btrfs_lookup_xattr(trans, root, path,
+ btrfs_ino(BTRFS_I(inode)), name, name_len, -1);
if (!di && (flags & XATTR_REPLACE))
ret = -ENODATA;
else if (IS_ERR(di))
@@ -128,8 +128,8 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
*/
if (flags & XATTR_REPLACE) {
ASSERT(inode_is_locked(inode));
- di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode),
- name, name_len, 0);
+ di = btrfs_lookup_xattr(NULL, root, path,
+ btrfs_ino(BTRFS_I(inode)), name, name_len, 0);
if (!di)
ret = -ENODATA;
else if (IS_ERR(di))
@@ -140,7 +140,7 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
di = NULL;
}
- ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode),
+ ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(BTRFS_I(inode)),
name, name_len, value, size);
if (ret == -EOVERFLOW) {
/*
@@ -278,7 +278,7 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
* NOTE: we set key.offset = 0; because we want to start with the
* first xattr that we find and walk forward
*/
- key.objectid = btrfs_ino(inode);
+ key.objectid = btrfs_ino(BTRFS_I(inode));
key.type = BTRFS_XATTR_ITEM_KEY;
key.offset = 0;
diff --git a/fs/buffer.c b/fs/buffer.c
index d21771fcf7d3..0e87401cf335 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1660,7 +1660,7 @@ void clean_bdev_aliases(struct block_device *bdev, sector_t block, sector_t len)
head = page_buffers(page);
bh = head;
do {
- if (!buffer_mapped(bh))
+ if (!buffer_mapped(bh) || (bh->b_blocknr < block))
goto next;
if (bh->b_blocknr >= block + len)
break;
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 9cd0c0ea7cdb..e4b066cd912a 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -502,9 +502,9 @@ static struct ceph_snap_context *get_oldest_context(struct inode *inode,
dout(" head snapc %p has %d dirty pages\n",
snapc, ci->i_wrbuffer_ref_head);
if (truncate_size)
- *truncate_size = capsnap->truncate_size;
+ *truncate_size = ci->i_truncate_size;
if (truncate_seq)
- *truncate_seq = capsnap->truncate_seq;
+ *truncate_seq = ci->i_truncate_seq;
}
spin_unlock(&ci->i_ceph_lock);
return snapc;
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index baea866a6751..94fd76d04683 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -2591,8 +2591,13 @@ int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
add_wait_queue(&ci->i_cap_wq, &wait);
while (!try_get_cap_refs(ci, need, want, endoff,
- true, &_got, &err))
+ true, &_got, &err)) {
+ if (signal_pending(current)) {
+ ret = -ERESTARTSYS;
+ break;
+ }
wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
+ }
remove_wait_queue(&ci->i_cap_wq, &wait);
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index d7a93696663b..8ab1fdf0bd49 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -1230,7 +1230,8 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
struct ceph_mds_client *mdsc =
ceph_sb_to_client(dir->i_sb)->mdsc;
struct ceph_mds_request *req;
- int op, mask, err;
+ int op, err;
+ u32 mask;
if (flags & LOOKUP_RCU)
return -ECHILD;
@@ -1245,7 +1246,7 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
mask = CEPH_STAT_CAP_INODE | CEPH_CAP_AUTH_SHARED;
if (ceph_security_xattr_wanted(dir))
mask |= CEPH_CAP_XATTR_SHARED;
- req->r_args.getattr.mask = mask;
+ req->r_args.getattr.mask = cpu_to_le32(mask);
err = ceph_mdsc_do_request(mdsc, NULL, req);
switch (err) {
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 398e5328b309..5e659d054b40 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -305,7 +305,8 @@ static int frag_tree_split_cmp(const void *l, const void *r)
{
struct ceph_frag_tree_split *ls = (struct ceph_frag_tree_split*)l;
struct ceph_frag_tree_split *rs = (struct ceph_frag_tree_split*)r;
- return ceph_frag_compare(ls->frag, rs->frag);
+ return ceph_frag_compare(le32_to_cpu(ls->frag),
+ le32_to_cpu(rs->frag));
}
static bool is_frag_child(u32 f, struct ceph_inode_frag *frag)
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 4f49253387a0..c9d2e553a6c4 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -288,12 +288,13 @@ static int parse_reply_info_extra(void **p, void *end,
struct ceph_mds_reply_info_parsed *info,
u64 features)
{
- if (info->head->op == CEPH_MDS_OP_GETFILELOCK)
+ u32 op = le32_to_cpu(info->head->op);
+
+ if (op == CEPH_MDS_OP_GETFILELOCK)
return parse_reply_info_filelock(p, end, info, features);
- else if (info->head->op == CEPH_MDS_OP_READDIR ||
- info->head->op == CEPH_MDS_OP_LSSNAP)
+ else if (op == CEPH_MDS_OP_READDIR || op == CEPH_MDS_OP_LSSNAP)
return parse_reply_info_dir(p, end, info, features);
- else if (info->head->op == CEPH_MDS_OP_CREATE)
+ else if (op == CEPH_MDS_OP_CREATE)
return parse_reply_info_create(p, end, info, features);
else
return -EIO;
@@ -2106,6 +2107,11 @@ static int __do_request(struct ceph_mds_client *mdsc,
dout("do_request mdsmap err %d\n", err);
goto finish;
}
+ if (mdsc->mdsmap->m_epoch == 0) {
+ dout("do_request no mdsmap, waiting for map\n");
+ list_add(&req->r_wait, &mdsc->waiting_for_map);
+ goto finish;
+ }
if (!(mdsc->fsc->mount_options->flags &
CEPH_MOUNT_OPT_MOUNTWAIT) &&
!ceph_mdsmap_is_cluster_available(mdsc->mdsmap)) {
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index e7b478b49985..034f00f21390 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -9,8 +9,6 @@ config CIFS
select CRYPTO_ARC4
select CRYPTO_ECB
select CRYPTO_DES
- select CRYPTO_SHA256
- select CRYPTO_CMAC
help
This is the client VFS module for the Common Internet File System
(CIFS) protocol which is the successor to the Server Message Block
@@ -169,11 +167,15 @@ config CIFS_NFSD_EXPORT
config CIFS_SMB2
bool "SMB2 and SMB3 network file system support"
- depends on CIFS && INET
- select NLS
+ depends on CIFS
select KEYS
select FSCACHE
select DNS_RESOLVER
+ select CRYPTO_AES
+ select CRYPTO_SHA256
+ select CRYPTO_CMAC
+ select CRYPTO_AEAD2
+ select CRYPTO_CCM
help
This enables support for the Server Message Block version 2
@@ -194,7 +196,7 @@ config CIFS_SMB2
config CIFS_SMB311
bool "SMB3.1.1 network file system support (Experimental)"
- depends on CIFS_SMB2 && INET
+ depends on CIFS_SMB2
help
This enables experimental support for the newest, SMB3.1.1, dialect.
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 66bd7fa9b7a6..058ac9b36f04 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -34,6 +34,7 @@
#include <linux/random.h>
#include <linux/highmem.h>
#include <crypto/skcipher.h>
+#include <crypto/aead.h>
static int
cifs_crypto_shash_md5_allocate(struct TCP_Server_Info *server)
@@ -75,24 +76,20 @@ int __cifs_calc_signature(struct smb_rqst *rqst,
struct kvec *iov = rqst->rq_iov;
int n_vec = rqst->rq_nvec;
- for (i = 0; i < n_vec; i++) {
+ if (n_vec < 2 || iov[0].iov_len != 4)
+ return -EIO;
+
+ for (i = 1; i < n_vec; i++) {
if (iov[i].iov_len == 0)
continue;
if (iov[i].iov_base == NULL) {
cifs_dbg(VFS, "null iovec entry\n");
return -EIO;
}
- /* The first entry includes a length field (which does not get
- signed that occupies the first 4 bytes before the header */
- if (i == 0) {
- if (iov[0].iov_len <= 8) /* cmd field at offset 9 */
- break; /* nothing to sign or corrupt header */
- rc = crypto_shash_update(shash,
- iov[i].iov_base + 4, iov[i].iov_len - 4);
- } else {
- rc = crypto_shash_update(shash,
- iov[i].iov_base, iov[i].iov_len);
- }
+ if (i == 1 && iov[1].iov_len <= 4)
+ break; /* nothing to sign or corrupt header */
+ rc = crypto_shash_update(shash,
+ iov[i].iov_base, iov[i].iov_len);
if (rc) {
cifs_dbg(VFS, "%s: Could not update with payload\n",
__func__);
@@ -168,6 +165,10 @@ int cifs_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server,
char smb_signature[20];
struct smb_hdr *cifs_pdu = (struct smb_hdr *)rqst->rq_iov[0].iov_base;
+ if (rqst->rq_iov[0].iov_len != 4 ||
+ rqst->rq_iov[0].iov_base + 4 != rqst->rq_iov[1].iov_base)
+ return -EIO;
+
if ((cifs_pdu == NULL) || (server == NULL))
return -EINVAL;
@@ -209,12 +210,14 @@ int cifs_sign_smbv(struct kvec *iov, int n_vec, struct TCP_Server_Info *server,
int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server,
__u32 *pexpected_response_sequence_number)
{
- struct kvec iov;
+ struct kvec iov[2];
- iov.iov_base = cifs_pdu;
- iov.iov_len = be32_to_cpu(cifs_pdu->smb_buf_length) + 4;
+ iov[0].iov_base = cifs_pdu;
+ iov[0].iov_len = 4;
+ iov[1].iov_base = (char *)cifs_pdu + 4;
+ iov[1].iov_len = be32_to_cpu(cifs_pdu->smb_buf_length);
- return cifs_sign_smbv(&iov, 1, server,
+ return cifs_sign_smbv(iov, 2, server,
pexpected_response_sequence_number);
}
@@ -227,6 +230,10 @@ int cifs_verify_signature(struct smb_rqst *rqst,
char what_we_think_sig_should_be[20];
struct smb_hdr *cifs_pdu = (struct smb_hdr *)rqst->rq_iov[0].iov_base;
+ if (rqst->rq_iov[0].iov_len != 4 ||
+ rqst->rq_iov[0].iov_base + 4 != rqst->rq_iov[1].iov_base)
+ return -EIO;
+
if (cifs_pdu == NULL || server == NULL)
return -EINVAL;
@@ -868,7 +875,7 @@ out:
}
void
-cifs_crypto_shash_release(struct TCP_Server_Info *server)
+cifs_crypto_secmech_release(struct TCP_Server_Info *server)
{
if (server->secmech.cmacaes) {
crypto_free_shash(server->secmech.cmacaes);
@@ -890,6 +897,16 @@ cifs_crypto_shash_release(struct TCP_Server_Info *server)
server->secmech.hmacmd5 = NULL;
}
+ if (server->secmech.ccmaesencrypt) {
+ crypto_free_aead(server->secmech.ccmaesencrypt);
+ server->secmech.ccmaesencrypt = NULL;
+ }
+
+ if (server->secmech.ccmaesdecrypt) {
+ crypto_free_aead(server->secmech.ccmaesdecrypt);
+ server->secmech.ccmaesdecrypt = NULL;
+ }
+
kfree(server->secmech.sdesccmacaes);
server->secmech.sdesccmacaes = NULL;
kfree(server->secmech.sdeschmacsha256);
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 70f4e65fced2..15e1db8738ae 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -1365,5 +1365,19 @@ MODULE_DESCRIPTION
("VFS to access servers complying with the SNIA CIFS Specification "
"e.g. Samba and Windows");
MODULE_VERSION(CIFS_VERSION);
+MODULE_SOFTDEP("pre: arc4");
+MODULE_SOFTDEP("pre: des");
+MODULE_SOFTDEP("pre: ecb");
+MODULE_SOFTDEP("pre: hmac");
+MODULE_SOFTDEP("pre: md4");
+MODULE_SOFTDEP("pre: md5");
+MODULE_SOFTDEP("pre: nls");
+#ifdef CONFIG_CIFS_SMB2
+MODULE_SOFTDEP("pre: aes");
+MODULE_SOFTDEP("pre: cmac");
+MODULE_SOFTDEP("pre: sha256");
+MODULE_SOFTDEP("pre: aead2");
+MODULE_SOFTDEP("pre: ccm");
+#endif /* CONFIG_CIFS_SMB2 */
module_init(init_cifs)
module_exit(exit_cifs)
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 7ea8a3393936..1a90bb3e2986 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -136,6 +136,8 @@ struct cifs_secmech {
struct sdesc *sdescmd5; /* ctxt to generate cifs/smb signature */
struct sdesc *sdeschmacsha256; /* ctxt to generate smb2 signature */
struct sdesc *sdesccmacaes; /* ctxt to generate smb3 signature */
+ struct crypto_aead *ccmaesencrypt; /* smb3 encryption aead */
+ struct crypto_aead *ccmaesdecrypt; /* smb3 decryption aead */
};
/* per smb session structure/fields */
@@ -208,7 +210,7 @@ struct cifsInodeInfo;
struct cifs_open_parms;
struct smb_version_operations {
- int (*send_cancel)(struct TCP_Server_Info *, void *,
+ int (*send_cancel)(struct TCP_Server_Info *, struct smb_rqst *,
struct mid_q_entry *);
bool (*compare_fids)(struct cifsFileInfo *, struct cifsFileInfo *);
/* setup request: allocate mid, sign message */
@@ -433,6 +435,14 @@ struct smb_version_operations {
bool (*dir_needs_close)(struct cifsFileInfo *);
long (*fallocate)(struct file *, struct cifs_tcon *, int, loff_t,
loff_t);
+ /* init transform request - used for encryption for now */
+ int (*init_transform_rq)(struct TCP_Server_Info *, struct smb_rqst *,
+ struct smb_rqst *);
+ /* free transform request */
+ void (*free_transform_rq)(struct smb_rqst *);
+ int (*is_transform_hdr)(void *buf);
+ int (*receive_transform)(struct TCP_Server_Info *,
+ struct mid_q_entry **);
};
struct smb_version_values {
@@ -1119,7 +1129,10 @@ struct cifs_readdata {
int (*read_into_pages)(struct TCP_Server_Info *server,
struct cifs_readdata *rdata,
unsigned int len);
- struct kvec iov;
+ int (*copy_into_pages)(struct TCP_Server_Info *server,
+ struct cifs_readdata *rdata,
+ struct iov_iter *iter);
+ struct kvec iov[2];
unsigned int pagesz;
unsigned int tailsz;
unsigned int credits;
@@ -1302,6 +1315,13 @@ typedef int (mid_receive_t)(struct TCP_Server_Info *server,
*/
typedef void (mid_callback_t)(struct mid_q_entry *mid);
+/*
+ * This is the protopyte for mid handle function. This is called once the mid
+ * has been recognized after decryption of the message.
+ */
+typedef int (mid_handle_t)(struct TCP_Server_Info *server,
+ struct mid_q_entry *mid);
+
/* one of these for every pending CIFS request to the server */
struct mid_q_entry {
struct list_head qhead; /* mids waiting on reply from this server */
@@ -1316,6 +1336,7 @@ struct mid_q_entry {
#endif
mid_receive_t *receive; /* call receive callback */
mid_callback_t *callback; /* call completion callback */
+ mid_handle_t *handle; /* call handle mid callback */
void *callback_data; /* general purpose pointer for callback */
void *resp_buf; /* pointer to received SMB header */
int mid_state; /* wish this were enum but can not pass to wait_event */
@@ -1323,6 +1344,7 @@ struct mid_q_entry {
bool large_buf:1; /* if valid response, is pointer to large buf */
bool multiRsp:1; /* multiple trans2 responses for one request */
bool multiEnd:1; /* both received */
+ bool decrypted:1; /* decrypted entry */
};
/* Make code in transport.c a little cleaner by moving
@@ -1475,7 +1497,9 @@ static inline void free_dfs_info_array(struct dfs_info3_param *param,
#define CIFS_OBREAK_OP 0x0100 /* oplock break request */
#define CIFS_NEG_OP 0x0200 /* negotiate request */
#define CIFS_OP_MASK 0x0380 /* mask request type */
+
#define CIFS_HAS_CREDITS 0x0400 /* already has credits */
+#define CIFS_TRANSFORM_REQ 0x0800 /* transform request before sending */
/* Security Flags: indicate type of session setup needed */
#define CIFSSEC_MAY_SIGN 0x00001
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index c7b3c841e660..406d2c10ba78 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -75,10 +75,16 @@ extern struct mid_q_entry *AllocMidQEntry(const struct smb_hdr *smb_buffer,
extern void DeleteMidQEntry(struct mid_q_entry *midEntry);
extern void cifs_delete_mid(struct mid_q_entry *mid);
extern void cifs_wake_up_task(struct mid_q_entry *mid);
+extern int cifs_handle_standard(struct TCP_Server_Info *server,
+ struct mid_q_entry *mid);
+extern int cifs_discard_remaining_data(struct TCP_Server_Info *server);
extern int cifs_call_async(struct TCP_Server_Info *server,
struct smb_rqst *rqst,
mid_receive_t *receive, mid_callback_t *callback,
- void *cbdata, const int flags);
+ mid_handle_t *handle, void *cbdata, const int flags);
+extern int cifs_send_recv(const unsigned int xid, struct cifs_ses *ses,
+ struct smb_rqst *rqst, int *resp_buf_type,
+ const int flags, struct kvec *resp_iov);
extern int SendReceive(const unsigned int /* xid */ , struct cifs_ses *,
struct smb_hdr * /* input */ ,
struct smb_hdr * /* out */ ,
@@ -96,7 +102,8 @@ extern int cifs_wait_mtu_credits(struct TCP_Server_Info *server,
unsigned int *credits);
extern int SendReceive2(const unsigned int /* xid */ , struct cifs_ses *,
struct kvec *, int /* nvec to send */,
- int * /* type of buf returned */ , const int flags);
+ int * /* type of buf returned */, const int flags,
+ struct kvec * /* resp vec */);
extern int SendReceiveBlockingLock(const unsigned int xid,
struct cifs_tcon *ptcon,
struct smb_hdr *in_buf ,
@@ -441,7 +448,7 @@ extern int SMBNTencrypt(unsigned char *, unsigned char *, unsigned char *,
const struct nls_table *);
extern int setup_ntlm_response(struct cifs_ses *, const struct nls_table *);
extern int setup_ntlmv2_rsp(struct cifs_ses *, const struct nls_table *);
-extern void cifs_crypto_shash_release(struct TCP_Server_Info *);
+extern void cifs_crypto_secmech_release(struct TCP_Server_Info *server);
extern int calc_seckey(struct cifs_ses *);
extern int generate_smb30signingkey(struct cifs_ses *);
extern int generate_smb311signingkey(struct cifs_ses *);
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index b47261858e6d..f5099fb8a22f 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -673,6 +673,7 @@ CIFSSMBTDis(const unsigned int xid, struct cifs_tcon *tcon)
return rc;
rc = SendReceiveNoRsp(xid, tcon->ses, (char *)smb_buffer, 0);
+ cifs_small_buf_release(smb_buffer);
if (rc)
cifs_dbg(FYI, "Tree disconnect failed %d\n", rc);
@@ -707,9 +708,9 @@ CIFSSMBEcho(struct TCP_Server_Info *server)
{
ECHO_REQ *smb;
int rc = 0;
- struct kvec iov;
- struct smb_rqst rqst = { .rq_iov = &iov,
- .rq_nvec = 1 };
+ struct kvec iov[2];
+ struct smb_rqst rqst = { .rq_iov = iov,
+ .rq_nvec = 2 };
cifs_dbg(FYI, "In echo request\n");
@@ -724,10 +725,13 @@ CIFSSMBEcho(struct TCP_Server_Info *server)
put_bcc(1, &smb->hdr);
smb->Data[0] = 'a';
inc_rfc1001_len(smb, 3);
- iov.iov_base = smb;
- iov.iov_len = be32_to_cpu(smb->hdr.smb_buf_length) + 4;
- rc = cifs_call_async(server, &rqst, NULL, cifs_echo_callback,
+ iov[0].iov_len = 4;
+ iov[0].iov_base = smb;
+ iov[1].iov_len = get_rfc1002_length(smb);
+ iov[1].iov_base = (char *)smb + 4;
+
+ rc = cifs_call_async(server, &rqst, NULL, cifs_echo_callback, NULL,
server, CIFS_ASYNC_OP | CIFS_ECHO_OP);
if (rc)
cifs_dbg(FYI, "Echo request failed: %d\n", rc);
@@ -772,6 +776,7 @@ CIFSSMBLogoff(const unsigned int xid, struct cifs_ses *ses)
pSMB->AndXCommand = 0xFF;
rc = SendReceiveNoRsp(xid, ses, (char *) pSMB, 0);
+ cifs_small_buf_release(pSMB);
session_already_dead:
mutex_unlock(&ses->session_mutex);
@@ -1394,8 +1399,8 @@ openRetry:
* Discard any remaining data in the current SMB. To do this, we borrow the
* current bigbuf.
*/
-static int
-discard_remaining_data(struct TCP_Server_Info *server)
+int
+cifs_discard_remaining_data(struct TCP_Server_Info *server)
{
unsigned int rfclen = get_rfc1002_length(server->smallbuf);
int remaining = rfclen + 4 - server->total_read;
@@ -1421,7 +1426,7 @@ cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid)
int length;
struct cifs_readdata *rdata = mid->callback_data;
- length = discard_remaining_data(server);
+ length = cifs_discard_remaining_data(server);
dequeue_mid(mid, rdata->result);
return length;
}
@@ -1454,7 +1459,7 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
if (server->ops->is_status_pending &&
server->ops->is_status_pending(buf, server, 0)) {
- discard_remaining_data(server);
+ cifs_discard_remaining_data(server);
return -1;
}
@@ -1507,10 +1512,12 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
}
/* set up first iov for signature check */
- rdata->iov.iov_base = buf;
- rdata->iov.iov_len = server->total_read;
- cifs_dbg(FYI, "0: iov_base=%p iov_len=%zu\n",
- rdata->iov.iov_base, rdata->iov.iov_len);
+ rdata->iov[0].iov_base = buf;
+ rdata->iov[0].iov_len = 4;
+ rdata->iov[1].iov_base = buf + 4;
+ rdata->iov[1].iov_len = server->total_read - 4;
+ cifs_dbg(FYI, "0: iov_base=%p iov_len=%u\n",
+ rdata->iov[0].iov_base, server->total_read);
/* how much data is in the response? */
data_len = server->ops->read_data_length(buf);
@@ -1543,8 +1550,8 @@ cifs_readv_callback(struct mid_q_entry *mid)
struct cifs_readdata *rdata = mid->callback_data;
struct cifs_tcon *tcon = tlink_tcon(rdata->cfile->tlink);
struct TCP_Server_Info *server = tcon->ses->server;
- struct smb_rqst rqst = { .rq_iov = &rdata->iov,
- .rq_nvec = 1,
+ struct smb_rqst rqst = { .rq_iov = rdata->iov,
+ .rq_nvec = 2,
.rq_pages = rdata->pages,
.rq_npages = rdata->nr_pages,
.rq_pagesz = rdata->pagesz,
@@ -1599,8 +1606,8 @@ cifs_async_readv(struct cifs_readdata *rdata)
READ_REQ *smb = NULL;
int wct;
struct cifs_tcon *tcon = tlink_tcon(rdata->cfile->tlink);
- struct smb_rqst rqst = { .rq_iov = &rdata->iov,
- .rq_nvec = 1 };
+ struct smb_rqst rqst = { .rq_iov = rdata->iov,
+ .rq_nvec = 2 };
cifs_dbg(FYI, "%s: offset=%llu bytes=%u\n",
__func__, rdata->offset, rdata->bytes);
@@ -1640,12 +1647,14 @@ cifs_async_readv(struct cifs_readdata *rdata)
}
/* 4 for RFC1001 length + 1 for BCC */
- rdata->iov.iov_base = smb;
- rdata->iov.iov_len = be32_to_cpu(smb->hdr.smb_buf_length) + 4;
+ rdata->iov[0].iov_base = smb;
+ rdata->iov[0].iov_len = 4;
+ rdata->iov[1].iov_base = (char *)smb + 4;
+ rdata->iov[1].iov_len = get_rfc1002_length(smb);
kref_get(&rdata->refcount);
rc = cifs_call_async(tcon->ses->server, &rqst, cifs_readv_receive,
- cifs_readv_callback, rdata, 0);
+ cifs_readv_callback, NULL, rdata, 0);
if (rc == 0)
cifs_stats_inc(&tcon->stats.cifs_stats.num_reads);
@@ -1667,6 +1676,7 @@ CIFSSMBRead(const unsigned int xid, struct cifs_io_parms *io_parms,
int wct;
int resp_buf_type = 0;
struct kvec iov[1];
+ struct kvec rsp_iov;
__u32 pid = io_parms->pid;
__u16 netfid = io_parms->netfid;
__u64 offset = io_parms->offset;
@@ -1716,10 +1726,11 @@ CIFSSMBRead(const unsigned int xid, struct cifs_io_parms *io_parms,
iov[0].iov_base = (char *)pSMB;
iov[0].iov_len = be32_to_cpu(pSMB->hdr.smb_buf_length) + 4;
- rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovecs */,
- &resp_buf_type, CIFS_LOG_ERROR);
+ rc = SendReceive2(xid, tcon->ses, iov, 1, &resp_buf_type,
+ CIFS_LOG_ERROR, &rsp_iov);
+ cifs_small_buf_release(pSMB);
cifs_stats_inc(&tcon->stats.cifs_stats.num_reads);
- pSMBr = (READ_RSP *)iov[0].iov_base;
+ pSMBr = (READ_RSP *)rsp_iov.iov_base;
if (rc) {
cifs_dbg(VFS, "Send error in read = %d\n", rc);
} else {
@@ -1747,12 +1758,11 @@ CIFSSMBRead(const unsigned int xid, struct cifs_io_parms *io_parms,
}
}
-/* cifs_small_buf_release(pSMB); */ /* Freed earlier now in SendReceive2 */
if (*buf) {
- free_rsp_buf(resp_buf_type, iov[0].iov_base);
+ free_rsp_buf(resp_buf_type, rsp_iov.iov_base);
} else if (resp_buf_type != CIFS_NO_BUFFER) {
/* return buffer to caller to free */
- *buf = iov[0].iov_base;
+ *buf = rsp_iov.iov_base;
if (resp_buf_type == CIFS_SMALL_BUFFER)
*pbuf_type = CIFS_SMALL_BUFFER;
else if (resp_buf_type == CIFS_LARGE_BUFFER)
@@ -2093,7 +2103,7 @@ cifs_async_writev(struct cifs_writedata *wdata,
WRITE_REQ *smb = NULL;
int wct;
struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink);
- struct kvec iov;
+ struct kvec iov[2];
struct smb_rqst rqst = { };
if (tcon->ses->capabilities & CAP_LARGE_FILES) {
@@ -2126,11 +2136,13 @@ cifs_async_writev(struct cifs_writedata *wdata,
cpu_to_le16(offsetof(struct smb_com_write_req, Data) - 4);
/* 4 for RFC1001 length + 1 for BCC */
- iov.iov_len = be32_to_cpu(smb->hdr.smb_buf_length) + 4 + 1;
- iov.iov_base = smb;
+ iov[0].iov_len = 4;
+ iov[0].iov_base = smb;
+ iov[1].iov_len = get_rfc1002_length(smb) + 1;
+ iov[1].iov_base = (char *)smb + 4;
- rqst.rq_iov = &iov;
- rqst.rq_nvec = 1;
+ rqst.rq_iov = iov;
+ rqst.rq_nvec = 2;
rqst.rq_pages = wdata->pages;
rqst.rq_npages = wdata->nr_pages;
rqst.rq_pagesz = wdata->pagesz;
@@ -2151,12 +2163,12 @@ cifs_async_writev(struct cifs_writedata *wdata,
(struct smb_com_writex_req *)smb;
inc_rfc1001_len(&smbw->hdr, wdata->bytes + 5);
put_bcc(wdata->bytes + 5, &smbw->hdr);
- iov.iov_len += 4; /* pad bigger by four bytes */
+ iov[1].iov_len += 4; /* pad bigger by four bytes */
}
kref_get(&wdata->refcount);
rc = cifs_call_async(tcon->ses->server, &rqst, NULL,
- cifs_writev_callback, wdata, 0);
+ cifs_writev_callback, NULL, wdata, 0);
if (rc == 0)
cifs_stats_inc(&tcon->stats.cifs_stats.num_writes);
@@ -2182,6 +2194,7 @@ CIFSSMBWrite2(const unsigned int xid, struct cifs_io_parms *io_parms,
__u64 offset = io_parms->offset;
struct cifs_tcon *tcon = io_parms->tcon;
unsigned int count = io_parms->length;
+ struct kvec rsp_iov;
*nbytes = 0;
@@ -2240,8 +2253,9 @@ CIFSSMBWrite2(const unsigned int xid, struct cifs_io_parms *io_parms,
else /* wct == 12 pad bigger by four bytes */
iov[0].iov_len = smb_hdr_len + 8;
-
- rc = SendReceive2(xid, tcon->ses, iov, n_vec + 1, &resp_buf_type, 0);
+ rc = SendReceive2(xid, tcon->ses, iov, n_vec + 1, &resp_buf_type, 0,
+ &rsp_iov);
+ cifs_small_buf_release(pSMB);
cifs_stats_inc(&tcon->stats.cifs_stats.num_writes);
if (rc) {
cifs_dbg(FYI, "Send error Write2 = %d\n", rc);
@@ -2249,7 +2263,7 @@ CIFSSMBWrite2(const unsigned int xid, struct cifs_io_parms *io_parms,
/* presumably this can not happen, but best to be safe */
rc = -EIO;
} else {
- WRITE_RSP *pSMBr = (WRITE_RSP *)iov[0].iov_base;
+ WRITE_RSP *pSMBr = (WRITE_RSP *)rsp_iov.iov_base;
*nbytes = le16_to_cpu(pSMBr->CountHigh);
*nbytes = (*nbytes) << 16;
*nbytes += le16_to_cpu(pSMBr->Count);
@@ -2263,8 +2277,7 @@ CIFSSMBWrite2(const unsigned int xid, struct cifs_io_parms *io_parms,
*nbytes &= 0xFFFF;
}
-/* cifs_small_buf_release(pSMB); */ /* Freed earlier now in SendReceive2 */
- free_rsp_buf(resp_buf_type, iov[0].iov_base);
+ free_rsp_buf(resp_buf_type, rsp_iov.iov_base);
/* Note: On -EAGAIN error only caller can retry on handle based calls
since file handle passed in no longer valid */
@@ -2279,6 +2292,7 @@ int cifs_lockv(const unsigned int xid, struct cifs_tcon *tcon,
int rc = 0;
LOCK_REQ *pSMB = NULL;
struct kvec iov[2];
+ struct kvec rsp_iov;
int resp_buf_type;
__u16 count;
@@ -2307,7 +2321,9 @@ int cifs_lockv(const unsigned int xid, struct cifs_tcon *tcon,
iov[1].iov_len = (num_unlock + num_lock) * sizeof(LOCKING_ANDX_RANGE);
cifs_stats_inc(&tcon->stats.cifs_stats.num_locks);
- rc = SendReceive2(xid, tcon->ses, iov, 2, &resp_buf_type, CIFS_NO_RESP);
+ rc = SendReceive2(xid, tcon->ses, iov, 2, &resp_buf_type, CIFS_NO_RESP,
+ &rsp_iov);
+ cifs_small_buf_release(pSMB);
if (rc)
cifs_dbg(FYI, "Send error in cifs_lockv = %d\n", rc);
@@ -2368,14 +2384,12 @@ CIFSSMBLock(const unsigned int xid, struct cifs_tcon *tcon,
inc_rfc1001_len(pSMB, count);
pSMB->ByteCount = cpu_to_le16(count);
- if (waitFlag) {
+ if (waitFlag)
rc = SendReceiveBlockingLock(xid, tcon, (struct smb_hdr *) pSMB,
(struct smb_hdr *) pSMB, &bytes_returned);
- cifs_small_buf_release(pSMB);
- } else {
+ else
rc = SendReceiveNoRsp(xid, tcon->ses, (char *)pSMB, flags);
- /* SMB buffer freed by function above */
- }
+ cifs_small_buf_release(pSMB);
cifs_stats_inc(&tcon->stats.cifs_stats.num_locks);
if (rc)
cifs_dbg(FYI, "Send error in Lock = %d\n", rc);
@@ -2401,6 +2415,7 @@ CIFSSMBPosixLock(const unsigned int xid, struct cifs_tcon *tcon,
int resp_buf_type = 0;
__u16 params, param_offset, offset, byte_count, count;
struct kvec iov[1];
+ struct kvec rsp_iov;
cifs_dbg(FYI, "Posix Lock\n");
@@ -2462,11 +2477,10 @@ CIFSSMBPosixLock(const unsigned int xid, struct cifs_tcon *tcon,
iov[0].iov_base = (char *)pSMB;
iov[0].iov_len = be32_to_cpu(pSMB->hdr.smb_buf_length) + 4;
rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovecs */,
- &resp_buf_type, timeout);
- pSMB = NULL; /* request buf already freed by SendReceive2. Do
- not try to free it twice below on exit */
- pSMBr = (struct smb_com_transaction2_sfi_rsp *)iov[0].iov_base;
+ &resp_buf_type, timeout, &rsp_iov);
+ pSMBr = (struct smb_com_transaction2_sfi_rsp *)rsp_iov.iov_base;
}
+ cifs_small_buf_release(pSMB);
if (rc) {
cifs_dbg(FYI, "Send error in Posix Lock = %d\n", rc);
@@ -2506,10 +2520,7 @@ CIFSSMBPosixLock(const unsigned int xid, struct cifs_tcon *tcon,
}
plk_err_exit:
- if (pSMB)
- cifs_small_buf_release(pSMB);
-
- free_rsp_buf(resp_buf_type, iov[0].iov_base);
+ free_rsp_buf(resp_buf_type, rsp_iov.iov_base);
/* Note: On -EAGAIN error only caller can retry on handle based calls
since file handle passed in no longer valid */
@@ -2536,6 +2547,7 @@ CIFSSMBClose(const unsigned int xid, struct cifs_tcon *tcon, int smb_file_id)
pSMB->LastWriteTime = 0xFFFFFFFF;
pSMB->ByteCount = 0;
rc = SendReceiveNoRsp(xid, tcon->ses, (char *) pSMB, 0);
+ cifs_small_buf_release(pSMB);
cifs_stats_inc(&tcon->stats.cifs_stats.num_closes);
if (rc) {
if (rc != -EINTR) {
@@ -2565,6 +2577,7 @@ CIFSSMBFlush(const unsigned int xid, struct cifs_tcon *tcon, int smb_file_id)
pSMB->FileID = (__u16) smb_file_id;
pSMB->ByteCount = 0;
rc = SendReceiveNoRsp(xid, tcon->ses, (char *) pSMB, 0);
+ cifs_small_buf_release(pSMB);
cifs_stats_inc(&tcon->stats.cifs_stats.num_flushes);
if (rc)
cifs_dbg(VFS, "Send error in Flush = %d\n", rc);
@@ -3820,6 +3833,7 @@ CIFSSMBGetCIFSACL(const unsigned int xid, struct cifs_tcon *tcon, __u16 fid,
int buf_type = 0;
QUERY_SEC_DESC_REQ *pSMB;
struct kvec iov[1];
+ struct kvec rsp_iov;
cifs_dbg(FYI, "GetCifsACL\n");
@@ -3843,7 +3857,8 @@ CIFSSMBGetCIFSACL(const unsigned int xid, struct cifs_tcon *tcon, __u16 fid,
iov[0].iov_len = be32_to_cpu(pSMB->hdr.smb_buf_length) + 4;
rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovec */, &buf_type,
- 0);
+ 0, &rsp_iov);
+ cifs_small_buf_release(pSMB);
cifs_stats_inc(&tcon->stats.cifs_stats.num_acl_get);
if (rc) {
cifs_dbg(FYI, "Send error in QuerySecDesc = %d\n", rc);
@@ -3855,11 +3870,11 @@ CIFSSMBGetCIFSACL(const unsigned int xid, struct cifs_tcon *tcon, __u16 fid,
char *pdata;
/* validate_nttransact */
- rc = validate_ntransact(iov[0].iov_base, (char **)&parm,
+ rc = validate_ntransact(rsp_iov.iov_base, (char **)&parm,
&pdata, &parm_len, pbuflen);
if (rc)
goto qsec_out;
- pSMBr = (struct smb_com_ntransact_rsp *)iov[0].iov_base;
+ pSMBr = (struct smb_com_ntransact_rsp *)rsp_iov.iov_base;
cifs_dbg(FYI, "smb %p parm %p data %p\n",
pSMBr, parm, *acl_inf);
@@ -3896,8 +3911,7 @@ CIFSSMBGetCIFSACL(const unsigned int xid, struct cifs_tcon *tcon, __u16 fid,
}
}
qsec_out:
- free_rsp_buf(buf_type, iov[0].iov_base);
-/* cifs_small_buf_release(pSMB); */ /* Freed earlier now in SendReceive2 */
+ free_rsp_buf(buf_type, rsp_iov.iov_base);
return rc;
}
@@ -4666,6 +4680,7 @@ CIFSFindClose(const unsigned int xid, struct cifs_tcon *tcon,
pSMB->FileID = searchHandle;
pSMB->ByteCount = 0;
rc = SendReceiveNoRsp(xid, tcon->ses, (char *) pSMB, 0);
+ cifs_small_buf_release(pSMB);
if (rc)
cifs_dbg(VFS, "Send error in FindClose = %d\n", rc);
@@ -5687,6 +5702,7 @@ CIFSSMBSetFileSize(const unsigned int xid, struct cifs_tcon *tcon,
inc_rfc1001_len(pSMB, byte_count);
pSMB->ByteCount = cpu_to_le16(byte_count);
rc = SendReceiveNoRsp(xid, tcon->ses, (char *) pSMB, 0);
+ cifs_small_buf_release(pSMB);
if (rc) {
cifs_dbg(FYI, "Send error in SetFileInfo (SetFileSize) = %d\n",
rc);
@@ -5758,6 +5774,7 @@ CIFSSMBSetFileInfo(const unsigned int xid, struct cifs_tcon *tcon,
pSMB->ByteCount = cpu_to_le16(byte_count);
memcpy(data_offset, data, sizeof(FILE_BASIC_INFO));
rc = SendReceiveNoRsp(xid, tcon->ses, (char *) pSMB, 0);
+ cifs_small_buf_release(pSMB);
if (rc)
cifs_dbg(FYI, "Send error in Set Time (SetFileInfo) = %d\n",
rc);
@@ -5818,6 +5835,7 @@ CIFSSMBSetFileDisposition(const unsigned int xid, struct cifs_tcon *tcon,
pSMB->ByteCount = cpu_to_le16(byte_count);
*data_offset = delete_file ? 1 : 0;
rc = SendReceiveNoRsp(xid, tcon->ses, (char *) pSMB, 0);
+ cifs_small_buf_release(pSMB);
if (rc)
cifs_dbg(FYI, "Send error in SetFileDisposition = %d\n", rc);
@@ -6057,6 +6075,7 @@ CIFSSMBUnixSetFileInfo(const unsigned int xid, struct cifs_tcon *tcon,
cifs_fill_unix_set_info((FILE_UNIX_BASIC_INFO *)data_offset, args);
rc = SendReceiveNoRsp(xid, tcon->ses, (char *) pSMB, 0);
+ cifs_small_buf_release(pSMB);
if (rc)
cifs_dbg(FYI, "Send error in Set Time (SetFileInfo) = %d\n",
rc);
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 35ae49ed1f76..777ad9f4fc3c 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -787,6 +787,15 @@ standard_receive3(struct TCP_Server_Info *server, struct mid_q_entry *mid)
dump_smb(buf, server->total_read);
+ return cifs_handle_standard(server, mid);
+}
+
+int
+cifs_handle_standard(struct TCP_Server_Info *server, struct mid_q_entry *mid)
+{
+ char *buf = server->large_buf ? server->bigbuf : server->smallbuf;
+ int length;
+
/*
* We know that we received enough to get to the MID as we
* checked the pdu_length earlier. Now check to see
@@ -872,12 +881,19 @@ cifs_demultiplex_thread(void *p)
continue;
server->total_read += length;
- mid_entry = server->ops->find_mid(server, buf);
+ if (server->ops->is_transform_hdr &&
+ server->ops->receive_transform &&
+ server->ops->is_transform_hdr(buf)) {
+ length = server->ops->receive_transform(server,
+ &mid_entry);
+ } else {
+ mid_entry = server->ops->find_mid(server, buf);
- if (!mid_entry || !mid_entry->receive)
- length = standard_receive3(server, mid_entry);
- else
- length = mid_entry->receive(server, mid_entry);
+ if (!mid_entry || !mid_entry->receive)
+ length = standard_receive3(server, mid_entry);
+ else
+ length = mid_entry->receive(server, mid_entry);
+ }
if (length < 0)
continue;
@@ -2154,7 +2170,7 @@ cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect)
server->tcpStatus = CifsExiting;
spin_unlock(&GlobalMid_Lock);
- cifs_crypto_shash_release(server);
+ cifs_crypto_secmech_release(server);
cifs_fscache_release_client_cookie(server);
kfree(server->session_key.response);
@@ -2273,7 +2289,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
return tcp_ses;
out_err_crypto_release:
- cifs_crypto_shash_release(tcp_ses);
+ cifs_crypto_secmech_release(tcp_ses);
put_net(cifs_net_ns(tcp_ses));
@@ -2614,12 +2630,18 @@ get_ses_fail:
return ERR_PTR(rc);
}
-static int match_tcon(struct cifs_tcon *tcon, const char *unc)
+static int match_tcon(struct cifs_tcon *tcon, struct smb_vol *volume_info)
{
if (tcon->tidStatus == CifsExiting)
return 0;
- if (strncmp(tcon->treeName, unc, MAX_TREE_SIZE))
+ if (strncmp(tcon->treeName, volume_info->UNC, MAX_TREE_SIZE))
return 0;
+ if (tcon->seal != volume_info->seal)
+ return 0;
+#ifdef CONFIG_CIFS_SMB2
+ if (tcon->snapshot_time != volume_info->snapshot_time)
+ return 0;
+#endif /* CONFIG_CIFS_SMB2 */
return 1;
}
@@ -2632,14 +2654,8 @@ cifs_find_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
spin_lock(&cifs_tcp_ses_lock);
list_for_each(tmp, &ses->tcon_list) {
tcon = list_entry(tmp, struct cifs_tcon, tcon_list);
- if (!match_tcon(tcon, volume_info->UNC))
- continue;
-
-#ifdef CONFIG_CIFS_SMB2
- if (tcon->snapshot_time != volume_info->snapshot_time)
+ if (!match_tcon(tcon, volume_info))
continue;
-#endif /* CONFIG_CIFS_SMB2 */
-
++tcon->tc_count;
spin_unlock(&cifs_tcp_ses_lock);
return tcon;
@@ -2685,8 +2701,6 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
cifs_dbg(FYI, "Found match on UNC path\n");
/* existing tcon already has a reference */
cifs_put_smb_ses(ses);
- if (tcon->seal != volume_info->seal)
- cifs_dbg(VFS, "transport encryption setting conflicts with existing tid\n");
return tcon;
}
@@ -2742,7 +2756,6 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
tcon->Flags &= ~SMB_SHARE_IS_IN_DFS;
cifs_dbg(FYI, "DFS disabled (%d)\n", tcon->Flags);
}
- tcon->seal = volume_info->seal;
tcon->use_persistent = false;
/* check if SMB2 or later, CIFS does not support persistent handles */
if (volume_info->persistent) {
@@ -2779,6 +2792,24 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
tcon->use_resilient = true;
}
+ if (volume_info->seal) {
+ if (ses->server->vals->protocol_id == 0) {
+ cifs_dbg(VFS,
+ "SMB3 or later required for encryption\n");
+ rc = -EOPNOTSUPP;
+ goto out_fail;
+#ifdef CONFIG_CIFS_SMB2
+ } else if (tcon->ses->server->capabilities &
+ SMB2_GLOBAL_CAP_ENCRYPTION)
+ tcon->seal = true;
+ else {
+ cifs_dbg(VFS, "Encryption is not supported on share\n");
+ rc = -EOPNOTSUPP;
+ goto out_fail;
+#endif /* CONFIG_CIFS_SMB2 */
+ }
+ }
+
/*
* We can have only one retry value for a connection to a share so for
* resources mounted more than once to the same server share the last
@@ -2910,7 +2941,7 @@ cifs_match_super(struct super_block *sb, void *data)
if (!match_server(tcp_srv, volume_info) ||
!match_session(ses, volume_info) ||
- !match_tcon(tcon, volume_info->UNC) ||
+ !match_tcon(tcon, volume_info) ||
!match_prepath(sb, mnt_data)) {
rc = 0;
goto out;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 18a1e1d6671f..98dc842e7245 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2884,7 +2884,15 @@ cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
for (i = 0; i < rdata->nr_pages; i++) {
struct page *page = rdata->pages[i];
size_t copy = min_t(size_t, remaining, PAGE_SIZE);
- size_t written = copy_page_to_iter(page, 0, copy, iter);
+ size_t written;
+
+ if (unlikely(iter->type & ITER_PIPE)) {
+ void *addr = kmap_atomic(page);
+
+ written = copy_to_iter(addr, copy, iter);
+ kunmap_atomic(addr);
+ } else
+ written = copy_page_to_iter(page, 0, copy, iter);
remaining -= written;
if (written < copy && iov_iter_count(iter) > 0)
break;
@@ -2903,8 +2911,9 @@ cifs_uncached_readv_complete(struct work_struct *work)
}
static int
-cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
- struct cifs_readdata *rdata, unsigned int len)
+uncached_fill_pages(struct TCP_Server_Info *server,
+ struct cifs_readdata *rdata, struct iov_iter *iter,
+ unsigned int len)
{
int result = 0;
unsigned int i;
@@ -2933,7 +2942,10 @@ cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
rdata->tailsz = len;
len = 0;
}
- result = cifs_read_page_from_socket(server, page, n);
+ if (iter)
+ result = copy_page_from_iter(page, 0, n, iter);
+ else
+ result = cifs_read_page_from_socket(server, page, n);
if (result < 0)
break;
@@ -2945,6 +2957,21 @@ cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
}
static int
+cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
+ struct cifs_readdata *rdata, unsigned int len)
+{
+ return uncached_fill_pages(server, rdata, NULL, len);
+}
+
+static int
+cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
+ struct cifs_readdata *rdata,
+ struct iov_iter *iter)
+{
+ return uncached_fill_pages(server, rdata, iter, iter->count);
+}
+
+static int
cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
struct cifs_sb_info *cifs_sb, struct list_head *rdata_list)
{
@@ -2991,6 +3018,7 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
rdata->pid = pid;
rdata->pagesz = PAGE_SIZE;
rdata->read_into_pages = cifs_uncached_read_into_pages;
+ rdata->copy_into_pages = cifs_uncached_copy_into_pages;
rdata->credits = credits;
if (!rdata->cfile->invalidHandle ||
@@ -3341,8 +3369,9 @@ cifs_readv_complete(struct work_struct *work)
}
static int
-cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
- struct cifs_readdata *rdata, unsigned int len)
+readpages_fill_pages(struct TCP_Server_Info *server,
+ struct cifs_readdata *rdata, struct iov_iter *iter,
+ unsigned int len)
{
int result = 0;
unsigned int i;
@@ -3396,7 +3425,10 @@ cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
continue;
}
- result = cifs_read_page_from_socket(server, page, n);
+ if (iter)
+ result = copy_page_from_iter(page, 0, n, iter);
+ else
+ result = cifs_read_page_from_socket(server, page, n);
if (result < 0)
break;
@@ -3408,6 +3440,21 @@ cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
}
static int
+cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
+ struct cifs_readdata *rdata, unsigned int len)
+{
+ return readpages_fill_pages(server, rdata, NULL, len);
+}
+
+static int
+cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
+ struct cifs_readdata *rdata,
+ struct iov_iter *iter)
+{
+ return readpages_fill_pages(server, rdata, iter, iter->count);
+}
+
+static int
readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
unsigned int rsize, struct list_head *tmplist,
unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
@@ -3561,6 +3608,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
rdata->pid = pid;
rdata->pagesz = PAGE_SIZE;
rdata->read_into_pages = cifs_readpages_read_into_pages;
+ rdata->copy_into_pages = cifs_readpages_copy_into_pages;
rdata->credits = credits;
list_for_each_entry_safe(page, tpage, &tmplist, lru) {
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 8f6a2a5863b9..a27fc8791551 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -285,6 +285,7 @@ initiate_cifs_search(const unsigned int xid, struct file *file)
rc = -ENOMEM;
goto error_exit;
}
+ spin_lock_init(&cifsFile->file_info_lock);
file->private_data = cifsFile;
cifsFile->tlink = cifs_get_tlink(tlink);
tcon = tlink_tcon(tlink);
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 538d9b55699a..dcbcc927399a 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -344,13 +344,12 @@ void build_ntlmssp_negotiate_blob(unsigned char *pbuffer,
/* BB is NTLMV2 session security format easier to use here? */
flags = NTLMSSP_NEGOTIATE_56 | NTLMSSP_REQUEST_TARGET |
NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE |
- NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC;
- if (ses->server->sign) {
+ NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC |
+ NTLMSSP_NEGOTIATE_SEAL;
+ if (ses->server->sign)
flags |= NTLMSSP_NEGOTIATE_SIGN;
- if (!ses->server->session_estab ||
- ses->ntlmssp->sesskey_per_smbsess)
- flags |= NTLMSSP_NEGOTIATE_KEY_XCH;
- }
+ if (!ses->server->session_estab || ses->ntlmssp->sesskey_per_smbsess)
+ flags |= NTLMSSP_NEGOTIATE_KEY_XCH;
sec_blob->NegotiateFlags = cpu_to_le32(flags);
@@ -407,13 +406,12 @@ int build_ntlmssp_auth_blob(unsigned char **pbuffer,
flags = NTLMSSP_NEGOTIATE_56 |
NTLMSSP_REQUEST_TARGET | NTLMSSP_NEGOTIATE_TARGET_INFO |
NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE |
- NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC;
- if (ses->server->sign) {
+ NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC |
+ NTLMSSP_NEGOTIATE_SEAL;
+ if (ses->server->sign)
flags |= NTLMSSP_NEGOTIATE_SIGN;
- if (!ses->server->session_estab ||
- ses->ntlmssp->sesskey_per_smbsess)
- flags |= NTLMSSP_NEGOTIATE_KEY_XCH;
- }
+ if (!ses->server->session_estab || ses->ntlmssp->sesskey_per_smbsess)
+ flags |= NTLMSSP_NEGOTIATE_KEY_XCH;
tmp = *pbuffer + sizeof(AUTHENTICATE_MESSAGE);
sec_blob->NegotiateFlags = cpu_to_le32(flags);
@@ -652,6 +650,7 @@ sess_sendreceive(struct sess_data *sess_data)
int rc;
struct smb_hdr *smb_buf = (struct smb_hdr *) sess_data->iov[0].iov_base;
__u16 count;
+ struct kvec rsp_iov = { NULL, 0 };
count = sess_data->iov[1].iov_len + sess_data->iov[2].iov_len;
smb_buf->smb_buf_length =
@@ -661,7 +660,9 @@ sess_sendreceive(struct sess_data *sess_data)
rc = SendReceive2(sess_data->xid, sess_data->ses,
sess_data->iov, 3 /* num_iovecs */,
&sess_data->buf0_type,
- CIFS_LOG_ERROR);
+ CIFS_LOG_ERROR, &rsp_iov);
+ cifs_small_buf_release(sess_data->iov[0].iov_base);
+ memcpy(&sess_data->iov[0], &rsp_iov, sizeof(struct kvec));
return rc;
}
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index fc537c29044e..67a987e4d026 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -36,11 +36,11 @@
* SMB_COM_NT_CANCEL request and then sends it.
*/
static int
-send_nt_cancel(struct TCP_Server_Info *server, void *buf,
+send_nt_cancel(struct TCP_Server_Info *server, struct smb_rqst *rqst,
struct mid_q_entry *mid)
{
int rc = 0;
- struct smb_hdr *in_buf = (struct smb_hdr *)buf;
+ struct smb_hdr *in_buf = (struct smb_hdr *)rqst->rq_iov[0].iov_base;
/* -4 for RFC1001 length and +2 for BCC field */
in_buf->smb_buf_length = cpu_to_be32(sizeof(struct smb_hdr) - 4 + 2);
diff --git a/fs/cifs/smb2glob.h b/fs/cifs/smb2glob.h
index 0ffa18094335..401a5d856636 100644
--- a/fs/cifs/smb2glob.h
+++ b/fs/cifs/smb2glob.h
@@ -61,4 +61,9 @@
/* Maximum buffer size value we can send with 1 credit */
#define SMB2_MAX_BUFFER_SIZE 65536
+static inline struct smb2_sync_hdr *get_sync_hdr(void *buf)
+{
+ return &(((struct smb2_hdr *)buf)->sync_hdr);
+}
+
#endif /* _SMB2_GLOB_H */
diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c
index 8257a5a97cc0..3030a9dfb0dd 100644
--- a/fs/cifs/smb2maperror.c
+++ b/fs/cifs/smb2maperror.c
@@ -26,6 +26,7 @@
#include "smb2pdu.h"
#include "smb2proto.h"
#include "smb2status.h"
+#include "smb2glob.h"
struct status_to_posix_error {
__le32 smb2_status;
@@ -2449,10 +2450,10 @@ smb2_print_status(__le32 status)
int
map_smb2_to_linux_error(char *buf, bool log_err)
{
- struct smb2_hdr *hdr = (struct smb2_hdr *)buf;
+ struct smb2_sync_hdr *shdr = get_sync_hdr(buf);
unsigned int i;
int rc = -EIO;
- __le32 smb2err = hdr->Status;
+ __le32 smb2err = shdr->Status;
if (smb2err == 0)
return 0;
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index 3d383489b9cf..fd516ea8b8f8 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -28,31 +28,32 @@
#include "cifs_debug.h"
#include "cifs_unicode.h"
#include "smb2status.h"
+#include "smb2glob.h"
static int
-check_smb2_hdr(struct smb2_hdr *hdr, __u64 mid)
+check_smb2_hdr(struct smb2_sync_hdr *shdr, __u64 mid)
{
- __u64 wire_mid = le64_to_cpu(hdr->MessageId);
+ __u64 wire_mid = le64_to_cpu(shdr->MessageId);
/*
* Make sure that this really is an SMB, that it is a response,
* and that the message ids match.
*/
- if ((hdr->ProtocolId == SMB2_PROTO_NUMBER) &&
+ if ((shdr->ProtocolId == SMB2_PROTO_NUMBER) &&
(mid == wire_mid)) {
- if (hdr->Flags & SMB2_FLAGS_SERVER_TO_REDIR)
+ if (shdr->Flags & SMB2_FLAGS_SERVER_TO_REDIR)
return 0;
else {
/* only one valid case where server sends us request */
- if (hdr->Command == SMB2_OPLOCK_BREAK)
+ if (shdr->Command == SMB2_OPLOCK_BREAK)
return 0;
else
cifs_dbg(VFS, "Received Request not response\n");
}
} else { /* bad signature or mid */
- if (hdr->ProtocolId != SMB2_PROTO_NUMBER)
+ if (shdr->ProtocolId != SMB2_PROTO_NUMBER)
cifs_dbg(VFS, "Bad protocol string signature header %x\n",
- le32_to_cpu(hdr->ProtocolId));
+ le32_to_cpu(shdr->ProtocolId));
if (mid != wire_mid)
cifs_dbg(VFS, "Mids do not match: %llu and %llu\n",
mid, wire_mid);
@@ -95,8 +96,9 @@ static const __le16 smb2_rsp_struct_sizes[NUMBER_OF_SMB2_COMMANDS] = {
int
smb2_check_message(char *buf, unsigned int length, struct TCP_Server_Info *srvr)
{
- struct smb2_hdr *hdr = (struct smb2_hdr *)buf;
- struct smb2_pdu *pdu = (struct smb2_pdu *)hdr;
+ struct smb2_pdu *pdu = (struct smb2_pdu *)buf;
+ struct smb2_hdr *hdr = &pdu->hdr;
+ struct smb2_sync_hdr *shdr = get_sync_hdr(buf);
__u64 mid;
__u32 len = get_rfc1002_length(buf);
__u32 clc_len; /* calculated length */
@@ -111,7 +113,7 @@ smb2_check_message(char *buf, unsigned int length, struct TCP_Server_Info *srvr)
* ie Validate the wct via smb2_struct_sizes table above
*/
- if (hdr->ProtocolId == SMB2_TRANSFORM_PROTO_NUM) {
+ if (shdr->ProtocolId == SMB2_TRANSFORM_PROTO_NUM) {
struct smb2_transform_hdr *thdr =
(struct smb2_transform_hdr *)buf;
struct cifs_ses *ses = NULL;
@@ -133,10 +135,10 @@ smb2_check_message(char *buf, unsigned int length, struct TCP_Server_Info *srvr)
}
}
-
- mid = le64_to_cpu(hdr->MessageId);
+ mid = le64_to_cpu(shdr->MessageId);
if (length < sizeof(struct smb2_pdu)) {
- if ((length >= sizeof(struct smb2_hdr)) && (hdr->Status != 0)) {
+ if ((length >= sizeof(struct smb2_hdr))
+ && (shdr->Status != 0)) {
pdu->StructureSize2 = 0;
/*
* As with SMB/CIFS, on some error cases servers may
@@ -154,29 +156,30 @@ smb2_check_message(char *buf, unsigned int length, struct TCP_Server_Info *srvr)
return 1;
}
- if (check_smb2_hdr(hdr, mid))
+ if (check_smb2_hdr(shdr, mid))
return 1;
- if (hdr->StructureSize != SMB2_HEADER_STRUCTURE_SIZE) {
+ if (shdr->StructureSize != SMB2_HEADER_STRUCTURE_SIZE) {
cifs_dbg(VFS, "Illegal structure size %u\n",
- le16_to_cpu(hdr->StructureSize));
+ le16_to_cpu(shdr->StructureSize));
return 1;
}
- command = le16_to_cpu(hdr->Command);
+ command = le16_to_cpu(shdr->Command);
if (command >= NUMBER_OF_SMB2_COMMANDS) {
cifs_dbg(VFS, "Illegal SMB2 command %d\n", command);
return 1;
}
if (smb2_rsp_struct_sizes[command] != pdu->StructureSize2) {
- if (command != SMB2_OPLOCK_BREAK_HE && (hdr->Status == 0 ||
+ if (command != SMB2_OPLOCK_BREAK_HE && (shdr->Status == 0 ||
pdu->StructureSize2 != SMB2_ERROR_STRUCTURE_SIZE2)) {
/* error packets have 9 byte structure size */
cifs_dbg(VFS, "Illegal response size %u for command %d\n",
le16_to_cpu(pdu->StructureSize2), command);
return 1;
- } else if (command == SMB2_OPLOCK_BREAK_HE && (hdr->Status == 0)
+ } else if (command == SMB2_OPLOCK_BREAK_HE
+ && (shdr->Status == 0)
&& (le16_to_cpu(pdu->StructureSize2) != 44)
&& (le16_to_cpu(pdu->StructureSize2) != 36)) {
/* special case for SMB2.1 lease break message */
@@ -199,7 +202,7 @@ smb2_check_message(char *buf, unsigned int length, struct TCP_Server_Info *srvr)
clc_len, 4 + len, mid);
/* create failed on symlink */
if (command == SMB2_CREATE_HE &&
- hdr->Status == STATUS_STOPPED_ON_SYMLINK)
+ shdr->Status == STATUS_STOPPED_ON_SYMLINK)
return 0;
/* Windows 7 server returns 24 bytes more */
if (clc_len + 20 == len && command == SMB2_OPLOCK_BREAK_HE)
@@ -261,11 +264,12 @@ static const bool has_smb2_data_area[NUMBER_OF_SMB2_COMMANDS] = {
char *
smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr)
{
+ struct smb2_sync_hdr *shdr = get_sync_hdr(hdr);
*off = 0;
*len = 0;
/* error responses do not have data area */
- if (hdr->Status && hdr->Status != STATUS_MORE_PROCESSING_REQUIRED &&
+ if (shdr->Status && shdr->Status != STATUS_MORE_PROCESSING_REQUIRED &&
(((struct smb2_err_rsp *)hdr)->StructureSize) ==
SMB2_ERROR_STRUCTURE_SIZE2)
return NULL;
@@ -275,7 +279,7 @@ smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr)
* of the data buffer offset and data buffer length for the particular
* command.
*/
- switch (hdr->Command) {
+ switch (shdr->Command) {
case SMB2_NEGOTIATE:
*off = le16_to_cpu(
((struct smb2_negotiate_rsp *)hdr)->SecurityBufferOffset);
@@ -346,7 +350,7 @@ smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr)
/* return pointer to beginning of data area, ie offset from SMB start */
if ((*off != 0) && (*len != 0))
- return (char *)(&hdr->ProtocolId) + *off;
+ return (char *)shdr + *off;
else
return NULL;
}
@@ -358,12 +362,13 @@ smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr)
unsigned int
smb2_calc_size(void *buf)
{
- struct smb2_hdr *hdr = (struct smb2_hdr *)buf;
- struct smb2_pdu *pdu = (struct smb2_pdu *)hdr;
+ struct smb2_pdu *pdu = (struct smb2_pdu *)buf;
+ struct smb2_hdr *hdr = &pdu->hdr;
+ struct smb2_sync_hdr *shdr = get_sync_hdr(hdr);
int offset; /* the offset from the beginning of SMB to data area */
int data_length; /* the length of the variable length data area */
/* Structure Size has already been checked to make sure it is 64 */
- int len = 4 + le16_to_cpu(pdu->hdr.StructureSize);
+ int len = 4 + le16_to_cpu(shdr->StructureSize);
/*
* StructureSize2, ie length of fixed parameter area has already
@@ -371,7 +376,7 @@ smb2_calc_size(void *buf)
*/
len += le16_to_cpu(pdu->StructureSize2);
- if (has_smb2_data_area[le16_to_cpu(hdr->Command)] == false)
+ if (has_smb2_data_area[le16_to_cpu(shdr->Command)] == false)
goto calc_size_exit;
smb2_get_data_area_len(&offset, &data_length, hdr);
@@ -582,7 +587,7 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
cifs_dbg(FYI, "Checking for oplock break\n");
- if (rsp->hdr.Command != SMB2_OPLOCK_BREAK)
+ if (rsp->hdr.sync_hdr.Command != SMB2_OPLOCK_BREAK)
return false;
if (rsp->StructureSize !=
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 5d456ebb3813..a44b4dbe4aae 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -20,6 +20,8 @@
#include <linux/pagemap.h>
#include <linux/vfs.h>
#include <linux/falloc.h>
+#include <linux/scatterlist.h>
+#include <crypto/aead.h>
#include "cifsglob.h"
#include "smb2pdu.h"
#include "smb2proto.h"
@@ -119,7 +121,9 @@ smb2_get_credits_field(struct TCP_Server_Info *server, const int optype)
static unsigned int
smb2_get_credits(struct mid_q_entry *mid)
{
- return le16_to_cpu(((struct smb2_hdr *)mid->resp_buf)->CreditRequest);
+ struct smb2_sync_hdr *shdr = get_sync_hdr(mid->resp_buf);
+
+ return le16_to_cpu(shdr->CreditRequest);
}
static int
@@ -184,10 +188,10 @@ static struct mid_q_entry *
smb2_find_mid(struct TCP_Server_Info *server, char *buf)
{
struct mid_q_entry *mid;
- struct smb2_hdr *hdr = (struct smb2_hdr *)buf;
- __u64 wire_mid = le64_to_cpu(hdr->MessageId);
+ struct smb2_sync_hdr *shdr = get_sync_hdr(buf);
+ __u64 wire_mid = le64_to_cpu(shdr->MessageId);
- if (hdr->ProtocolId == SMB2_TRANSFORM_PROTO_NUM) {
+ if (shdr->ProtocolId == SMB2_TRANSFORM_PROTO_NUM) {
cifs_dbg(VFS, "encrypted frame parsing not supported yet");
return NULL;
}
@@ -196,7 +200,7 @@ smb2_find_mid(struct TCP_Server_Info *server, char *buf)
list_for_each_entry(mid, &server->pending_mid_q, qhead) {
if ((mid->mid == wire_mid) &&
(mid->mid_state == MID_REQUEST_SUBMITTED) &&
- (mid->command == hdr->Command)) {
+ (mid->command == shdr->Command)) {
spin_unlock(&GlobalMid_Lock);
return mid;
}
@@ -209,12 +213,12 @@ static void
smb2_dump_detail(void *buf)
{
#ifdef CONFIG_CIFS_DEBUG2
- struct smb2_hdr *smb = (struct smb2_hdr *)buf;
+ struct smb2_sync_hdr *shdr = get_sync_hdr(buf);
cifs_dbg(VFS, "Cmd: %d Err: 0x%x Flags: 0x%x Mid: %llu Pid: %d\n",
- smb->Command, smb->Status, smb->Flags, smb->MessageId,
- smb->ProcessId);
- cifs_dbg(VFS, "smb buf %p len %u\n", smb, smb2_calc_size(smb));
+ shdr->Command, shdr->Status, shdr->Flags, shdr->MessageId,
+ shdr->ProcessId);
+ cifs_dbg(VFS, "smb buf %p len %u\n", buf, smb2_calc_size(buf));
#endif
}
@@ -1002,14 +1006,14 @@ smb2_close_dir(const unsigned int xid, struct cifs_tcon *tcon,
static bool
smb2_is_status_pending(char *buf, struct TCP_Server_Info *server, int length)
{
- struct smb2_hdr *hdr = (struct smb2_hdr *)buf;
+ struct smb2_sync_hdr *shdr = get_sync_hdr(buf);
- if (hdr->Status != STATUS_PENDING)
+ if (shdr->Status != STATUS_PENDING)
return false;
if (!length) {
spin_lock(&server->req_lock);
- server->credits += le16_to_cpu(hdr->CreditRequest);
+ server->credits += le16_to_cpu(shdr->CreditRequest);
spin_unlock(&server->req_lock);
wake_up(&server->request_q);
}
@@ -1545,6 +1549,633 @@ smb2_dir_needs_close(struct cifsFileInfo *cfile)
return !cfile->invalidHandle;
}
+static void
+fill_transform_hdr(struct smb2_transform_hdr *tr_hdr, struct smb_rqst *old_rq)
+{
+ struct smb2_sync_hdr *shdr =
+ (struct smb2_sync_hdr *)old_rq->rq_iov[1].iov_base;
+ unsigned int orig_len = get_rfc1002_length(old_rq->rq_iov[0].iov_base);
+
+ memset(tr_hdr, 0, sizeof(struct smb2_transform_hdr));
+ tr_hdr->ProtocolId = SMB2_TRANSFORM_PROTO_NUM;
+ tr_hdr->OriginalMessageSize = cpu_to_le32(orig_len);
+ tr_hdr->Flags = cpu_to_le16(0x01);
+ get_random_bytes(&tr_hdr->Nonce, SMB3_AES128CMM_NONCE);
+ memcpy(&tr_hdr->SessionId, &shdr->SessionId, 8);
+ inc_rfc1001_len(tr_hdr, sizeof(struct smb2_transform_hdr) - 4);
+ inc_rfc1001_len(tr_hdr, orig_len);
+}
+
+static struct scatterlist *
+init_sg(struct smb_rqst *rqst, u8 *sign)
+{
+ unsigned int sg_len = rqst->rq_nvec + rqst->rq_npages + 1;
+ unsigned int assoc_data_len = sizeof(struct smb2_transform_hdr) - 24;
+ struct scatterlist *sg;
+ unsigned int i;
+ unsigned int j;
+
+ sg = kmalloc_array(sg_len, sizeof(struct scatterlist), GFP_KERNEL);
+ if (!sg)
+ return NULL;
+
+ sg_init_table(sg, sg_len);
+ sg_set_buf(&sg[0], rqst->rq_iov[0].iov_base + 24, assoc_data_len);
+ for (i = 1; i < rqst->rq_nvec; i++)
+ sg_set_buf(&sg[i], rqst->rq_iov[i].iov_base,
+ rqst->rq_iov[i].iov_len);
+ for (j = 0; i < sg_len - 1; i++, j++) {
+ unsigned int len = (j < rqst->rq_npages - 1) ? rqst->rq_pagesz
+ : rqst->rq_tailsz;
+ sg_set_page(&sg[i], rqst->rq_pages[j], len, 0);
+ }
+ sg_set_buf(&sg[sg_len - 1], sign, SMB2_SIGNATURE_SIZE);
+ return sg;
+}
+
+struct cifs_crypt_result {
+ int err;
+ struct completion completion;
+};
+
+static void cifs_crypt_complete(struct crypto_async_request *req, int err)
+{
+ struct cifs_crypt_result *res = req->data;
+
+ if (err == -EINPROGRESS)
+ return;
+
+ res->err = err;
+ complete(&res->completion);
+}
+
+/*
+ * Encrypt or decrypt @rqst message. @rqst has the following format:
+ * iov[0] - transform header (associate data),
+ * iov[1-N] and pages - data to encrypt.
+ * On success return encrypted data in iov[1-N] and pages, leave iov[0]
+ * untouched.
+ */
+static int
+crypt_message(struct TCP_Server_Info *server, struct smb_rqst *rqst, int enc)
+{
+ struct smb2_transform_hdr *tr_hdr =
+ (struct smb2_transform_hdr *)rqst->rq_iov[0].iov_base;
+ unsigned int assoc_data_len = sizeof(struct smb2_transform_hdr) - 24;
+ struct cifs_ses *ses;
+ int rc = 0;
+ struct scatterlist *sg;
+ u8 sign[SMB2_SIGNATURE_SIZE] = {};
+ struct aead_request *req;
+ char *iv;
+ unsigned int iv_len;
+ struct cifs_crypt_result result = {0, };
+ struct crypto_aead *tfm;
+ unsigned int crypt_len = le32_to_cpu(tr_hdr->OriginalMessageSize);
+
+ init_completion(&result.completion);
+
+ ses = smb2_find_smb_ses(server, tr_hdr->SessionId);
+ if (!ses) {
+ cifs_dbg(VFS, "%s: Could not find session\n", __func__);
+ return 0;
+ }
+
+ rc = smb3_crypto_aead_allocate(server);
+ if (rc) {
+ cifs_dbg(VFS, "%s: crypto alloc failed\n", __func__);
+ return rc;
+ }
+
+ tfm = enc ? server->secmech.ccmaesencrypt :
+ server->secmech.ccmaesdecrypt;
+ rc = crypto_aead_setkey(tfm, enc ? ses->smb3encryptionkey :
+ ses->smb3decryptionkey, SMB3_SIGN_KEY_SIZE);
+ if (rc) {
+ cifs_dbg(VFS, "%s: Failed to set aead key %d\n", __func__, rc);
+ return rc;
+ }
+
+ rc = crypto_aead_setauthsize(tfm, SMB2_SIGNATURE_SIZE);
+ if (rc) {
+ cifs_dbg(VFS, "%s: Failed to set authsize %d\n", __func__, rc);
+ return rc;
+ }
+
+ req = aead_request_alloc(tfm, GFP_KERNEL);
+ if (!req) {
+ cifs_dbg(VFS, "%s: Failed to alloc aead request", __func__);
+ return -ENOMEM;
+ }
+
+ if (!enc) {
+ memcpy(sign, &tr_hdr->Signature, SMB2_SIGNATURE_SIZE);
+ crypt_len += SMB2_SIGNATURE_SIZE;
+ }
+
+ sg = init_sg(rqst, sign);
+ if (!sg) {
+ cifs_dbg(VFS, "%s: Failed to init sg %d", __func__, rc);
+ goto free_req;
+ }
+
+ iv_len = crypto_aead_ivsize(tfm);
+ iv = kzalloc(iv_len, GFP_KERNEL);
+ if (!iv) {
+ cifs_dbg(VFS, "%s: Failed to alloc IV", __func__);
+ goto free_sg;
+ }
+ iv[0] = 3;
+ memcpy(iv + 1, (char *)tr_hdr->Nonce, SMB3_AES128CMM_NONCE);
+
+ aead_request_set_crypt(req, sg, sg, crypt_len, iv);
+ aead_request_set_ad(req, assoc_data_len);
+
+ aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+ cifs_crypt_complete, &result);
+
+ rc = enc ? crypto_aead_encrypt(req) : crypto_aead_decrypt(req);
+
+ if (rc == -EINPROGRESS || rc == -EBUSY) {
+ wait_for_completion(&result.completion);
+ rc = result.err;
+ }
+
+ if (!rc && enc)
+ memcpy(&tr_hdr->Signature, sign, SMB2_SIGNATURE_SIZE);
+
+ kfree(iv);
+free_sg:
+ kfree(sg);
+free_req:
+ kfree(req);
+ return rc;
+}
+
+static int
+smb3_init_transform_rq(struct TCP_Server_Info *server, struct smb_rqst *new_rq,
+ struct smb_rqst *old_rq)
+{
+ struct kvec *iov;
+ struct page **pages;
+ struct smb2_transform_hdr *tr_hdr;
+ unsigned int npages = old_rq->rq_npages;
+ int i;
+ int rc = -ENOMEM;
+
+ pages = kmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
+ if (!pages)
+ return rc;
+
+ new_rq->rq_pages = pages;
+ new_rq->rq_npages = old_rq->rq_npages;
+ new_rq->rq_pagesz = old_rq->rq_pagesz;
+ new_rq->rq_tailsz = old_rq->rq_tailsz;
+
+ for (i = 0; i < npages; i++) {
+ pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
+ if (!pages[i])
+ goto err_free_pages;
+ }
+
+ iov = kmalloc_array(old_rq->rq_nvec, sizeof(struct kvec), GFP_KERNEL);
+ if (!iov)
+ goto err_free_pages;
+
+ /* copy all iovs from the old except the 1st one (rfc1002 length) */
+ memcpy(&iov[1], &old_rq->rq_iov[1],
+ sizeof(struct kvec) * (old_rq->rq_nvec - 1));
+ new_rq->rq_iov = iov;
+ new_rq->rq_nvec = old_rq->rq_nvec;
+
+ tr_hdr = kmalloc(sizeof(struct smb2_transform_hdr), GFP_KERNEL);
+ if (!tr_hdr)
+ goto err_free_iov;
+
+ /* fill the 1st iov with a transform header */
+ fill_transform_hdr(tr_hdr, old_rq);
+ new_rq->rq_iov[0].iov_base = tr_hdr;
+ new_rq->rq_iov[0].iov_len = sizeof(struct smb2_transform_hdr);
+
+ /* copy pages form the old */
+ for (i = 0; i < npages; i++) {
+ char *dst = kmap(new_rq->rq_pages[i]);
+ char *src = kmap(old_rq->rq_pages[i]);
+ unsigned int len = (i < npages - 1) ? new_rq->rq_pagesz :
+ new_rq->rq_tailsz;
+ memcpy(dst, src, len);
+ kunmap(new_rq->rq_pages[i]);
+ kunmap(old_rq->rq_pages[i]);
+ }
+
+ rc = crypt_message(server, new_rq, 1);
+ cifs_dbg(FYI, "encrypt message returned %d", rc);
+ if (rc)
+ goto err_free_tr_hdr;
+
+ return rc;
+
+err_free_tr_hdr:
+ kfree(tr_hdr);
+err_free_iov:
+ kfree(iov);
+err_free_pages:
+ for (i = i - 1; i >= 0; i--)
+ put_page(pages[i]);
+ kfree(pages);
+ return rc;
+}
+
+static void
+smb3_free_transform_rq(struct smb_rqst *rqst)
+{
+ int i = rqst->rq_npages - 1;
+
+ for (; i >= 0; i--)
+ put_page(rqst->rq_pages[i]);
+ kfree(rqst->rq_pages);
+ /* free transform header */
+ kfree(rqst->rq_iov[0].iov_base);
+ kfree(rqst->rq_iov);
+}
+
+static int
+smb3_is_transform_hdr(void *buf)
+{
+ struct smb2_transform_hdr *trhdr = buf;
+
+ return trhdr->ProtocolId == SMB2_TRANSFORM_PROTO_NUM;
+}
+
+static int
+decrypt_raw_data(struct TCP_Server_Info *server, char *buf,
+ unsigned int buf_data_size, struct page **pages,
+ unsigned int npages, unsigned int page_data_size)
+{
+ struct kvec iov[2];
+ struct smb_rqst rqst = {NULL};
+ struct smb2_hdr *hdr;
+ int rc;
+
+ iov[0].iov_base = buf;
+ iov[0].iov_len = sizeof(struct smb2_transform_hdr);
+ iov[1].iov_base = buf + sizeof(struct smb2_transform_hdr);
+ iov[1].iov_len = buf_data_size;
+
+ rqst.rq_iov = iov;
+ rqst.rq_nvec = 2;
+ rqst.rq_pages = pages;
+ rqst.rq_npages = npages;
+ rqst.rq_pagesz = PAGE_SIZE;
+ rqst.rq_tailsz = (page_data_size % PAGE_SIZE) ? : PAGE_SIZE;
+
+ rc = crypt_message(server, &rqst, 0);
+ cifs_dbg(FYI, "decrypt message returned %d\n", rc);
+
+ if (rc)
+ return rc;
+
+ memmove(buf + 4, iov[1].iov_base, buf_data_size);
+ hdr = (struct smb2_hdr *)buf;
+ hdr->smb2_buf_length = cpu_to_be32(buf_data_size + page_data_size);
+ server->total_read = buf_data_size + page_data_size + 4;
+
+ return rc;
+}
+
+static int
+read_data_into_pages(struct TCP_Server_Info *server, struct page **pages,
+ unsigned int npages, unsigned int len)
+{
+ int i;
+ int length;
+
+ for (i = 0; i < npages; i++) {
+ struct page *page = pages[i];
+ size_t n;
+
+ n = len;
+ if (len >= PAGE_SIZE) {
+ /* enough data to fill the page */
+ n = PAGE_SIZE;
+ len -= n;
+ } else {
+ zero_user(page, len, PAGE_SIZE - len);
+ len = 0;
+ }
+ length = cifs_read_page_from_socket(server, page, n);
+ if (length < 0)
+ return length;
+ server->total_read += length;
+ }
+
+ return 0;
+}
+
+static int
+init_read_bvec(struct page **pages, unsigned int npages, unsigned int data_size,
+ unsigned int cur_off, struct bio_vec **page_vec)
+{
+ struct bio_vec *bvec;
+ int i;
+
+ bvec = kcalloc(npages, sizeof(struct bio_vec), GFP_KERNEL);
+ if (!bvec)
+ return -ENOMEM;
+
+ for (i = 0; i < npages; i++) {
+ bvec[i].bv_page = pages[i];
+ bvec[i].bv_offset = (i == 0) ? cur_off : 0;
+ bvec[i].bv_len = min_t(unsigned int, PAGE_SIZE, data_size);
+ data_size -= bvec[i].bv_len;
+ }
+
+ if (data_size != 0) {
+ cifs_dbg(VFS, "%s: something went wrong\n", __func__);
+ kfree(bvec);
+ return -EIO;
+ }
+
+ *page_vec = bvec;
+ return 0;
+}
+
+static int
+handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid,
+ char *buf, unsigned int buf_len, struct page **pages,
+ unsigned int npages, unsigned int page_data_size)
+{
+ unsigned int data_offset;
+ unsigned int data_len;
+ unsigned int cur_off;
+ unsigned int cur_page_idx;
+ unsigned int pad_len;
+ struct cifs_readdata *rdata = mid->callback_data;
+ struct smb2_sync_hdr *shdr = get_sync_hdr(buf);
+ struct bio_vec *bvec = NULL;
+ struct iov_iter iter;
+ struct kvec iov;
+ int length;
+
+ if (shdr->Command != SMB2_READ) {
+ cifs_dbg(VFS, "only big read responses are supported\n");
+ return -ENOTSUPP;
+ }
+
+ if (server->ops->is_status_pending &&
+ server->ops->is_status_pending(buf, server, 0))
+ return -1;
+
+ rdata->result = server->ops->map_error(buf, false);
+ if (rdata->result != 0) {
+ cifs_dbg(FYI, "%s: server returned error %d\n",
+ __func__, rdata->result);
+ dequeue_mid(mid, rdata->result);
+ return 0;
+ }
+
+ data_offset = server->ops->read_data_offset(buf) + 4;
+ data_len = server->ops->read_data_length(buf);
+
+ if (data_offset < server->vals->read_rsp_size) {
+ /*
+ * win2k8 sometimes sends an offset of 0 when the read
+ * is beyond the EOF. Treat it as if the data starts just after
+ * the header.
+ */
+ cifs_dbg(FYI, "%s: data offset (%u) inside read response header\n",
+ __func__, data_offset);
+ data_offset = server->vals->read_rsp_size;
+ } else if (data_offset > MAX_CIFS_SMALL_BUFFER_SIZE) {
+ /* data_offset is beyond the end of smallbuf */
+ cifs_dbg(FYI, "%s: data offset (%u) beyond end of smallbuf\n",
+ __func__, data_offset);
+ rdata->result = -EIO;
+ dequeue_mid(mid, rdata->result);
+ return 0;
+ }
+
+ pad_len = data_offset - server->vals->read_rsp_size;
+
+ if (buf_len <= data_offset) {
+ /* read response payload is in pages */
+ cur_page_idx = pad_len / PAGE_SIZE;
+ cur_off = pad_len % PAGE_SIZE;
+
+ if (cur_page_idx != 0) {
+ /* data offset is beyond the 1st page of response */
+ cifs_dbg(FYI, "%s: data offset (%u) beyond 1st page of response\n",
+ __func__, data_offset);
+ rdata->result = -EIO;
+ dequeue_mid(mid, rdata->result);
+ return 0;
+ }
+
+ if (data_len > page_data_size - pad_len) {
+ /* data_len is corrupt -- discard frame */
+ rdata->result = -EIO;
+ dequeue_mid(mid, rdata->result);
+ return 0;
+ }
+
+ rdata->result = init_read_bvec(pages, npages, page_data_size,
+ cur_off, &bvec);
+ if (rdata->result != 0) {
+ dequeue_mid(mid, rdata->result);
+ return 0;
+ }
+
+ iov_iter_bvec(&iter, WRITE | ITER_BVEC, bvec, npages, data_len);
+ } else if (buf_len >= data_offset + data_len) {
+ /* read response payload is in buf */
+ WARN_ONCE(npages > 0, "read data can be either in buf or in pages");
+ iov.iov_base = buf + data_offset;
+ iov.iov_len = data_len;
+ iov_iter_kvec(&iter, WRITE | ITER_KVEC, &iov, 1, data_len);
+ } else {
+ /* read response payload cannot be in both buf and pages */
+ WARN_ONCE(1, "buf can not contain only a part of read data");
+ rdata->result = -EIO;
+ dequeue_mid(mid, rdata->result);
+ return 0;
+ }
+
+ /* set up first iov for signature check */
+ rdata->iov[0].iov_base = buf;
+ rdata->iov[0].iov_len = 4;
+ rdata->iov[1].iov_base = buf + 4;
+ rdata->iov[1].iov_len = server->vals->read_rsp_size - 4;
+ cifs_dbg(FYI, "0: iov_base=%p iov_len=%zu\n",
+ rdata->iov[0].iov_base, server->vals->read_rsp_size);
+
+ length = rdata->copy_into_pages(server, rdata, &iter);
+
+ kfree(bvec);
+
+ if (length < 0)
+ return length;
+
+ dequeue_mid(mid, false);
+ return length;
+}
+
+static int
+receive_encrypted_read(struct TCP_Server_Info *server, struct mid_q_entry **mid)
+{
+ char *buf = server->smallbuf;
+ struct smb2_transform_hdr *tr_hdr = (struct smb2_transform_hdr *)buf;
+ unsigned int npages;
+ struct page **pages;
+ unsigned int len;
+ unsigned int buflen = get_rfc1002_length(buf) + 4;
+ int rc;
+ int i = 0;
+
+ len = min_t(unsigned int, buflen, server->vals->read_rsp_size - 4 +
+ sizeof(struct smb2_transform_hdr)) - HEADER_SIZE(server) + 1;
+
+ rc = cifs_read_from_socket(server, buf + HEADER_SIZE(server) - 1, len);
+ if (rc < 0)
+ return rc;
+ server->total_read += rc;
+
+ len = le32_to_cpu(tr_hdr->OriginalMessageSize) + 4 -
+ server->vals->read_rsp_size;
+ npages = DIV_ROUND_UP(len, PAGE_SIZE);
+
+ pages = kmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
+ if (!pages) {
+ rc = -ENOMEM;
+ goto discard_data;
+ }
+
+ for (; i < npages; i++) {
+ pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
+ if (!pages[i]) {
+ rc = -ENOMEM;
+ goto discard_data;
+ }
+ }
+
+ /* read read data into pages */
+ rc = read_data_into_pages(server, pages, npages, len);
+ if (rc)
+ goto free_pages;
+
+ rc = cifs_discard_remaining_data(server);
+ if (rc)
+ goto free_pages;
+
+ rc = decrypt_raw_data(server, buf, server->vals->read_rsp_size - 4,
+ pages, npages, len);
+ if (rc)
+ goto free_pages;
+
+ *mid = smb2_find_mid(server, buf);
+ if (*mid == NULL)
+ cifs_dbg(FYI, "mid not found\n");
+ else {
+ cifs_dbg(FYI, "mid found\n");
+ (*mid)->decrypted = true;
+ rc = handle_read_data(server, *mid, buf,
+ server->vals->read_rsp_size,
+ pages, npages, len);
+ }
+
+free_pages:
+ for (i = i - 1; i >= 0; i--)
+ put_page(pages[i]);
+ kfree(pages);
+ return rc;
+discard_data:
+ cifs_discard_remaining_data(server);
+ goto free_pages;
+}
+
+static int
+receive_encrypted_standard(struct TCP_Server_Info *server,
+ struct mid_q_entry **mid)
+{
+ int length;
+ char *buf = server->smallbuf;
+ unsigned int pdu_length = get_rfc1002_length(buf);
+ unsigned int buf_size;
+ struct mid_q_entry *mid_entry;
+
+ /* switch to large buffer if too big for a small one */
+ if (pdu_length + 4 > MAX_CIFS_SMALL_BUFFER_SIZE) {
+ server->large_buf = true;
+ memcpy(server->bigbuf, buf, server->total_read);
+ buf = server->bigbuf;
+ }
+
+ /* now read the rest */
+ length = cifs_read_from_socket(server, buf + HEADER_SIZE(server) - 1,
+ pdu_length - HEADER_SIZE(server) + 1 + 4);
+ if (length < 0)
+ return length;
+ server->total_read += length;
+
+ buf_size = pdu_length + 4 - sizeof(struct smb2_transform_hdr);
+ length = decrypt_raw_data(server, buf, buf_size, NULL, 0, 0);
+ if (length)
+ return length;
+
+ mid_entry = smb2_find_mid(server, buf);
+ if (mid_entry == NULL)
+ cifs_dbg(FYI, "mid not found\n");
+ else {
+ cifs_dbg(FYI, "mid found\n");
+ mid_entry->decrypted = true;
+ }
+
+ *mid = mid_entry;
+
+ if (mid_entry && mid_entry->handle)
+ return mid_entry->handle(server, mid_entry);
+
+ return cifs_handle_standard(server, mid_entry);
+}
+
+static int
+smb3_receive_transform(struct TCP_Server_Info *server, struct mid_q_entry **mid)
+{
+ char *buf = server->smallbuf;
+ unsigned int pdu_length = get_rfc1002_length(buf);
+ struct smb2_transform_hdr *tr_hdr = (struct smb2_transform_hdr *)buf;
+ unsigned int orig_len = le32_to_cpu(tr_hdr->OriginalMessageSize);
+
+ if (pdu_length + 4 < sizeof(struct smb2_transform_hdr) +
+ sizeof(struct smb2_sync_hdr)) {
+ cifs_dbg(VFS, "Transform message is too small (%u)\n",
+ pdu_length);
+ cifs_reconnect(server);
+ wake_up(&server->response_q);
+ return -ECONNABORTED;
+ }
+
+ if (pdu_length + 4 < orig_len + sizeof(struct smb2_transform_hdr)) {
+ cifs_dbg(VFS, "Transform message is broken\n");
+ cifs_reconnect(server);
+ wake_up(&server->response_q);
+ return -ECONNABORTED;
+ }
+
+ if (pdu_length + 4 > CIFSMaxBufSize + MAX_HEADER_SIZE(server))
+ return receive_encrypted_read(server, mid);
+
+ return receive_encrypted_standard(server, mid);
+}
+
+int
+smb3_handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid)
+{
+ char *buf = server->large_buf ? server->bigbuf : server->smallbuf;
+
+ return handle_read_data(server, mid, buf, get_rfc1002_length(buf) + 4,
+ NULL, 0, 0);
+}
+
struct smb_version_operations smb20_operations = {
.compare_fids = smb2_compare_fids,
.setup_request = smb2_setup_request,
@@ -1791,6 +2422,10 @@ struct smb_version_operations smb30_operations = {
.dir_needs_close = smb2_dir_needs_close,
.fallocate = smb3_fallocate,
.enum_snapshots = smb3_enum_snapshots,
+ .init_transform_rq = smb3_init_transform_rq,
+ .free_transform_rq = smb3_free_transform_rq,
+ .is_transform_hdr = smb3_is_transform_hdr,
+ .receive_transform = smb3_receive_transform,
};
#ifdef CONFIG_CIFS_SMB311
@@ -1879,6 +2514,10 @@ struct smb_version_operations smb311_operations = {
.dir_needs_close = smb2_dir_needs_close,
.fallocate = smb3_fallocate,
.enum_snapshots = smb3_enum_snapshots,
+ .init_transform_rq = smb3_init_transform_rq,
+ .free_transform_rq = smb3_free_transform_rq,
+ .is_transform_hdr = smb3_is_transform_hdr,
+ .receive_transform = smb3_receive_transform,
};
#endif /* CIFS_SMB311 */
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 87457227812c..ad83b3db2840 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -77,45 +77,42 @@ static const int smb2_req_struct_sizes[NUMBER_OF_SMB2_COMMANDS] = {
/* SMB2_OPLOCK_BREAK */ 24 /* BB this is 36 for LEASE_BREAK variant */
};
+static int encryption_required(const struct cifs_tcon *tcon)
+{
+ if (!tcon)
+ return 0;
+ if ((tcon->ses->session_flags & SMB2_SESSION_FLAG_ENCRYPT_DATA) ||
+ (tcon->share_flags & SHI1005_FLAGS_ENCRYPT_DATA))
+ return 1;
+ if (tcon->seal &&
+ (tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION))
+ return 1;
+ return 0;
+}
static void
-smb2_hdr_assemble(struct smb2_hdr *hdr, __le16 smb2_cmd /* command */ ,
+smb2_hdr_assemble(struct smb2_sync_hdr *shdr, __le16 smb2_cmd,
const struct cifs_tcon *tcon)
{
- struct smb2_pdu *pdu = (struct smb2_pdu *)hdr;
- char *temp = (char *)hdr;
- /* lookup word count ie StructureSize from table */
- __u16 parmsize = smb2_req_struct_sizes[le16_to_cpu(smb2_cmd)];
-
- /*
- * smaller than SMALL_BUFFER_SIZE but bigger than fixed area of
- * largest operations (Create)
- */
- memset(temp, 0, 256);
-
- /* Note this is only network field converted to big endian */
- hdr->smb2_buf_length = cpu_to_be32(parmsize + sizeof(struct smb2_hdr)
- - 4 /* RFC 1001 length field itself not counted */);
-
- hdr->ProtocolId = SMB2_PROTO_NUMBER;
- hdr->StructureSize = cpu_to_le16(64);
- hdr->Command = smb2_cmd;
+ shdr->ProtocolId = SMB2_PROTO_NUMBER;
+ shdr->StructureSize = cpu_to_le16(64);
+ shdr->Command = smb2_cmd;
if (tcon && tcon->ses && tcon->ses->server) {
struct TCP_Server_Info *server = tcon->ses->server;
spin_lock(&server->req_lock);
/* Request up to 2 credits but don't go over the limit. */
if (server->credits >= server->max_credits)
- hdr->CreditRequest = cpu_to_le16(0);
+ shdr->CreditRequest = cpu_to_le16(0);
else
- hdr->CreditRequest = cpu_to_le16(
+ shdr->CreditRequest = cpu_to_le16(
min_t(int, server->max_credits -
server->credits, 2));
spin_unlock(&server->req_lock);
} else {
- hdr->CreditRequest = cpu_to_le16(2);
+ shdr->CreditRequest = cpu_to_le16(2);
}
- hdr->ProcessId = cpu_to_le32((__u16)current->tgid);
+ shdr->ProcessId = cpu_to_le32((__u16)current->tgid);
if (!tcon)
goto out;
@@ -124,13 +121,13 @@ smb2_hdr_assemble(struct smb2_hdr *hdr, __le16 smb2_cmd /* command */ ,
/* See sections 2.2.4 and 3.2.4.1.5 of MS-SMB2 */
if ((tcon->ses) && (tcon->ses->server) &&
(tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU))
- hdr->CreditCharge = cpu_to_le16(1);
+ shdr->CreditCharge = cpu_to_le16(1);
/* else CreditCharge MBZ */
- hdr->TreeId = tcon->tid;
+ shdr->TreeId = tcon->tid;
/* Uid is not converted */
if (tcon->ses)
- hdr->SessionId = tcon->ses->Suid;
+ shdr->SessionId = tcon->ses->Suid;
/*
* If we would set SMB2_FLAGS_DFS_OPERATIONS on open we also would have
@@ -143,12 +140,12 @@ smb2_hdr_assemble(struct smb2_hdr *hdr, __le16 smb2_cmd /* command */ ,
* but it is safer to net set it for now.
*/
/* if (tcon->share_flags & SHI1005_FLAGS_DFS)
- hdr->Flags |= SMB2_FLAGS_DFS_OPERATIONS; */
+ shdr->Flags |= SMB2_FLAGS_DFS_OPERATIONS; */
- if (tcon->ses && tcon->ses->server && tcon->ses->server->sign)
- hdr->Flags |= SMB2_FLAGS_SIGNED;
+ if (tcon->ses && tcon->ses->server && tcon->ses->server->sign &&
+ !encryption_required(tcon))
+ shdr->Flags |= SMB2_FLAGS_SIGNED;
out:
- pdu->StructureSize2 = cpu_to_le16(parmsize);
return;
}
@@ -289,16 +286,74 @@ out:
return rc;
}
+static void
+fill_small_buf(__le16 smb2_command, struct cifs_tcon *tcon, void *buf,
+ unsigned int *total_len)
+{
+ struct smb2_sync_pdu *spdu = (struct smb2_sync_pdu *)buf;
+ /* lookup word count ie StructureSize from table */
+ __u16 parmsize = smb2_req_struct_sizes[le16_to_cpu(smb2_command)];
+
+ /*
+ * smaller than SMALL_BUFFER_SIZE but bigger than fixed area of
+ * largest operations (Create)
+ */
+ memset(buf, 0, 256);
+
+ smb2_hdr_assemble(&spdu->sync_hdr, smb2_command, tcon);
+ spdu->StructureSize2 = cpu_to_le16(parmsize);
+
+ *total_len = parmsize + sizeof(struct smb2_sync_hdr);
+}
+
+/* init request without RFC1001 length at the beginning */
+static int
+smb2_plain_req_init(__le16 smb2_command, struct cifs_tcon *tcon,
+ void **request_buf, unsigned int *total_len)
+{
+ int rc;
+ struct smb2_sync_hdr *shdr;
+
+ rc = smb2_reconnect(smb2_command, tcon);
+ if (rc)
+ return rc;
+
+ /* BB eventually switch this to SMB2 specific small buf size */
+ *request_buf = cifs_small_buf_get();
+ if (*request_buf == NULL) {
+ /* BB should we add a retry in here if not a writepage? */
+ return -ENOMEM;
+ }
+
+ shdr = (struct smb2_sync_hdr *)(*request_buf);
+
+ fill_small_buf(smb2_command, tcon, shdr, total_len);
+
+ if (tcon != NULL) {
+#ifdef CONFIG_CIFS_STATS2
+ uint16_t com_code = le16_to_cpu(smb2_command);
+
+ cifs_stats_inc(&tcon->stats.smb2_stats.smb2_com_sent[com_code]);
+#endif
+ cifs_stats_inc(&tcon->num_smbs_sent);
+ }
+
+ return rc;
+}
+
/*
* Allocate and return pointer to an SMB request hdr, and set basic
* SMB information in the SMB header. If the return code is zero, this
- * function must have filled in request_buf pointer.
+ * function must have filled in request_buf pointer. The returned buffer
+ * has RFC1001 length at the beginning.
*/
static int
small_smb2_init(__le16 smb2_command, struct cifs_tcon *tcon,
void **request_buf)
{
- int rc = 0;
+ int rc;
+ unsigned int total_len;
+ struct smb2_pdu *pdu;
rc = smb2_reconnect(smb2_command, tcon);
if (rc)
@@ -311,7 +366,12 @@ small_smb2_init(__le16 smb2_command, struct cifs_tcon *tcon,
return -ENOMEM;
}
- smb2_hdr_assemble((struct smb2_hdr *) *request_buf, smb2_command, tcon);
+ pdu = (struct smb2_pdu *)(*request_buf);
+
+ fill_small_buf(smb2_command, tcon, get_sync_hdr(pdu), &total_len);
+
+ /* Note this is only network field converted to big endian */
+ pdu->hdr.smb2_buf_length = cpu_to_be32(total_len);
if (tcon != NULL) {
#ifdef CONFIG_CIFS_STATS2
@@ -376,7 +436,6 @@ static void assemble_neg_contexts(struct smb2_negotiate_req *req)
}
#endif /* SMB311 */
-
/*
*
* SMB2 Worker functions follow:
@@ -398,6 +457,7 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
struct smb2_negotiate_req *req;
struct smb2_negotiate_rsp *rsp;
struct kvec iov[1];
+ struct kvec rsp_iov;
int rc = 0;
int resp_buftype;
struct TCP_Server_Info *server = ses->server;
@@ -416,7 +476,7 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
if (rc)
return rc;
- req->hdr.SessionId = 0;
+ req->hdr.sync_hdr.SessionId = 0;
req->Dialects[0] = cpu_to_le16(ses->server->vals->protocol_id);
@@ -446,9 +506,9 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
/* 4 for rfc1002 length field */
iov[0].iov_len = get_rfc1002_length(req) + 4;
- rc = SendReceive2(xid, ses, iov, 1, &resp_buftype, flags);
-
- rsp = (struct smb2_negotiate_rsp *)iov[0].iov_base;
+ rc = SendReceive2(xid, ses, iov, 1, &resp_buftype, flags, &rsp_iov);
+ cifs_small_buf_release(req);
+ rsp = (struct smb2_negotiate_rsp *)rsp_iov.iov_base;
/*
* No tcon so can't do
* cifs_stats_inc(&tcon->stats.smb2_stats.smb2_com_fail[SMB2...]);
@@ -627,14 +687,15 @@ SMB2_sess_alloc_buffer(struct SMB2_sess_data *sess_data)
if (rc)
return rc;
- req->hdr.SessionId = 0; /* First session, not a reauthenticate */
+ /* First session, not a reauthenticate */
+ req->hdr.sync_hdr.SessionId = 0;
/* if reconnect, we need to send previous sess id, otherwise it is 0 */
req->PreviousSessionId = sess_data->previous_session;
req->Flags = 0; /* MBZ */
/* to enable echos and oplocks */
- req->hdr.CreditRequest = cpu_to_le16(3);
+ req->hdr.sync_hdr.CreditRequest = cpu_to_le16(3);
/* only one of SMB2 signing flags may be set in SMB2 request */
if (server->sign)
@@ -671,6 +732,7 @@ SMB2_sess_sendreceive(struct SMB2_sess_data *sess_data)
{
int rc;
struct smb2_sess_setup_req *req = sess_data->iov[0].iov_base;
+ struct kvec rsp_iov = { NULL, 0 };
/* Testing shows that buffer offset must be at location of Buffer[0] */
req->SecurityBufferOffset =
@@ -685,7 +747,9 @@ SMB2_sess_sendreceive(struct SMB2_sess_data *sess_data)
rc = SendReceive2(sess_data->xid, sess_data->ses,
sess_data->iov, 2,
&sess_data->buf0_type,
- CIFS_LOG_ERROR | CIFS_NEG_OP);
+ CIFS_LOG_ERROR | CIFS_NEG_OP, &rsp_iov);
+ cifs_small_buf_release(sess_data->iov[0].iov_base);
+ memcpy(&sess_data->iov[0], &rsp_iov, sizeof(struct kvec));
return rc;
}
@@ -697,15 +761,13 @@ SMB2_sess_establish_session(struct SMB2_sess_data *sess_data)
struct cifs_ses *ses = sess_data->ses;
mutex_lock(&ses->server->srv_mutex);
- if (ses->server->sign && ses->server->ops->generate_signingkey) {
+ if (ses->server->ops->generate_signingkey) {
rc = ses->server->ops->generate_signingkey(ses);
- kfree(ses->auth_key.response);
- ses->auth_key.response = NULL;
if (rc) {
cifs_dbg(FYI,
"SMB3 session key generation failed\n");
mutex_unlock(&ses->server->srv_mutex);
- goto keygen_exit;
+ return rc;
}
}
if (!ses->server->session_estab) {
@@ -719,12 +781,6 @@ SMB2_sess_establish_session(struct SMB2_sess_data *sess_data)
ses->status = CifsGood;
ses->need_reconnect = false;
spin_unlock(&GlobalMid_Lock);
-
-keygen_exit:
- if (!ses->server->sign) {
- kfree(ses->auth_key.response);
- ses->auth_key.response = NULL;
- }
return rc;
}
@@ -781,11 +837,9 @@ SMB2_auth_kerberos(struct SMB2_sess_data *sess_data)
goto out_put_spnego_key;
rsp = (struct smb2_sess_setup_rsp *)sess_data->iov[0].iov_base;
- ses->Suid = rsp->hdr.SessionId;
+ ses->Suid = rsp->hdr.sync_hdr.SessionId;
ses->session_flags = le16_to_cpu(rsp->SessionFlags);
- if (ses->session_flags & SMB2_SESSION_FLAG_ENCRYPT_DATA)
- cifs_dbg(VFS, "SMB3 encryption not supported yet\n");
rc = SMB2_sess_establish_session(sess_data);
out_put_spnego_key:
@@ -859,7 +913,7 @@ SMB2_sess_auth_rawntlmssp_negotiate(struct SMB2_sess_data *sess_data)
/* If true, rc here is expected and not an error */
if (sess_data->buf0_type != CIFS_NO_BUFFER &&
- rsp->hdr.Status == STATUS_MORE_PROCESSING_REQUIRED)
+ rsp->hdr.sync_hdr.Status == STATUS_MORE_PROCESSING_REQUIRED)
rc = 0;
if (rc)
@@ -880,10 +934,8 @@ SMB2_sess_auth_rawntlmssp_negotiate(struct SMB2_sess_data *sess_data)
cifs_dbg(FYI, "rawntlmssp session setup challenge phase\n");
- ses->Suid = rsp->hdr.SessionId;
+ ses->Suid = rsp->hdr.sync_hdr.SessionId;
ses->session_flags = le16_to_cpu(rsp->SessionFlags);
- if (ses->session_flags & SMB2_SESSION_FLAG_ENCRYPT_DATA)
- cifs_dbg(VFS, "SMB3 encryption not supported yet\n");
out:
kfree(ntlmssp_blob);
@@ -916,7 +968,7 @@ SMB2_sess_auth_rawntlmssp_authenticate(struct SMB2_sess_data *sess_data)
goto out;
req = (struct smb2_sess_setup_req *) sess_data->iov[0].iov_base;
- req->hdr.SessionId = ses->Suid;
+ req->hdr.sync_hdr.SessionId = ses->Suid;
rc = build_ntlmssp_auth_blob(&ntlmssp_blob, &blob_length, ses,
sess_data->nls_cp);
@@ -940,10 +992,8 @@ SMB2_sess_auth_rawntlmssp_authenticate(struct SMB2_sess_data *sess_data)
rsp = (struct smb2_sess_setup_rsp *)sess_data->iov[0].iov_base;
- ses->Suid = rsp->hdr.SessionId;
+ ses->Suid = rsp->hdr.sync_hdr.SessionId;
ses->session_flags = le16_to_cpu(rsp->SessionFlags);
- if (ses->session_flags & SMB2_SESSION_FLAG_ENCRYPT_DATA)
- cifs_dbg(VFS, "SMB3 encryption not supported yet\n");
rc = SMB2_sess_establish_session(sess_data);
out:
@@ -1018,6 +1068,7 @@ SMB2_logoff(const unsigned int xid, struct cifs_ses *ses)
struct smb2_logoff_req *req; /* response is also trivial struct */
int rc = 0;
struct TCP_Server_Info *server;
+ int flags = 0;
cifs_dbg(FYI, "disconnect session %p\n", ses);
@@ -1035,11 +1086,15 @@ SMB2_logoff(const unsigned int xid, struct cifs_ses *ses)
return rc;
/* since no tcon, smb2_init can not do this, so do here */
- req->hdr.SessionId = ses->Suid;
- if (server->sign)
- req->hdr.Flags |= SMB2_FLAGS_SIGNED;
+ req->hdr.sync_hdr.SessionId = ses->Suid;
+
+ if (ses->session_flags & SMB2_SESSION_FLAG_ENCRYPT_DATA)
+ flags |= CIFS_TRANSFORM_REQ;
+ else if (server->sign)
+ req->hdr.sync_hdr.Flags |= SMB2_FLAGS_SIGNED;
- rc = SendReceiveNoRsp(xid, ses, (char *) &req->hdr, 0);
+ rc = SendReceiveNoRsp(xid, ses, (char *) req, flags);
+ cifs_small_buf_release(req);
/*
* No tcon so can't do
* cifs_stats_inc(&tcon->stats.smb2_stats.smb2_com_fail[SMB2...]);
@@ -1071,11 +1126,13 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
struct smb2_tree_connect_req *req;
struct smb2_tree_connect_rsp *rsp = NULL;
struct kvec iov[2];
+ struct kvec rsp_iov;
int rc = 0;
int resp_buftype;
int unc_path_len;
struct TCP_Server_Info *server;
__le16 *unc_path = NULL;
+ int flags = 0;
cifs_dbg(FYI, "TCON\n");
@@ -1087,12 +1144,6 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
if (tcon && tcon->bad_network_name)
return -ENOENT;
- if ((tcon && tcon->seal) &&
- ((ses->server->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION) == 0)) {
- cifs_dbg(VFS, "encryption requested but no server support");
- return -EOPNOTSUPP;
- }
-
unc_path = kmalloc(MAX_SHARENAME_LENGTH * 2, GFP_KERNEL);
if (unc_path == NULL)
return -ENOMEM;
@@ -1111,11 +1162,15 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
}
if (tcon == NULL) {
+ if ((ses->session_flags & SMB2_SESSION_FLAG_ENCRYPT_DATA))
+ flags |= CIFS_TRANSFORM_REQ;
+
/* since no tcon, smb2_init can not do this, so do here */
- req->hdr.SessionId = ses->Suid;
+ req->hdr.sync_hdr.SessionId = ses->Suid;
/* if (ses->server->sec_mode & SECMODE_SIGN_REQUIRED)
req->hdr.Flags |= SMB2_FLAGS_SIGNED; */
- }
+ } else if (encryption_required(tcon))
+ flags |= CIFS_TRANSFORM_REQ;
iov[0].iov_base = (char *)req;
/* 4 for rfc1002 length field and 1 for pad */
@@ -1130,8 +1185,9 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
inc_rfc1001_len(req, unc_path_len - 1 /* pad */);
- rc = SendReceive2(xid, ses, iov, 2, &resp_buftype, 0);
- rsp = (struct smb2_tree_connect_rsp *)iov[0].iov_base;
+ rc = SendReceive2(xid, ses, iov, 2, &resp_buftype, flags, &rsp_iov);
+ cifs_small_buf_release(req);
+ rsp = (struct smb2_tree_connect_rsp *)rsp_iov.iov_base;
if (rc != 0) {
if (tcon) {
@@ -1142,7 +1198,7 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
}
if (tcon == NULL) {
- ses->ipc_tid = rsp->hdr.TreeId;
+ ses->ipc_tid = rsp->hdr.sync_hdr.TreeId;
goto tcon_exit;
}
@@ -1165,15 +1221,18 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
tcon->maximal_access = le32_to_cpu(rsp->MaximalAccess);
tcon->tidStatus = CifsGood;
tcon->need_reconnect = false;
- tcon->tid = rsp->hdr.TreeId;
+ tcon->tid = rsp->hdr.sync_hdr.TreeId;
strlcpy(tcon->treeName, tree, sizeof(tcon->treeName));
if ((rsp->Capabilities & SMB2_SHARE_CAP_DFS) &&
((tcon->share_flags & SHI1005_FLAGS_DFS) == 0))
cifs_dbg(VFS, "DFS capability contradicts DFS flag\n");
+
+ if (tcon->seal &&
+ !(tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION))
+ cifs_dbg(VFS, "Encryption is requested but not supported\n");
+
init_copy_chunk_defaults(tcon);
- if (tcon->share_flags & SHI1005_FLAGS_ENCRYPT_DATA)
- cifs_dbg(VFS, "Encrypted shares not supported");
if (tcon->ses->server->ops->validate_negotiate)
rc = tcon->ses->server->ops->validate_negotiate(xid, tcon);
tcon_exit:
@@ -1182,7 +1241,7 @@ tcon_exit:
return rc;
tcon_error_exit:
- if (rsp->hdr.Status == STATUS_BAD_NETWORK_NAME) {
+ if (rsp->hdr.sync_hdr.Status == STATUS_BAD_NETWORK_NAME) {
cifs_dbg(VFS, "BAD_NETWORK_NAME: %s\n", tree);
if (tcon)
tcon->bad_network_name = true;
@@ -1197,6 +1256,7 @@ SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon)
int rc = 0;
struct TCP_Server_Info *server;
struct cifs_ses *ses = tcon->ses;
+ int flags = 0;
cifs_dbg(FYI, "Tree Disconnect\n");
@@ -1212,7 +1272,11 @@ SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon)
if (rc)
return rc;
- rc = SendReceiveNoRsp(xid, ses, (char *)&req->hdr, 0);
+ if (encryption_required(tcon))
+ flags |= CIFS_TRANSFORM_REQ;
+
+ rc = SendReceiveNoRsp(xid, ses, (char *)req, flags);
+ cifs_small_buf_release(req);
if (rc)
cifs_stats_fail_inc(tcon, SMB2_TREE_DISCONNECT_HE);
@@ -1474,14 +1538,16 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
struct cifs_tcon *tcon = oparms->tcon;
struct cifs_ses *ses = tcon->ses;
struct kvec iov[4];
+ struct kvec rsp_iov;
int resp_buftype;
int uni_path_len;
__le16 *copy_path = NULL;
int copy_size;
int rc = 0;
- unsigned int num_iovecs = 2;
+ unsigned int n_iov = 2;
__u32 file_attributes = 0;
char *dhc_buf = NULL, *lc_buf = NULL;
+ int flags = 0;
cifs_dbg(FYI, "create/open\n");
@@ -1494,6 +1560,9 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
if (rc)
return rc;
+ if (encryption_required(tcon))
+ flags |= CIFS_TRANSFORM_REQ;
+
if (oparms->create_options & CREATE_OPTION_READONLY)
file_attributes |= ATTR_READONLY;
if (oparms->create_options & CREATE_OPTION_SPECIAL)
@@ -1544,25 +1613,25 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
*oplock == SMB2_OPLOCK_LEVEL_NONE)
req->RequestedOplockLevel = *oplock;
else {
- rc = add_lease_context(server, iov, &num_iovecs, oplock);
+ rc = add_lease_context(server, iov, &n_iov, oplock);
if (rc) {
cifs_small_buf_release(req);
kfree(copy_path);
return rc;
}
- lc_buf = iov[num_iovecs-1].iov_base;
+ lc_buf = iov[n_iov-1].iov_base;
}
if (*oplock == SMB2_OPLOCK_LEVEL_BATCH) {
/* need to set Next field of lease context if we request it */
if (server->capabilities & SMB2_GLOBAL_CAP_LEASING) {
struct create_context *ccontext =
- (struct create_context *)iov[num_iovecs-1].iov_base;
+ (struct create_context *)iov[n_iov-1].iov_base;
ccontext->Next =
cpu_to_le32(server->vals->create_lease_size);
}
- rc = add_durable_context(iov, &num_iovecs, oparms,
+ rc = add_durable_context(iov, &n_iov, oparms,
tcon->use_persistent);
if (rc) {
cifs_small_buf_release(req);
@@ -1570,11 +1639,12 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
kfree(lc_buf);
return rc;
}
- dhc_buf = iov[num_iovecs-1].iov_base;
+ dhc_buf = iov[n_iov-1].iov_base;
}
- rc = SendReceive2(xid, ses, iov, num_iovecs, &resp_buftype, 0);
- rsp = (struct smb2_create_rsp *)iov[0].iov_base;
+ rc = SendReceive2(xid, ses, iov, n_iov, &resp_buftype, flags, &rsp_iov);
+ cifs_small_buf_release(req);
+ rsp = (struct smb2_create_rsp *)rsp_iov.iov_base;
if (rc != 0) {
cifs_stats_fail_inc(tcon, SMB2_CREATE_HE);
@@ -1618,12 +1688,15 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
{
struct smb2_ioctl_req *req;
struct smb2_ioctl_rsp *rsp;
+ struct smb2_sync_hdr *shdr;
struct TCP_Server_Info *server;
struct cifs_ses *ses;
struct kvec iov[2];
+ struct kvec rsp_iov;
int resp_buftype;
- int num_iovecs;
+ int n_iov;
int rc = 0;
+ int flags = 0;
cifs_dbg(FYI, "SMB2 IOCTL\n");
@@ -1648,6 +1721,9 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
if (rc)
return rc;
+ if (encryption_required(tcon))
+ flags |= CIFS_TRANSFORM_REQ;
+
req->CtlCode = cpu_to_le32(opcode);
req->PersistentFileId = persistent_fid;
req->VolatileFileId = volatile_fid;
@@ -1659,9 +1735,9 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
cpu_to_le32(offsetof(struct smb2_ioctl_req, Buffer) - 4);
iov[1].iov_base = in_data;
iov[1].iov_len = indatalen;
- num_iovecs = 2;
+ n_iov = 2;
} else
- num_iovecs = 1;
+ n_iov = 1;
req->OutputOffset = 0;
req->OutputCount = 0; /* MBZ */
@@ -1698,8 +1774,9 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
iov[0].iov_len = get_rfc1002_length(req) + 4;
- rc = SendReceive2(xid, ses, iov, num_iovecs, &resp_buftype, 0);
- rsp = (struct smb2_ioctl_rsp *)iov[0].iov_base;
+ rc = SendReceive2(xid, ses, iov, n_iov, &resp_buftype, flags, &rsp_iov);
+ cifs_small_buf_release(req);
+ rsp = (struct smb2_ioctl_rsp *)rsp_iov.iov_base;
if ((rc != 0) && (rc != -EINVAL)) {
cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE);
@@ -1742,9 +1819,8 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
goto ioctl_exit;
}
- memcpy(*out_data,
- (char *)&rsp->hdr.ProtocolId + le32_to_cpu(rsp->OutputOffset),
- *plen);
+ shdr = get_sync_hdr(rsp);
+ memcpy(*out_data, (char *)shdr + le32_to_cpu(rsp->OutputOffset), *plen);
ioctl_exit:
free_rsp_buf(resp_buftype, rsp);
return rc;
@@ -1784,8 +1860,10 @@ SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
struct TCP_Server_Info *server;
struct cifs_ses *ses = tcon->ses;
struct kvec iov[1];
+ struct kvec rsp_iov;
int resp_buftype;
int rc = 0;
+ int flags = 0;
cifs_dbg(FYI, "Close\n");
@@ -1798,6 +1876,9 @@ SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
if (rc)
return rc;
+ if (encryption_required(tcon))
+ flags |= CIFS_TRANSFORM_REQ;
+
req->PersistentFileId = persistent_fid;
req->VolatileFileId = volatile_fid;
@@ -1805,8 +1886,9 @@ SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
/* 4 for rfc1002 length field */
iov[0].iov_len = get_rfc1002_length(req) + 4;
- rc = SendReceive2(xid, ses, iov, 1, &resp_buftype, 0);
- rsp = (struct smb2_close_rsp *)iov[0].iov_base;
+ rc = SendReceive2(xid, ses, iov, 1, &resp_buftype, flags, &rsp_iov);
+ cifs_small_buf_release(req);
+ rsp = (struct smb2_close_rsp *)rsp_iov.iov_base;
if (rc != 0) {
cifs_stats_fail_inc(tcon, SMB2_CLOSE_HE);
@@ -1885,10 +1967,12 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon,
struct smb2_query_info_req *req;
struct smb2_query_info_rsp *rsp = NULL;
struct kvec iov[2];
+ struct kvec rsp_iov;
int rc = 0;
int resp_buftype;
struct TCP_Server_Info *server;
struct cifs_ses *ses = tcon->ses;
+ int flags = 0;
cifs_dbg(FYI, "Query Info\n");
@@ -1901,6 +1985,9 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon,
if (rc)
return rc;
+ if (encryption_required(tcon))
+ flags |= CIFS_TRANSFORM_REQ;
+
req->InfoType = SMB2_O_INFO_FILE;
req->FileInfoClass = info_class;
req->PersistentFileId = persistent_fid;
@@ -1914,8 +2001,9 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon,
/* 4 for rfc1002 length field */
iov[0].iov_len = get_rfc1002_length(req) + 4;
- rc = SendReceive2(xid, ses, iov, 1, &resp_buftype, 0);
- rsp = (struct smb2_query_info_rsp *)iov[0].iov_base;
+ rc = SendReceive2(xid, ses, iov, 1, &resp_buftype, flags, &rsp_iov);
+ cifs_small_buf_release(req);
+ rsp = (struct smb2_query_info_rsp *)rsp_iov.iov_base;
if (rc) {
cifs_stats_fail_inc(tcon, SMB2_QUERY_INFO_HE);
@@ -1963,11 +2051,11 @@ static void
smb2_echo_callback(struct mid_q_entry *mid)
{
struct TCP_Server_Info *server = mid->callback_data;
- struct smb2_echo_rsp *smb2 = (struct smb2_echo_rsp *)mid->resp_buf;
+ struct smb2_echo_rsp *rsp = (struct smb2_echo_rsp *)mid->resp_buf;
unsigned int credits_received = 1;
if (mid->mid_state == MID_RESPONSE_RECEIVED)
- credits_received = le16_to_cpu(smb2->hdr.CreditRequest);
+ credits_received = le16_to_cpu(rsp->hdr.sync_hdr.CreditRequest);
mutex_lock(&server->srv_mutex);
DeleteMidQEntry(mid);
@@ -2029,9 +2117,9 @@ SMB2_echo(struct TCP_Server_Info *server)
{
struct smb2_echo_req *req;
int rc = 0;
- struct kvec iov;
- struct smb_rqst rqst = { .rq_iov = &iov,
- .rq_nvec = 1 };
+ struct kvec iov[2];
+ struct smb_rqst rqst = { .rq_iov = iov,
+ .rq_nvec = 2 };
cifs_dbg(FYI, "In echo request\n");
@@ -2045,14 +2133,16 @@ SMB2_echo(struct TCP_Server_Info *server)
if (rc)
return rc;
- req->hdr.CreditRequest = cpu_to_le16(1);
+ req->hdr.sync_hdr.CreditRequest = cpu_to_le16(1);
- iov.iov_base = (char *)req;
/* 4 for rfc1002 length field */
- iov.iov_len = get_rfc1002_length(req) + 4;
+ iov[0].iov_len = 4;
+ iov[0].iov_base = (char *)req;
+ iov[1].iov_len = get_rfc1002_length(req);
+ iov[1].iov_base = (char *)req + 4;
- rc = cifs_call_async(server, &rqst, NULL, smb2_echo_callback, server,
- CIFS_ECHO_OP);
+ rc = cifs_call_async(server, &rqst, NULL, smb2_echo_callback, NULL,
+ server, CIFS_ECHO_OP);
if (rc)
cifs_dbg(FYI, "Echo request failed: %d\n", rc);
@@ -2068,8 +2158,10 @@ SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
struct TCP_Server_Info *server;
struct cifs_ses *ses = tcon->ses;
struct kvec iov[1];
+ struct kvec rsp_iov;
int resp_buftype;
int rc = 0;
+ int flags = 0;
cifs_dbg(FYI, "Flush\n");
@@ -2082,6 +2174,9 @@ SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
if (rc)
return rc;
+ if (encryption_required(tcon))
+ flags |= CIFS_TRANSFORM_REQ;
+
req->PersistentFileId = persistent_fid;
req->VolatileFileId = volatile_fid;
@@ -2089,12 +2184,13 @@ SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
/* 4 for rfc1002 length field */
iov[0].iov_len = get_rfc1002_length(req) + 4;
- rc = SendReceive2(xid, ses, iov, 1, &resp_buftype, 0);
+ rc = SendReceive2(xid, ses, iov, 1, &resp_buftype, flags, &rsp_iov);
+ cifs_small_buf_release(req);
if (rc != 0)
cifs_stats_fail_inc(tcon, SMB2_FLUSH_HE);
- free_rsp_buf(resp_buftype, iov[0].iov_base);
+ free_rsp_buf(resp_buftype, rsp_iov.iov_base);
return rc;
}
@@ -2103,19 +2199,23 @@ SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
* have the end_of_chain boolean set to true.
*/
static int
-smb2_new_read_req(struct kvec *iov, struct cifs_io_parms *io_parms,
- unsigned int remaining_bytes, int request_type)
+smb2_new_read_req(void **buf, unsigned int *total_len,
+ struct cifs_io_parms *io_parms, unsigned int remaining_bytes,
+ int request_type)
{
int rc = -EACCES;
- struct smb2_read_req *req = NULL;
+ struct smb2_read_plain_req *req = NULL;
+ struct smb2_sync_hdr *shdr;
- rc = small_smb2_init(SMB2_READ, io_parms->tcon, (void **) &req);
+ rc = smb2_plain_req_init(SMB2_READ, io_parms->tcon, (void **) &req,
+ total_len);
if (rc)
return rc;
if (io_parms->tcon->ses->server == NULL)
return -ECONNABORTED;
- req->hdr.ProcessId = cpu_to_le32(io_parms->pid);
+ shdr = &req->sync_hdr;
+ shdr->ProcessId = cpu_to_le32(io_parms->pid);
req->PersistentFileId = io_parms->persistent_fid;
req->VolatileFileId = io_parms->volatile_fid;
@@ -2128,19 +2228,19 @@ smb2_new_read_req(struct kvec *iov, struct cifs_io_parms *io_parms,
if (request_type & CHAINED_REQUEST) {
if (!(request_type & END_OF_CHAIN)) {
- /* 4 for rfc1002 length field */
- req->hdr.NextCommand =
- cpu_to_le32(get_rfc1002_length(req) + 4);
+ /* next 8-byte aligned request */
+ *total_len = DIV_ROUND_UP(*total_len, 8) * 8;
+ shdr->NextCommand = cpu_to_le32(*total_len);
} else /* END_OF_CHAIN */
- req->hdr.NextCommand = 0;
+ shdr->NextCommand = 0;
if (request_type & RELATED_REQUEST) {
- req->hdr.Flags |= SMB2_FLAGS_RELATED_OPERATIONS;
+ shdr->Flags |= SMB2_FLAGS_RELATED_OPERATIONS;
/*
* Related requests use info from previous read request
* in chain.
*/
- req->hdr.SessionId = 0xFFFFFFFF;
- req->hdr.TreeId = 0xFFFFFFFF;
+ shdr->SessionId = 0xFFFFFFFF;
+ shdr->TreeId = 0xFFFFFFFF;
req->PersistentFileId = 0xFFFFFFFF;
req->VolatileFileId = 0xFFFFFFFF;
}
@@ -2150,9 +2250,7 @@ smb2_new_read_req(struct kvec *iov, struct cifs_io_parms *io_parms,
else
req->RemainingBytes = 0;
- iov[0].iov_base = (char *)req;
- /* 4 for rfc1002 length field */
- iov[0].iov_len = get_rfc1002_length(req) + 4;
+ *buf = req;
return rc;
}
@@ -2162,10 +2260,11 @@ smb2_readv_callback(struct mid_q_entry *mid)
struct cifs_readdata *rdata = mid->callback_data;
struct cifs_tcon *tcon = tlink_tcon(rdata->cfile->tlink);
struct TCP_Server_Info *server = tcon->ses->server;
- struct smb2_hdr *buf = (struct smb2_hdr *)rdata->iov.iov_base;
+ struct smb2_sync_hdr *shdr =
+ (struct smb2_sync_hdr *)rdata->iov[1].iov_base;
unsigned int credits_received = 1;
- struct smb_rqst rqst = { .rq_iov = &rdata->iov,
- .rq_nvec = 1,
+ struct smb_rqst rqst = { .rq_iov = rdata->iov,
+ .rq_nvec = 2,
.rq_pages = rdata->pages,
.rq_npages = rdata->nr_pages,
.rq_pagesz = rdata->pagesz,
@@ -2177,9 +2276,9 @@ smb2_readv_callback(struct mid_q_entry *mid)
switch (mid->mid_state) {
case MID_RESPONSE_RECEIVED:
- credits_received = le16_to_cpu(buf->CreditRequest);
+ credits_received = le16_to_cpu(shdr->CreditRequest);
/* result already set, check signature */
- if (server->sign) {
+ if (server->sign && !mid->decrypted) {
int rc;
rc = smb2_verify_signature(&rqst, server);
@@ -2216,16 +2315,19 @@ smb2_readv_callback(struct mid_q_entry *mid)
add_credits(server, credits_received, 0);
}
-/* smb2_async_readv - send an async write, and set up mid to handle result */
+/* smb2_async_readv - send an async read, and set up mid to handle result */
int
smb2_async_readv(struct cifs_readdata *rdata)
{
int rc, flags = 0;
- struct smb2_hdr *buf;
+ char *buf;
+ struct smb2_sync_hdr *shdr;
struct cifs_io_parms io_parms;
- struct smb_rqst rqst = { .rq_iov = &rdata->iov,
- .rq_nvec = 1 };
+ struct smb_rqst rqst = { .rq_iov = rdata->iov,
+ .rq_nvec = 2 };
struct TCP_Server_Info *server;
+ unsigned int total_len;
+ __be32 req_len;
cifs_dbg(FYI, "%s: offset=%llu bytes=%u\n",
__func__, rdata->offset, rdata->bytes);
@@ -2239,7 +2341,7 @@ smb2_async_readv(struct cifs_readdata *rdata)
server = io_parms.tcon->ses->server;
- rc = smb2_new_read_req(&rdata->iov, &io_parms, 0, 0);
+ rc = smb2_new_read_req((void **) &buf, &total_len, &io_parms, 0, 0);
if (rc) {
if (rc == -EAGAIN && rdata->credits) {
/* credits was reset by reconnect */
@@ -2252,26 +2354,34 @@ smb2_async_readv(struct cifs_readdata *rdata)
return rc;
}
- buf = (struct smb2_hdr *)rdata->iov.iov_base;
- /* 4 for rfc1002 length field */
- rdata->iov.iov_len = get_rfc1002_length(rdata->iov.iov_base) + 4;
+ if (encryption_required(io_parms.tcon))
+ flags |= CIFS_TRANSFORM_REQ;
+
+ req_len = cpu_to_be32(total_len);
+
+ rdata->iov[0].iov_base = &req_len;
+ rdata->iov[0].iov_len = sizeof(__be32);
+ rdata->iov[1].iov_base = buf;
+ rdata->iov[1].iov_len = total_len;
+
+ shdr = (struct smb2_sync_hdr *)buf;
if (rdata->credits) {
- buf->CreditCharge = cpu_to_le16(DIV_ROUND_UP(rdata->bytes,
+ shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(rdata->bytes,
SMB2_MAX_BUFFER_SIZE));
- buf->CreditRequest = buf->CreditCharge;
+ shdr->CreditRequest = shdr->CreditCharge;
spin_lock(&server->req_lock);
server->credits += rdata->credits -
- le16_to_cpu(buf->CreditCharge);
+ le16_to_cpu(shdr->CreditCharge);
spin_unlock(&server->req_lock);
wake_up(&server->request_q);
- flags = CIFS_HAS_CREDITS;
+ flags |= CIFS_HAS_CREDITS;
}
kref_get(&rdata->refcount);
rc = cifs_call_async(io_parms.tcon->ses->server, &rqst,
cifs_readv_receive, smb2_readv_callback,
- rdata, flags);
+ smb3_handle_read_data, rdata, flags);
if (rc) {
kref_put(&rdata->refcount, cifs_readdata_release);
cifs_stats_fail_inc(io_parms.tcon, SMB2_READ_HE);
@@ -2286,21 +2396,41 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms,
unsigned int *nbytes, char **buf, int *buf_type)
{
int resp_buftype, rc = -EACCES;
+ struct smb2_read_plain_req *req = NULL;
struct smb2_read_rsp *rsp = NULL;
- struct kvec iov[1];
+ struct smb2_sync_hdr *shdr;
+ struct kvec iov[2];
+ struct kvec rsp_iov;
+ unsigned int total_len;
+ __be32 req_len;
+ struct smb_rqst rqst = { .rq_iov = iov,
+ .rq_nvec = 2 };
+ int flags = CIFS_LOG_ERROR;
+ struct cifs_ses *ses = io_parms->tcon->ses;
*nbytes = 0;
- rc = smb2_new_read_req(iov, io_parms, 0, 0);
+ rc = smb2_new_read_req((void **)&req, &total_len, io_parms, 0, 0);
if (rc)
return rc;
- rc = SendReceive2(xid, io_parms->tcon->ses, iov, 1,
- &resp_buftype, CIFS_LOG_ERROR);
+ if (encryption_required(io_parms->tcon))
+ flags |= CIFS_TRANSFORM_REQ;
- rsp = (struct smb2_read_rsp *)iov[0].iov_base;
+ req_len = cpu_to_be32(total_len);
- if (rsp->hdr.Status == STATUS_END_OF_FILE) {
- free_rsp_buf(resp_buftype, iov[0].iov_base);
+ iov[0].iov_base = &req_len;
+ iov[0].iov_len = sizeof(__be32);
+ iov[1].iov_base = req;
+ iov[1].iov_len = total_len;
+
+ rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags, &rsp_iov);
+ cifs_small_buf_release(req);
+
+ rsp = (struct smb2_read_rsp *)rsp_iov.iov_base;
+ shdr = get_sync_hdr(rsp);
+
+ if (shdr->Status == STATUS_END_OF_FILE) {
+ free_rsp_buf(resp_buftype, rsp_iov.iov_base);
return 0;
}
@@ -2319,11 +2449,10 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms,
}
if (*buf) {
- memcpy(*buf, (char *)&rsp->hdr.ProtocolId + rsp->DataOffset,
- *nbytes);
- free_rsp_buf(resp_buftype, iov[0].iov_base);
+ memcpy(*buf, (char *)shdr + rsp->DataOffset, *nbytes);
+ free_rsp_buf(resp_buftype, rsp_iov.iov_base);
} else if (resp_buftype != CIFS_NO_BUFFER) {
- *buf = iov[0].iov_base;
+ *buf = rsp_iov.iov_base;
if (resp_buftype == CIFS_SMALL_BUFFER)
*buf_type = CIFS_SMALL_BUFFER;
else if (resp_buftype == CIFS_LARGE_BUFFER)
@@ -2348,7 +2477,7 @@ smb2_writev_callback(struct mid_q_entry *mid)
switch (mid->mid_state) {
case MID_RESPONSE_RECEIVED:
- credits_received = le16_to_cpu(rsp->hdr.CreditRequest);
+ credits_received = le16_to_cpu(rsp->hdr.sync_hdr.CreditRequest);
wdata->result = smb2_check_receive(mid, tcon->ses->server, 0);
if (wdata->result != 0)
break;
@@ -2394,10 +2523,11 @@ smb2_async_writev(struct cifs_writedata *wdata,
{
int rc = -EACCES, flags = 0;
struct smb2_write_req *req = NULL;
+ struct smb2_sync_hdr *shdr;
struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink);
struct TCP_Server_Info *server = tcon->ses->server;
- struct kvec iov;
- struct smb_rqst rqst;
+ struct kvec iov[2];
+ struct smb_rqst rqst = { };
rc = small_smb2_init(SMB2_WRITE, tcon, (void **) &req);
if (rc) {
@@ -2412,7 +2542,11 @@ smb2_async_writev(struct cifs_writedata *wdata,
goto async_writev_out;
}
- req->hdr.ProcessId = cpu_to_le32(wdata->cfile->pid);
+ if (encryption_required(tcon))
+ flags |= CIFS_TRANSFORM_REQ;
+
+ shdr = get_sync_hdr(req);
+ shdr->ProcessId = cpu_to_le32(wdata->cfile->pid);
req->PersistentFileId = wdata->cfile->fid.persistent_fid;
req->VolatileFileId = wdata->cfile->fid.volatile_fid;
@@ -2426,11 +2560,13 @@ smb2_async_writev(struct cifs_writedata *wdata,
req->RemainingBytes = 0;
/* 4 for rfc1002 length field and 1 for Buffer */
- iov.iov_len = get_rfc1002_length(req) + 4 - 1;
- iov.iov_base = req;
+ iov[0].iov_len = 4;
+ iov[0].iov_base = req;
+ iov[1].iov_len = get_rfc1002_length(req) - 1;
+ iov[1].iov_base = (char *)req + 4;
- rqst.rq_iov = &iov;
- rqst.rq_nvec = 1;
+ rqst.rq_iov = iov;
+ rqst.rq_nvec = 2;
rqst.rq_pages = wdata->pages;
rqst.rq_npages = wdata->nr_pages;
rqst.rq_pagesz = wdata->pagesz;
@@ -2444,20 +2580,20 @@ smb2_async_writev(struct cifs_writedata *wdata,
inc_rfc1001_len(&req->hdr, wdata->bytes - 1 /* Buffer */);
if (wdata->credits) {
- req->hdr.CreditCharge = cpu_to_le16(DIV_ROUND_UP(wdata->bytes,
+ shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(wdata->bytes,
SMB2_MAX_BUFFER_SIZE));
- req->hdr.CreditRequest = req->hdr.CreditCharge;
+ shdr->CreditRequest = shdr->CreditCharge;
spin_lock(&server->req_lock);
server->credits += wdata->credits -
- le16_to_cpu(req->hdr.CreditCharge);
+ le16_to_cpu(shdr->CreditCharge);
spin_unlock(&server->req_lock);
wake_up(&server->request_q);
- flags = CIFS_HAS_CREDITS;
+ flags |= CIFS_HAS_CREDITS;
}
kref_get(&wdata->refcount);
- rc = cifs_call_async(server, &rqst, NULL, smb2_writev_callback, wdata,
- flags);
+ rc = cifs_call_async(server, &rqst, NULL, smb2_writev_callback, NULL,
+ wdata, flags);
if (rc) {
kref_put(&wdata->refcount, release);
@@ -2483,6 +2619,9 @@ SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms,
struct smb2_write_req *req = NULL;
struct smb2_write_rsp *rsp = NULL;
int resp_buftype;
+ struct kvec rsp_iov;
+ int flags = 0;
+
*nbytes = 0;
if (n_vec < 1)
@@ -2495,7 +2634,10 @@ SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms,
if (io_parms->tcon->ses->server == NULL)
return -ECONNABORTED;
- req->hdr.ProcessId = cpu_to_le32(io_parms->pid);
+ if (encryption_required(io_parms->tcon))
+ flags |= CIFS_TRANSFORM_REQ;
+
+ req->hdr.sync_hdr.ProcessId = cpu_to_le32(io_parms->pid);
req->PersistentFileId = io_parms->persistent_fid;
req->VolatileFileId = io_parms->volatile_fid;
@@ -2517,8 +2659,9 @@ SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms,
inc_rfc1001_len(req, io_parms->length - 1 /* Buffer */);
rc = SendReceive2(xid, io_parms->tcon->ses, iov, n_vec + 1,
- &resp_buftype, 0);
- rsp = (struct smb2_write_rsp *)iov[0].iov_base;
+ &resp_buftype, flags, &rsp_iov);
+ cifs_small_buf_release(req);
+ rsp = (struct smb2_write_rsp *)rsp_iov.iov_base;
if (rc) {
cifs_stats_fail_inc(io_parms->tcon, SMB2_WRITE_HE);
@@ -2581,6 +2724,7 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
struct smb2_query_directory_req *req;
struct smb2_query_directory_rsp *rsp = NULL;
struct kvec iov[2];
+ struct kvec rsp_iov;
int rc = 0;
int len;
int resp_buftype = CIFS_NO_BUFFER;
@@ -2591,6 +2735,7 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
char *end_of_smb;
unsigned int output_size = CIFSMaxBufSize;
size_t info_buf_size;
+ int flags = 0;
if (ses && (ses->server))
server = ses->server;
@@ -2601,6 +2746,9 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
if (rc)
return rc;
+ if (encryption_required(tcon))
+ flags |= CIFS_TRANSFORM_REQ;
+
switch (srch_inf->info_level) {
case SMB_FIND_FILE_DIRECTORY_INFO:
req->FileInformationClass = FILE_DIRECTORY_INFORMATION;
@@ -2645,11 +2793,13 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
inc_rfc1001_len(req, len - 1 /* Buffer */);
- rc = SendReceive2(xid, ses, iov, 2, &resp_buftype, 0);
- rsp = (struct smb2_query_directory_rsp *)iov[0].iov_base;
+ rc = SendReceive2(xid, ses, iov, 2, &resp_buftype, flags, &rsp_iov);
+ cifs_small_buf_release(req);
+ rsp = (struct smb2_query_directory_rsp *)rsp_iov.iov_base;
if (rc) {
- if (rc == -ENODATA && rsp->hdr.Status == STATUS_NO_MORE_FILES) {
+ if (rc == -ENODATA &&
+ rsp->hdr.sync_hdr.Status == STATUS_NO_MORE_FILES) {
srch_inf->endOfSearch = true;
rc = 0;
}
@@ -2705,11 +2855,13 @@ send_set_info(const unsigned int xid, struct cifs_tcon *tcon,
struct smb2_set_info_req *req;
struct smb2_set_info_rsp *rsp = NULL;
struct kvec *iov;
+ struct kvec rsp_iov;
int rc = 0;
int resp_buftype;
unsigned int i;
struct TCP_Server_Info *server;
struct cifs_ses *ses = tcon->ses;
+ int flags = 0;
if (ses && (ses->server))
server = ses->server;
@@ -2729,7 +2881,10 @@ send_set_info(const unsigned int xid, struct cifs_tcon *tcon,
return rc;
}
- req->hdr.ProcessId = cpu_to_le32(pid);
+ if (encryption_required(tcon))
+ flags |= CIFS_TRANSFORM_REQ;
+
+ req->hdr.sync_hdr.ProcessId = cpu_to_le32(pid);
req->InfoType = SMB2_O_INFO_FILE;
req->FileInfoClass = info_class;
@@ -2756,8 +2911,9 @@ send_set_info(const unsigned int xid, struct cifs_tcon *tcon,
iov[i].iov_len = size[i];
}
- rc = SendReceive2(xid, ses, iov, num, &resp_buftype, 0);
- rsp = (struct smb2_set_info_rsp *)iov[0].iov_base;
+ rc = SendReceive2(xid, ses, iov, num, &resp_buftype, flags, &rsp_iov);
+ cifs_small_buf_release(req);
+ rsp = (struct smb2_set_info_rsp *)rsp_iov.iov_base;
if (rc != 0)
cifs_stats_fail_inc(tcon, SMB2_SET_INFO_HE);
@@ -2885,20 +3041,23 @@ SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon,
{
int rc;
struct smb2_oplock_break *req = NULL;
+ int flags = CIFS_OBREAK_OP;
cifs_dbg(FYI, "SMB2_oplock_break\n");
rc = small_smb2_init(SMB2_OPLOCK_BREAK, tcon, (void **) &req);
-
if (rc)
return rc;
+ if (encryption_required(tcon))
+ flags |= CIFS_TRANSFORM_REQ;
+
req->VolatileFid = volatile_fid;
req->PersistentFid = persistent_fid;
req->OplockLevel = oplock_level;
- req->hdr.CreditRequest = cpu_to_le16(1);
+ req->hdr.sync_hdr.CreditRequest = cpu_to_le16(1);
- rc = SendReceiveNoRsp(xid, tcon->ses, (char *) req, CIFS_OBREAK_OP);
- /* SMB2 buffer freed by function above */
+ rc = SendReceiveNoRsp(xid, tcon->ses, (char *) req, flags);
+ cifs_small_buf_release(req);
if (rc) {
cifs_stats_fail_inc(tcon, SMB2_OPLOCK_BREAK_HE);
@@ -2958,10 +3117,12 @@ SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon,
{
struct smb2_query_info_rsp *rsp = NULL;
struct kvec iov;
+ struct kvec rsp_iov;
int rc = 0;
int resp_buftype;
struct cifs_ses *ses = tcon->ses;
struct smb2_fs_full_size_info *info = NULL;
+ int flags = 0;
rc = build_qfs_info_req(&iov, tcon, FS_FULL_SIZE_INFORMATION,
sizeof(struct smb2_fs_full_size_info),
@@ -2969,12 +3130,16 @@ SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon,
if (rc)
return rc;
- rc = SendReceive2(xid, ses, &iov, 1, &resp_buftype, 0);
+ if (encryption_required(tcon))
+ flags |= CIFS_TRANSFORM_REQ;
+
+ rc = SendReceive2(xid, ses, &iov, 1, &resp_buftype, flags, &rsp_iov);
+ cifs_small_buf_release(iov.iov_base);
if (rc) {
cifs_stats_fail_inc(tcon, SMB2_QUERY_INFO_HE);
goto qfsinf_exit;
}
- rsp = (struct smb2_query_info_rsp *)iov.iov_base;
+ rsp = (struct smb2_query_info_rsp *)rsp_iov.iov_base;
info = (struct smb2_fs_full_size_info *)(4 /* RFC1001 len */ +
le16_to_cpu(rsp->OutputBufferOffset) + (char *)&rsp->hdr);
@@ -2985,7 +3150,7 @@ SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon,
copy_fs_info_to_kstatfs(info, fsdata);
qfsinf_exit:
- free_rsp_buf(resp_buftype, iov.iov_base);
+ free_rsp_buf(resp_buftype, rsp_iov.iov_base);
return rc;
}
@@ -2995,10 +3160,12 @@ SMB2_QFS_attr(const unsigned int xid, struct cifs_tcon *tcon,
{
struct smb2_query_info_rsp *rsp = NULL;
struct kvec iov;
+ struct kvec rsp_iov;
int rc = 0;
int resp_buftype, max_len, min_len;
struct cifs_ses *ses = tcon->ses;
unsigned int rsp_len, offset;
+ int flags = 0;
if (level == FS_DEVICE_INFORMATION) {
max_len = sizeof(FILE_SYSTEM_DEVICE_INFO);
@@ -3019,12 +3186,16 @@ SMB2_QFS_attr(const unsigned int xid, struct cifs_tcon *tcon,
if (rc)
return rc;
- rc = SendReceive2(xid, ses, &iov, 1, &resp_buftype, 0);
+ if (encryption_required(tcon))
+ flags |= CIFS_TRANSFORM_REQ;
+
+ rc = SendReceive2(xid, ses, &iov, 1, &resp_buftype, flags, &rsp_iov);
+ cifs_small_buf_release(iov.iov_base);
if (rc) {
cifs_stats_fail_inc(tcon, SMB2_QUERY_INFO_HE);
goto qfsattr_exit;
}
- rsp = (struct smb2_query_info_rsp *)iov.iov_base;
+ rsp = (struct smb2_query_info_rsp *)rsp_iov.iov_base;
rsp_len = le32_to_cpu(rsp->OutputBufferLength);
offset = le16_to_cpu(rsp->OutputBufferOffset);
@@ -3048,7 +3219,7 @@ SMB2_QFS_attr(const unsigned int xid, struct cifs_tcon *tcon,
}
qfsattr_exit:
- free_rsp_buf(resp_buftype, iov.iov_base);
+ free_rsp_buf(resp_buftype, rsp_iov.iov_base);
return rc;
}
@@ -3060,8 +3231,10 @@ smb2_lockv(const unsigned int xid, struct cifs_tcon *tcon,
int rc = 0;
struct smb2_lock_req *req = NULL;
struct kvec iov[2];
+ struct kvec rsp_iov;
int resp_buf_type;
unsigned int count;
+ int flags = CIFS_NO_RESP;
cifs_dbg(FYI, "smb2_lockv num lock %d\n", num_lock);
@@ -3069,7 +3242,10 @@ smb2_lockv(const unsigned int xid, struct cifs_tcon *tcon,
if (rc)
return rc;
- req->hdr.ProcessId = cpu_to_le32(pid);
+ if (encryption_required(tcon))
+ flags |= CIFS_TRANSFORM_REQ;
+
+ req->hdr.sync_hdr.ProcessId = cpu_to_le32(pid);
req->LockCount = cpu_to_le16(num_lock);
req->PersistentFileId = persist_fid;
@@ -3085,7 +3261,9 @@ smb2_lockv(const unsigned int xid, struct cifs_tcon *tcon,
iov[1].iov_len = count;
cifs_stats_inc(&tcon->stats.cifs_stats.num_locks);
- rc = SendReceive2(xid, tcon->ses, iov, 2, &resp_buf_type, CIFS_NO_RESP);
+ rc = SendReceive2(xid, tcon->ses, iov, 2, &resp_buf_type, flags,
+ &rsp_iov);
+ cifs_small_buf_release(req);
if (rc) {
cifs_dbg(FYI, "Send error in smb2_lockv = %d\n", rc);
cifs_stats_fail_inc(tcon, SMB2_LOCK_HE);
@@ -3117,22 +3295,25 @@ SMB2_lease_break(const unsigned int xid, struct cifs_tcon *tcon,
{
int rc;
struct smb2_lease_ack *req = NULL;
+ int flags = CIFS_OBREAK_OP;
cifs_dbg(FYI, "SMB2_lease_break\n");
rc = small_smb2_init(SMB2_OPLOCK_BREAK, tcon, (void **) &req);
-
if (rc)
return rc;
- req->hdr.CreditRequest = cpu_to_le16(1);
+ if (encryption_required(tcon))
+ flags |= CIFS_TRANSFORM_REQ;
+
+ req->hdr.sync_hdr.CreditRequest = cpu_to_le16(1);
req->StructureSize = cpu_to_le16(36);
inc_rfc1001_len(req, 12);
memcpy(req->LeaseKey, lease_key, 16);
req->LeaseState = lease_state;
- rc = SendReceiveNoRsp(xid, tcon->ses, (char *) req, CIFS_OBREAK_OP);
- /* SMB2 buffer freed by function above */
+ rc = SendReceiveNoRsp(xid, tcon->ses, (char *) req, flags);
+ cifs_small_buf_release(req);
if (rc) {
cifs_stats_fail_inc(tcon, SMB2_OPLOCK_BREAK_HE);
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index dc0d141f33e2..c03b252501a1 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -101,10 +101,7 @@
#define SMB2_HEADER_STRUCTURE_SIZE cpu_to_le16(64)
-struct smb2_hdr {
- __be32 smb2_buf_length; /* big endian on wire */
- /* length is only two or three bytes - with
- one or two byte type preceding it that MBZ */
+struct smb2_sync_hdr {
__le32 ProtocolId; /* 0xFE 'S' 'M' 'B' */
__le16 StructureSize; /* 64 */
__le16 CreditCharge; /* MBZ */
@@ -120,16 +117,31 @@ struct smb2_hdr {
__u8 Signature[16];
} __packed;
+struct smb2_sync_pdu {
+ struct smb2_sync_hdr sync_hdr;
+ __le16 StructureSize2; /* size of wct area (varies, request specific) */
+} __packed;
+
+struct smb2_hdr {
+ __be32 smb2_buf_length; /* big endian on wire */
+ /* length is only two or three bytes - with */
+ /* one or two byte type preceding it that MBZ */
+ struct smb2_sync_hdr sync_hdr;
+} __packed;
+
struct smb2_pdu {
struct smb2_hdr hdr;
__le16 StructureSize2; /* size of wct area (varies, request specific) */
} __packed;
+#define SMB3_AES128CMM_NONCE 11
+#define SMB3_AES128GCM_NONCE 12
+
struct smb2_transform_hdr {
__be32 smb2_buf_length; /* big endian on wire */
/* length is only two or three bytes - with
one or two byte type preceding it that MBZ */
- __u8 ProtocolId[4]; /* 0xFD 'S' 'M' 'B' */
+ __le32 ProtocolId; /* 0xFD 'S' 'M' 'B' */
__u8 Signature[16];
__u8 Nonce[16];
__le32 OriginalMessageSize;
@@ -814,8 +826,9 @@ struct smb2_flush_rsp {
#define SMB2_CHANNEL_RDMA_V1 0x00000001 /* SMB3 or later */
#define SMB2_CHANNEL_RDMA_V1_INVALIDATE 0x00000001 /* SMB3.02 or later */
-struct smb2_read_req {
- struct smb2_hdr hdr;
+/* SMB2 read request without RFC1001 length at the beginning */
+struct smb2_read_plain_req {
+ struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 49 */
__u8 Padding; /* offset from start of SMB2 header to place read */
__u8 Flags; /* MBZ unless SMB3.02 or later */
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index f2d511a6971b..85fc7a789334 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -56,6 +56,10 @@ extern void smb2_echo_request(struct work_struct *work);
extern __le32 smb2_get_lease_state(struct cifsInodeInfo *cinode);
extern bool smb2_is_valid_oplock_break(char *buffer,
struct TCP_Server_Info *srv);
+extern struct cifs_ses *smb2_find_smb_ses(struct TCP_Server_Info *server,
+ __u64 ses_id);
+extern int smb3_handle_read_data(struct TCP_Server_Info *server,
+ struct mid_q_entry *mid);
extern void move_smb2_info_to_cifs(FILE_ALL_INFO *dst,
struct smb2_file_all_info *src);
@@ -97,6 +101,7 @@ extern int smb2_unlock_range(struct cifsFileInfo *cfile,
struct file_lock *flock, const unsigned int xid);
extern int smb2_push_mandatory_locks(struct cifsFileInfo *cfile);
extern void smb2_reconnect_server(struct work_struct *work);
+extern int smb3_crypto_aead_allocate(struct TCP_Server_Info *server);
/*
* SMB2 Worker functions - most of protocol specific implementation details
diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c
index bc9a7b634643..7c3bb1bd7eed 100644
--- a/fs/cifs/smb2transport.c
+++ b/fs/cifs/smb2transport.c
@@ -31,6 +31,7 @@
#include <asm/processor.h>
#include <linux/mempool.h>
#include <linux/highmem.h>
+#include <crypto/aead.h>
#include "smb2pdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
@@ -114,14 +115,14 @@ smb3_crypto_shash_allocate(struct TCP_Server_Info *server)
return 0;
}
-static struct cifs_ses *
-smb2_find_smb_ses(struct smb2_hdr *smb2hdr, struct TCP_Server_Info *server)
+struct cifs_ses *
+smb2_find_smb_ses(struct TCP_Server_Info *server, __u64 ses_id)
{
struct cifs_ses *ses;
spin_lock(&cifs_tcp_ses_lock);
list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
- if (ses->Suid != smb2hdr->SessionId)
+ if (ses->Suid != ses_id)
continue;
spin_unlock(&cifs_tcp_ses_lock);
return ses;
@@ -131,7 +132,6 @@ smb2_find_smb_ses(struct smb2_hdr *smb2hdr, struct TCP_Server_Info *server)
return NULL;
}
-
int
smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
{
@@ -139,17 +139,17 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
unsigned char smb2_signature[SMB2_HMACSHA256_SIZE];
unsigned char *sigptr = smb2_signature;
struct kvec *iov = rqst->rq_iov;
- struct smb2_hdr *smb2_pdu = (struct smb2_hdr *)iov[0].iov_base;
+ struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)iov[1].iov_base;
struct cifs_ses *ses;
- ses = smb2_find_smb_ses(smb2_pdu, server);
+ ses = smb2_find_smb_ses(server, shdr->SessionId);
if (!ses) {
cifs_dbg(VFS, "%s: Could not find session\n", __func__);
return 0;
}
memset(smb2_signature, 0x0, SMB2_HMACSHA256_SIZE);
- memset(smb2_pdu->Signature, 0x0, SMB2_SIGNATURE_SIZE);
+ memset(shdr->Signature, 0x0, SMB2_SIGNATURE_SIZE);
rc = smb2_crypto_shash_allocate(server);
if (rc) {
@@ -174,7 +174,7 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
&server->secmech.sdeschmacsha256->shash);
if (!rc)
- memcpy(smb2_pdu->Signature, sigptr, SMB2_SIGNATURE_SIZE);
+ memcpy(shdr->Signature, sigptr, SMB2_SIGNATURE_SIZE);
return rc;
}
@@ -356,17 +356,17 @@ smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
unsigned char smb3_signature[SMB2_CMACAES_SIZE];
unsigned char *sigptr = smb3_signature;
struct kvec *iov = rqst->rq_iov;
- struct smb2_hdr *smb2_pdu = (struct smb2_hdr *)iov[0].iov_base;
+ struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)iov[1].iov_base;
struct cifs_ses *ses;
- ses = smb2_find_smb_ses(smb2_pdu, server);
+ ses = smb2_find_smb_ses(server, shdr->SessionId);
if (!ses) {
cifs_dbg(VFS, "%s: Could not find session\n", __func__);
return 0;
}
memset(smb3_signature, 0x0, SMB2_CMACAES_SIZE);
- memset(smb2_pdu->Signature, 0x0, SMB2_SIGNATURE_SIZE);
+ memset(shdr->Signature, 0x0, SMB2_SIGNATURE_SIZE);
rc = crypto_shash_setkey(server->secmech.cmacaes,
ses->smb3signingkey, SMB2_CMACAES_SIZE);
@@ -391,7 +391,7 @@ smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
&server->secmech.sdesccmacaes->shash);
if (!rc)
- memcpy(smb2_pdu->Signature, sigptr, SMB2_SIGNATURE_SIZE);
+ memcpy(shdr->Signature, sigptr, SMB2_SIGNATURE_SIZE);
return rc;
}
@@ -401,14 +401,15 @@ static int
smb2_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server)
{
int rc = 0;
- struct smb2_hdr *smb2_pdu = rqst->rq_iov[0].iov_base;
+ struct smb2_sync_hdr *shdr =
+ (struct smb2_sync_hdr *)rqst->rq_iov[1].iov_base;
- if (!(smb2_pdu->Flags & SMB2_FLAGS_SIGNED) ||
+ if (!(shdr->Flags & SMB2_FLAGS_SIGNED) ||
server->tcpStatus == CifsNeedNegotiate)
return rc;
if (!server->session_estab) {
- strncpy(smb2_pdu->Signature, "BSRSPYL", 8);
+ strncpy(shdr->Signature, "BSRSPYL", 8);
return rc;
}
@@ -422,11 +423,12 @@ smb2_verify_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
{
unsigned int rc;
char server_response_sig[16];
- struct smb2_hdr *smb2_pdu = (struct smb2_hdr *)rqst->rq_iov[0].iov_base;
+ struct smb2_sync_hdr *shdr =
+ (struct smb2_sync_hdr *)rqst->rq_iov[1].iov_base;
- if ((smb2_pdu->Command == SMB2_NEGOTIATE) ||
- (smb2_pdu->Command == SMB2_SESSION_SETUP) ||
- (smb2_pdu->Command == SMB2_OPLOCK_BREAK) ||
+ if ((shdr->Command == SMB2_NEGOTIATE) ||
+ (shdr->Command == SMB2_SESSION_SETUP) ||
+ (shdr->Command == SMB2_OPLOCK_BREAK) ||
(!server->session_estab))
return 0;
@@ -436,17 +438,17 @@ smb2_verify_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
*/
/* Do not need to verify session setups with signature "BSRSPYL " */
- if (memcmp(smb2_pdu->Signature, "BSRSPYL ", 8) == 0)
+ if (memcmp(shdr->Signature, "BSRSPYL ", 8) == 0)
cifs_dbg(FYI, "dummy signature received for smb command 0x%x\n",
- smb2_pdu->Command);
+ shdr->Command);
/*
* Save off the origiginal signature so we can modify the smb and check
* our calculated signature against what the server sent.
*/
- memcpy(server_response_sig, smb2_pdu->Signature, SMB2_SIGNATURE_SIZE);
+ memcpy(server_response_sig, shdr->Signature, SMB2_SIGNATURE_SIZE);
- memset(smb2_pdu->Signature, 0, SMB2_SIGNATURE_SIZE);
+ memset(shdr->Signature, 0, SMB2_SIGNATURE_SIZE);
mutex_lock(&server->srv_mutex);
rc = server->ops->calc_signature(rqst, server);
@@ -455,8 +457,7 @@ smb2_verify_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
if (rc)
return rc;
- if (memcmp(server_response_sig, smb2_pdu->Signature,
- SMB2_SIGNATURE_SIZE))
+ if (memcmp(server_response_sig, shdr->Signature, SMB2_SIGNATURE_SIZE))
return -EACCES;
else
return 0;
@@ -467,18 +468,19 @@ smb2_verify_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
* and when srv_mutex is held.
*/
static inline void
-smb2_seq_num_into_buf(struct TCP_Server_Info *server, struct smb2_hdr *hdr)
+smb2_seq_num_into_buf(struct TCP_Server_Info *server,
+ struct smb2_sync_hdr *shdr)
{
- unsigned int i, num = le16_to_cpu(hdr->CreditCharge);
+ unsigned int i, num = le16_to_cpu(shdr->CreditCharge);
- hdr->MessageId = get_next_mid64(server);
+ shdr->MessageId = get_next_mid64(server);
/* skip message numbers according to CreditCharge field */
for (i = 1; i < num; i++)
get_next_mid(server);
}
static struct mid_q_entry *
-smb2_mid_entry_alloc(const struct smb2_hdr *smb_buffer,
+smb2_mid_entry_alloc(const struct smb2_sync_hdr *shdr,
struct TCP_Server_Info *server)
{
struct mid_q_entry *temp;
@@ -493,9 +495,9 @@ smb2_mid_entry_alloc(const struct smb2_hdr *smb_buffer,
return temp;
else {
memset(temp, 0, sizeof(struct mid_q_entry));
- temp->mid = le64_to_cpu(smb_buffer->MessageId);
+ temp->mid = le64_to_cpu(shdr->MessageId);
temp->pid = current->pid;
- temp->command = smb_buffer->Command; /* Always LE */
+ temp->command = shdr->Command; /* Always LE */
temp->when_alloc = jiffies;
temp->server = server;
@@ -513,7 +515,7 @@ smb2_mid_entry_alloc(const struct smb2_hdr *smb_buffer,
}
static int
-smb2_get_mid_entry(struct cifs_ses *ses, struct smb2_hdr *buf,
+smb2_get_mid_entry(struct cifs_ses *ses, struct smb2_sync_hdr *shdr,
struct mid_q_entry **mid)
{
if (ses->server->tcpStatus == CifsExiting)
@@ -525,19 +527,19 @@ smb2_get_mid_entry(struct cifs_ses *ses, struct smb2_hdr *buf,
}
if (ses->status == CifsNew) {
- if ((buf->Command != SMB2_SESSION_SETUP) &&
- (buf->Command != SMB2_NEGOTIATE))
+ if ((shdr->Command != SMB2_SESSION_SETUP) &&
+ (shdr->Command != SMB2_NEGOTIATE))
return -EAGAIN;
/* else ok - we are setting up session */
}
if (ses->status == CifsExiting) {
- if (buf->Command != SMB2_LOGOFF)
+ if (shdr->Command != SMB2_LOGOFF)
return -EAGAIN;
/* else ok - we are shutting down the session */
}
- *mid = smb2_mid_entry_alloc(buf, ses->server);
+ *mid = smb2_mid_entry_alloc(shdr, ses->server);
if (*mid == NULL)
return -ENOMEM;
spin_lock(&GlobalMid_Lock);
@@ -551,16 +553,18 @@ smb2_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server,
bool log_error)
{
unsigned int len = get_rfc1002_length(mid->resp_buf);
- struct kvec iov;
- struct smb_rqst rqst = { .rq_iov = &iov,
- .rq_nvec = 1 };
+ struct kvec iov[2];
+ struct smb_rqst rqst = { .rq_iov = iov,
+ .rq_nvec = 2 };
- iov.iov_base = (char *)mid->resp_buf;
- iov.iov_len = get_rfc1002_length(mid->resp_buf) + 4;
+ iov[0].iov_base = (char *)mid->resp_buf;
+ iov[0].iov_len = 4;
+ iov[1].iov_base = (char *)mid->resp_buf + 4;
+ iov[1].iov_len = len;
dump_smb(mid->resp_buf, min_t(u32, 80, len));
/* convert the length into a more usable form */
- if (len > 24 && server->sign) {
+ if (len > 24 && server->sign && !mid->decrypted) {
int rc;
rc = smb2_verify_signature(&rqst, server);
@@ -576,12 +580,13 @@ struct mid_q_entry *
smb2_setup_request(struct cifs_ses *ses, struct smb_rqst *rqst)
{
int rc;
- struct smb2_hdr *hdr = (struct smb2_hdr *)rqst->rq_iov[0].iov_base;
+ struct smb2_sync_hdr *shdr =
+ (struct smb2_sync_hdr *)rqst->rq_iov[1].iov_base;
struct mid_q_entry *mid;
- smb2_seq_num_into_buf(ses->server, hdr);
+ smb2_seq_num_into_buf(ses->server, shdr);
- rc = smb2_get_mid_entry(ses, hdr, &mid);
+ rc = smb2_get_mid_entry(ses, shdr, &mid);
if (rc)
return ERR_PTR(rc);
rc = smb2_sign_rqst(rqst, ses->server);
@@ -596,12 +601,13 @@ struct mid_q_entry *
smb2_setup_async_request(struct TCP_Server_Info *server, struct smb_rqst *rqst)
{
int rc;
- struct smb2_hdr *hdr = (struct smb2_hdr *)rqst->rq_iov[0].iov_base;
+ struct smb2_sync_hdr *shdr =
+ (struct smb2_sync_hdr *)rqst->rq_iov[1].iov_base;
struct mid_q_entry *mid;
- smb2_seq_num_into_buf(server, hdr);
+ smb2_seq_num_into_buf(server, shdr);
- mid = smb2_mid_entry_alloc(hdr, server);
+ mid = smb2_mid_entry_alloc(shdr, server);
if (mid == NULL)
return ERR_PTR(-ENOMEM);
@@ -613,3 +619,33 @@ smb2_setup_async_request(struct TCP_Server_Info *server, struct smb_rqst *rqst)
return mid;
}
+
+int
+smb3_crypto_aead_allocate(struct TCP_Server_Info *server)
+{
+ struct crypto_aead *tfm;
+
+ if (!server->secmech.ccmaesencrypt) {
+ tfm = crypto_alloc_aead("ccm(aes)", 0, 0);
+ if (IS_ERR(tfm)) {
+ cifs_dbg(VFS, "%s: Failed to alloc encrypt aead\n",
+ __func__);
+ return PTR_ERR(tfm);
+ }
+ server->secmech.ccmaesencrypt = tfm;
+ }
+
+ if (!server->secmech.ccmaesdecrypt) {
+ tfm = crypto_alloc_aead("ccm(aes)", 0, 0);
+ if (IS_ERR(tfm)) {
+ crypto_free_aead(server->secmech.ccmaesencrypt);
+ server->secmech.ccmaesencrypt = NULL;
+ cifs_dbg(VFS, "%s: Failed to alloc decrypt aead\n",
+ __func__);
+ return PTR_ERR(tfm);
+ }
+ server->secmech.ccmaesdecrypt = tfm;
+ }
+
+ return 0;
+}
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index fbb84c08e3cd..526f0533cb4e 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -221,7 +221,7 @@ rqst_len(struct smb_rqst *rqst)
}
static int
-smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst)
+__smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst)
{
int rc;
struct kvec *iov = rqst->rq_iov;
@@ -245,8 +245,12 @@ smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst)
return -EIO;
}
+ if (n_vec < 2)
+ return -EIO;
+
cifs_dbg(FYI, "Sending smb: smb_len=%u\n", smb_buf_length);
dump_smb(iov[0].iov_base, iov[0].iov_len);
+ dump_smb(iov[1].iov_base, iov[1].iov_len);
/* cork the socket */
kernel_setsockopt(ssocket, SOL_TCP, TCP_CORK,
@@ -309,24 +313,43 @@ uncork:
}
static int
-smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
+smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst, int flags)
{
- struct smb_rqst rqst = { .rq_iov = iov,
- .rq_nvec = n_vec };
+ struct smb_rqst cur_rqst;
+ int rc;
+
+ if (!(flags & CIFS_TRANSFORM_REQ))
+ return __smb_send_rqst(server, rqst);
+
+ if (!server->ops->init_transform_rq ||
+ !server->ops->free_transform_rq) {
+ cifs_dbg(VFS, "Encryption requested but transform callbacks are missed\n");
+ return -EIO;
+ }
+
+ rc = server->ops->init_transform_rq(server, &cur_rqst, rqst);
+ if (rc)
+ return rc;
- return smb_send_rqst(server, &rqst);
+ rc = __smb_send_rqst(server, &cur_rqst);
+ server->ops->free_transform_rq(&cur_rqst);
+ return rc;
}
int
smb_send(struct TCP_Server_Info *server, struct smb_hdr *smb_buffer,
unsigned int smb_buf_length)
{
- struct kvec iov;
+ struct kvec iov[2];
+ struct smb_rqst rqst = { .rq_iov = iov,
+ .rq_nvec = 2 };
- iov.iov_base = smb_buffer;
- iov.iov_len = smb_buf_length + 4;
+ iov[0].iov_base = smb_buffer;
+ iov[0].iov_len = 4;
+ iov[1].iov_base = (char *)smb_buffer + 4;
+ iov[1].iov_len = smb_buf_length;
- return smb_sendv(server, &iov, 1);
+ return __smb_send_rqst(server, &rqst);
}
static int
@@ -454,6 +477,10 @@ cifs_setup_async_request(struct TCP_Server_Info *server, struct smb_rqst *rqst)
struct smb_hdr *hdr = (struct smb_hdr *)rqst->rq_iov[0].iov_base;
struct mid_q_entry *mid;
+ if (rqst->rq_iov[0].iov_len != 4 ||
+ rqst->rq_iov[0].iov_base + 4 != rqst->rq_iov[1].iov_base)
+ return ERR_PTR(-EIO);
+
/* enable signing if server requires it */
if (server->sign)
hdr->Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
@@ -478,7 +505,7 @@ cifs_setup_async_request(struct TCP_Server_Info *server, struct smb_rqst *rqst)
int
cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst,
mid_receive_t *receive, mid_callback_t *callback,
- void *cbdata, const int flags)
+ mid_handle_t *handle, void *cbdata, const int flags)
{
int rc, timeout, optype;
struct mid_q_entry *mid;
@@ -505,6 +532,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst,
mid->receive = receive;
mid->callback = callback;
mid->callback_data = cbdata;
+ mid->handle = handle;
mid->mid_state = MID_REQUEST_SUBMITTED;
/* put it on the pending_mid_q */
@@ -514,7 +542,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst,
cifs_in_send_inc(server);
- rc = smb_send_rqst(server, rqst);
+ rc = smb_send_rqst(server, rqst, flags);
cifs_in_send_dec(server);
cifs_save_when_sent(mid);
@@ -547,12 +575,13 @@ SendReceiveNoRsp(const unsigned int xid, struct cifs_ses *ses,
{
int rc;
struct kvec iov[1];
+ struct kvec rsp_iov;
int resp_buf_type;
iov[0].iov_base = in_buf;
iov[0].iov_len = get_rfc1002_length(in_buf) + 4;
flags |= CIFS_NO_RESP;
- rc = SendReceive2(xid, ses, iov, 1, &resp_buf_type, flags);
+ rc = SendReceive2(xid, ses, iov, 1, &resp_buf_type, flags, &rsp_iov);
cifs_dbg(NOISY, "SendRcvNoRsp flags %d rc %d\n", flags, rc);
return rc;
@@ -595,10 +624,11 @@ cifs_sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server)
}
static inline int
-send_cancel(struct TCP_Server_Info *server, void *buf, struct mid_q_entry *mid)
+send_cancel(struct TCP_Server_Info *server, struct smb_rqst *rqst,
+ struct mid_q_entry *mid)
{
return server->ops->send_cancel ?
- server->ops->send_cancel(server, buf, mid) : 0;
+ server->ops->send_cancel(server, rqst, mid) : 0;
}
int
@@ -611,13 +641,15 @@ cifs_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server,
/* convert the length into a more usable form */
if (server->sign) {
- struct kvec iov;
+ struct kvec iov[2];
int rc = 0;
- struct smb_rqst rqst = { .rq_iov = &iov,
- .rq_nvec = 1 };
+ struct smb_rqst rqst = { .rq_iov = iov,
+ .rq_nvec = 2 };
- iov.iov_base = mid->resp_buf;
- iov.iov_len = len;
+ iov[0].iov_base = mid->resp_buf;
+ iov[0].iov_len = 4;
+ iov[1].iov_base = (char *)mid->resp_buf + 4;
+ iov[1].iov_len = len - 4;
/* FIXME: add code to kill session */
rc = cifs_verify_signature(&rqst, server,
mid->sequence_number);
@@ -637,6 +669,10 @@ cifs_setup_request(struct cifs_ses *ses, struct smb_rqst *rqst)
struct smb_hdr *hdr = (struct smb_hdr *)rqst->rq_iov[0].iov_base;
struct mid_q_entry *mid;
+ if (rqst->rq_iov[0].iov_len != 4 ||
+ rqst->rq_iov[0].iov_base + 4 != rqst->rq_iov[1].iov_base)
+ return ERR_PTR(-EIO);
+
rc = allocate_mid(ses, hdr, &mid);
if (rc)
return ERR_PTR(rc);
@@ -649,17 +685,15 @@ cifs_setup_request(struct cifs_ses *ses, struct smb_rqst *rqst)
}
int
-SendReceive2(const unsigned int xid, struct cifs_ses *ses,
- struct kvec *iov, int n_vec, int *resp_buf_type /* ret */,
- const int flags)
+cifs_send_recv(const unsigned int xid, struct cifs_ses *ses,
+ struct smb_rqst *rqst, int *resp_buf_type, const int flags,
+ struct kvec *resp_iov)
{
int rc = 0;
int timeout, optype;
struct mid_q_entry *midQ;
- char *buf = iov[0].iov_base;
unsigned int credits = 1;
- struct smb_rqst rqst = { .rq_iov = iov,
- .rq_nvec = n_vec };
+ char *buf;
timeout = flags & CIFS_TIMEOUT_MASK;
optype = flags & CIFS_OP_MASK;
@@ -667,15 +701,12 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses,
*resp_buf_type = CIFS_NO_BUFFER; /* no response buf yet */
if ((ses == NULL) || (ses->server == NULL)) {
- cifs_small_buf_release(buf);
cifs_dbg(VFS, "Null session\n");
return -EIO;
}
- if (ses->server->tcpStatus == CifsExiting) {
- cifs_small_buf_release(buf);
+ if (ses->server->tcpStatus == CifsExiting)
return -ENOENT;
- }
/*
* Ensure that we do not send more than 50 overlapping requests
@@ -684,10 +715,8 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses,
*/
rc = wait_for_free_request(ses->server, timeout, optype);
- if (rc) {
- cifs_small_buf_release(buf);
+ if (rc)
return rc;
- }
/*
* Make sure that we sign in the same order that we send on this socket
@@ -697,10 +726,9 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses,
mutex_lock(&ses->server->srv_mutex);
- midQ = ses->server->ops->setup_request(ses, &rqst);
+ midQ = ses->server->ops->setup_request(ses, rqst);
if (IS_ERR(midQ)) {
mutex_unlock(&ses->server->srv_mutex);
- cifs_small_buf_release(buf);
/* Update # of requests on wire to server */
add_credits(ses->server, 1, optype);
return PTR_ERR(midQ);
@@ -708,7 +736,7 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses,
midQ->mid_state = MID_REQUEST_SUBMITTED;
cifs_in_send_inc(ses->server);
- rc = smb_sendv(ses->server, iov, n_vec);
+ rc = smb_send_rqst(ses->server, rqst, flags);
cifs_in_send_dec(ses->server);
cifs_save_when_sent(midQ);
@@ -716,32 +744,25 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses,
ses->server->sequence_number -= 2;
mutex_unlock(&ses->server->srv_mutex);
- if (rc < 0) {
- cifs_small_buf_release(buf);
+ if (rc < 0)
goto out;
- }
- if (timeout == CIFS_ASYNC_OP) {
- cifs_small_buf_release(buf);
+ if (timeout == CIFS_ASYNC_OP)
goto out;
- }
rc = wait_for_response(ses->server, midQ);
if (rc != 0) {
- send_cancel(ses->server, buf, midQ);
+ send_cancel(ses->server, rqst, midQ);
spin_lock(&GlobalMid_Lock);
if (midQ->mid_state == MID_REQUEST_SUBMITTED) {
midQ->callback = DeleteMidQEntry;
spin_unlock(&GlobalMid_Lock);
- cifs_small_buf_release(buf);
add_credits(ses->server, 1, optype);
return rc;
}
spin_unlock(&GlobalMid_Lock);
}
- cifs_small_buf_release(buf);
-
rc = cifs_sync_mid_result(midQ, ses->server);
if (rc != 0) {
add_credits(ses->server, 1, optype);
@@ -755,8 +776,8 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses,
}
buf = (char *)midQ->resp_buf;
- iov[0].iov_base = buf;
- iov[0].iov_len = get_rfc1002_length(buf) + 4;
+ resp_iov->iov_base = buf;
+ resp_iov->iov_len = get_rfc1002_length(buf) + 4;
if (midQ->large_buf)
*resp_buf_type = CIFS_LARGE_BUFFER;
else
@@ -778,12 +799,45 @@ out:
}
int
+SendReceive2(const unsigned int xid, struct cifs_ses *ses,
+ struct kvec *iov, int n_vec, int *resp_buf_type /* ret */,
+ const int flags, struct kvec *resp_iov)
+{
+ struct smb_rqst rqst;
+ struct kvec *new_iov;
+ int rc;
+
+ new_iov = kmalloc(sizeof(struct kvec) * (n_vec + 1), GFP_KERNEL);
+ if (!new_iov)
+ return -ENOMEM;
+
+ /* 1st iov is a RFC1001 length followed by the rest of the packet */
+ memcpy(new_iov + 1, iov, (sizeof(struct kvec) * n_vec));
+
+ new_iov[0].iov_base = new_iov[1].iov_base;
+ new_iov[0].iov_len = 4;
+ new_iov[1].iov_base += 4;
+ new_iov[1].iov_len -= 4;
+
+ memset(&rqst, 0, sizeof(struct smb_rqst));
+ rqst.rq_iov = new_iov;
+ rqst.rq_nvec = n_vec + 1;
+
+ rc = cifs_send_recv(xid, ses, &rqst, resp_buf_type, flags, resp_iov);
+ kfree(new_iov);
+ return rc;
+}
+
+int
SendReceive(const unsigned int xid, struct cifs_ses *ses,
struct smb_hdr *in_buf, struct smb_hdr *out_buf,
int *pbytes_returned, const int timeout)
{
int rc = 0;
struct mid_q_entry *midQ;
+ unsigned int len = be32_to_cpu(in_buf->smb_buf_length);
+ struct kvec iov = { .iov_base = in_buf, .iov_len = len };
+ struct smb_rqst rqst = { .rq_iov = &iov, .rq_nvec = 1 };
if (ses == NULL) {
cifs_dbg(VFS, "Null smb session\n");
@@ -801,10 +855,9 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses,
to the same server. We may make this configurable later or
use ses->maxReq */
- if (be32_to_cpu(in_buf->smb_buf_length) > CIFSMaxBufSize +
- MAX_CIFS_HDR_SIZE - 4) {
+ if (len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) {
cifs_dbg(VFS, "Illegal length, greater than maximum frame, %d\n",
- be32_to_cpu(in_buf->smb_buf_length));
+ len);
return -EIO;
}
@@ -835,7 +888,7 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses,
midQ->mid_state = MID_REQUEST_SUBMITTED;
cifs_in_send_inc(ses->server);
- rc = smb_send(ses->server, in_buf, be32_to_cpu(in_buf->smb_buf_length));
+ rc = smb_send(ses->server, in_buf, len);
cifs_in_send_dec(ses->server);
cifs_save_when_sent(midQ);
@@ -852,7 +905,7 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses,
rc = wait_for_response(ses->server, midQ);
if (rc != 0) {
- send_cancel(ses->server, in_buf, midQ);
+ send_cancel(ses->server, &rqst, midQ);
spin_lock(&GlobalMid_Lock);
if (midQ->mid_state == MID_REQUEST_SUBMITTED) {
/* no longer considered to be "in-flight" */
@@ -921,6 +974,9 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon,
int rstart = 0;
struct mid_q_entry *midQ;
struct cifs_ses *ses;
+ unsigned int len = be32_to_cpu(in_buf->smb_buf_length);
+ struct kvec iov = { .iov_base = in_buf, .iov_len = len };
+ struct smb_rqst rqst = { .rq_iov = &iov, .rq_nvec = 1 };
if (tcon == NULL || tcon->ses == NULL) {
cifs_dbg(VFS, "Null smb session\n");
@@ -940,10 +996,9 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon,
to the same server. We may make this configurable later or
use ses->maxReq */
- if (be32_to_cpu(in_buf->smb_buf_length) > CIFSMaxBufSize +
- MAX_CIFS_HDR_SIZE - 4) {
+ if (len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) {
cifs_dbg(VFS, "Illegal length, greater than maximum frame, %d\n",
- be32_to_cpu(in_buf->smb_buf_length));
+ len);
return -EIO;
}
@@ -972,7 +1027,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon,
midQ->mid_state = MID_REQUEST_SUBMITTED;
cifs_in_send_inc(ses->server);
- rc = smb_send(ses->server, in_buf, be32_to_cpu(in_buf->smb_buf_length));
+ rc = smb_send(ses->server, in_buf, len);
cifs_in_send_dec(ses->server);
cifs_save_when_sent(midQ);
@@ -1001,7 +1056,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon,
if (in_buf->Command == SMB_COM_TRANSACTION2) {
/* POSIX lock. We send a NT_CANCEL SMB to cause the
blocking lock to return. */
- rc = send_cancel(ses->server, in_buf, midQ);
+ rc = send_cancel(ses->server, &rqst, midQ);
if (rc) {
cifs_delete_mid(midQ);
return rc;
@@ -1022,7 +1077,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon,
rc = wait_for_response(ses->server, midQ);
if (rc) {
- send_cancel(ses->server, in_buf, midQ);
+ send_cancel(ses->server, &rqst, midQ);
spin_lock(&GlobalMid_Lock);
if (midQ->mid_state == MID_REQUEST_SUBMITTED) {
/* no longer considered to be "in-flight" */
diff --git a/fs/coredump.c b/fs/coredump.c
index e525b6017cdf..ae6b05629ca1 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -833,3 +833,21 @@ int dump_align(struct coredump_params *cprm, int align)
return mod ? dump_skip(cprm, align - mod) : 1;
}
EXPORT_SYMBOL(dump_align);
+
+/*
+ * Ensures that file size is big enough to contain the current file
+ * postion. This prevents gdb from complaining about a truncated file
+ * if the last "write" to the file was dump_skip.
+ */
+void dump_truncate(struct coredump_params *cprm)
+{
+ struct file *file = cprm->file;
+ loff_t offset;
+
+ if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
+ offset = file->f_op->llseek(file, 0, SEEK_CUR);
+ if (i_size_read(file->f_mapping->host) < offset)
+ do_truncate(file->f_path.dentry, offset, 0, file);
+ }
+}
+EXPORT_SYMBOL(dump_truncate);
diff --git a/fs/crypto/Kconfig b/fs/crypto/Kconfig
index f514978f6688..08b46e6e3995 100644
--- a/fs/crypto/Kconfig
+++ b/fs/crypto/Kconfig
@@ -1,6 +1,5 @@
config FS_ENCRYPTION
tristate "FS Encryption (Per-file encryption)"
- depends on BLOCK
select CRYPTO
select CRYPTO_AES
select CRYPTO_CBC
diff --git a/fs/crypto/Makefile b/fs/crypto/Makefile
index f17684c48739..9f6607f17b53 100644
--- a/fs/crypto/Makefile
+++ b/fs/crypto/Makefile
@@ -1,3 +1,4 @@
obj-$(CONFIG_FS_ENCRYPTION) += fscrypto.o
fscrypto-y := crypto.o fname.o policy.o keyinfo.o
+fscrypto-$(CONFIG_BLOCK) += bio.o
diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c
new file mode 100644
index 000000000000..a409a84f1bca
--- /dev/null
+++ b/fs/crypto/bio.c
@@ -0,0 +1,145 @@
+/*
+ * This contains encryption functions for per-file encryption.
+ *
+ * Copyright (C) 2015, Google, Inc.
+ * Copyright (C) 2015, Motorola Mobility
+ *
+ * Written by Michael Halcrow, 2014.
+ *
+ * Filename encryption additions
+ * Uday Savagaonkar, 2014
+ * Encryption policy handling additions
+ * Ildar Muslukhov, 2014
+ * Add fscrypt_pullback_bio_page()
+ * Jaegeuk Kim, 2015.
+ *
+ * This has not yet undergone a rigorous security audit.
+ *
+ * The usage of AES-XTS should conform to recommendations in NIST
+ * Special Publication 800-38E and IEEE P1619/D16.
+ */
+
+#include <linux/pagemap.h>
+#include <linux/module.h>
+#include <linux/bio.h>
+#include <linux/namei.h>
+#include "fscrypt_private.h"
+
+/*
+ * Call fscrypt_decrypt_page on every single page, reusing the encryption
+ * context.
+ */
+static void completion_pages(struct work_struct *work)
+{
+ struct fscrypt_ctx *ctx =
+ container_of(work, struct fscrypt_ctx, r.work);
+ struct bio *bio = ctx->r.bio;
+ struct bio_vec *bv;
+ int i;
+
+ bio_for_each_segment_all(bv, bio, i) {
+ struct page *page = bv->bv_page;
+ int ret = fscrypt_decrypt_page(page->mapping->host, page,
+ PAGE_SIZE, 0, page->index);
+
+ if (ret) {
+ WARN_ON_ONCE(1);
+ SetPageError(page);
+ } else {
+ SetPageUptodate(page);
+ }
+ unlock_page(page);
+ }
+ fscrypt_release_ctx(ctx);
+ bio_put(bio);
+}
+
+void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *ctx, struct bio *bio)
+{
+ INIT_WORK(&ctx->r.work, completion_pages);
+ ctx->r.bio = bio;
+ queue_work(fscrypt_read_workqueue, &ctx->r.work);
+}
+EXPORT_SYMBOL(fscrypt_decrypt_bio_pages);
+
+void fscrypt_pullback_bio_page(struct page **page, bool restore)
+{
+ struct fscrypt_ctx *ctx;
+ struct page *bounce_page;
+
+ /* The bounce data pages are unmapped. */
+ if ((*page)->mapping)
+ return;
+
+ /* The bounce data page is unmapped. */
+ bounce_page = *page;
+ ctx = (struct fscrypt_ctx *)page_private(bounce_page);
+
+ /* restore control page */
+ *page = ctx->w.control_page;
+
+ if (restore)
+ fscrypt_restore_control_page(bounce_page);
+}
+EXPORT_SYMBOL(fscrypt_pullback_bio_page);
+
+int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk,
+ sector_t pblk, unsigned int len)
+{
+ struct fscrypt_ctx *ctx;
+ struct page *ciphertext_page = NULL;
+ struct bio *bio;
+ int ret, err = 0;
+
+ BUG_ON(inode->i_sb->s_blocksize != PAGE_SIZE);
+
+ ctx = fscrypt_get_ctx(inode, GFP_NOFS);
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+
+ ciphertext_page = fscrypt_alloc_bounce_page(ctx, GFP_NOWAIT);
+ if (IS_ERR(ciphertext_page)) {
+ err = PTR_ERR(ciphertext_page);
+ goto errout;
+ }
+
+ while (len--) {
+ err = fscrypt_do_page_crypto(inode, FS_ENCRYPT, lblk,
+ ZERO_PAGE(0), ciphertext_page,
+ PAGE_SIZE, 0, GFP_NOFS);
+ if (err)
+ goto errout;
+
+ bio = bio_alloc(GFP_NOWAIT, 1);
+ if (!bio) {
+ err = -ENOMEM;
+ goto errout;
+ }
+ bio->bi_bdev = inode->i_sb->s_bdev;
+ bio->bi_iter.bi_sector =
+ pblk << (inode->i_sb->s_blocksize_bits - 9);
+ bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
+ ret = bio_add_page(bio, ciphertext_page,
+ inode->i_sb->s_blocksize, 0);
+ if (ret != inode->i_sb->s_blocksize) {
+ /* should never happen! */
+ WARN_ON(1);
+ bio_put(bio);
+ err = -EIO;
+ goto errout;
+ }
+ err = submit_bio_wait(bio);
+ if ((err == 0) && bio->bi_error)
+ err = -EIO;
+ bio_put(bio);
+ if (err)
+ goto errout;
+ lblk++;
+ pblk++;
+ }
+ err = 0;
+errout:
+ fscrypt_release_ctx(ctx);
+ return err;
+}
+EXPORT_SYMBOL(fscrypt_zeroout_range);
diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
index ac8e4f6a3773..02a7a9286449 100644
--- a/fs/crypto/crypto.c
+++ b/fs/crypto/crypto.c
@@ -24,7 +24,6 @@
#include <linux/module.h>
#include <linux/scatterlist.h>
#include <linux/ratelimit.h>
-#include <linux/bio.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include "fscrypt_private.h"
@@ -44,7 +43,7 @@ static mempool_t *fscrypt_bounce_page_pool = NULL;
static LIST_HEAD(fscrypt_free_ctxs);
static DEFINE_SPINLOCK(fscrypt_ctx_lock);
-static struct workqueue_struct *fscrypt_read_workqueue;
+struct workqueue_struct *fscrypt_read_workqueue;
static DEFINE_MUTEX(fscrypt_init_mutex);
static struct kmem_cache *fscrypt_ctx_cachep;
@@ -141,16 +140,10 @@ static void page_crypt_complete(struct crypto_async_request *req, int res)
complete(&ecr->completion);
}
-typedef enum {
- FS_DECRYPT = 0,
- FS_ENCRYPT,
-} fscrypt_direction_t;
-
-static int do_page_crypto(const struct inode *inode,
- fscrypt_direction_t rw, u64 lblk_num,
- struct page *src_page, struct page *dest_page,
- unsigned int len, unsigned int offs,
- gfp_t gfp_flags)
+int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw,
+ u64 lblk_num, struct page *src_page,
+ struct page *dest_page, unsigned int len,
+ unsigned int offs, gfp_t gfp_flags)
{
struct {
__le64 index;
@@ -205,7 +198,8 @@ static int do_page_crypto(const struct inode *inode,
return 0;
}
-static struct page *alloc_bounce_page(struct fscrypt_ctx *ctx, gfp_t gfp_flags)
+struct page *fscrypt_alloc_bounce_page(struct fscrypt_ctx *ctx,
+ gfp_t gfp_flags)
{
ctx->w.bounce_page = mempool_alloc(fscrypt_bounce_page_pool, gfp_flags);
if (ctx->w.bounce_page == NULL)
@@ -260,9 +254,9 @@ struct page *fscrypt_encrypt_page(const struct inode *inode,
if (inode->i_sb->s_cop->flags & FS_CFLG_OWN_PAGES) {
/* with inplace-encryption we just encrypt the page */
- err = do_page_crypto(inode, FS_ENCRYPT, lblk_num,
- page, ciphertext_page,
- len, offs, gfp_flags);
+ err = fscrypt_do_page_crypto(inode, FS_ENCRYPT, lblk_num, page,
+ ciphertext_page, len, offs,
+ gfp_flags);
if (err)
return ERR_PTR(err);
@@ -276,14 +270,14 @@ struct page *fscrypt_encrypt_page(const struct inode *inode,
return (struct page *)ctx;
/* The encryption operation will require a bounce page. */
- ciphertext_page = alloc_bounce_page(ctx, gfp_flags);
+ ciphertext_page = fscrypt_alloc_bounce_page(ctx, gfp_flags);
if (IS_ERR(ciphertext_page))
goto errout;
ctx->w.control_page = page;
- err = do_page_crypto(inode, FS_ENCRYPT, lblk_num,
- page, ciphertext_page,
- len, offs, gfp_flags);
+ err = fscrypt_do_page_crypto(inode, FS_ENCRYPT, lblk_num,
+ page, ciphertext_page, len, offs,
+ gfp_flags);
if (err) {
ciphertext_page = ERR_PTR(err);
goto errout;
@@ -320,72 +314,11 @@ int fscrypt_decrypt_page(const struct inode *inode, struct page *page,
if (!(inode->i_sb->s_cop->flags & FS_CFLG_OWN_PAGES))
BUG_ON(!PageLocked(page));
- return do_page_crypto(inode, FS_DECRYPT, lblk_num, page, page, len,
- offs, GFP_NOFS);
+ return fscrypt_do_page_crypto(inode, FS_DECRYPT, lblk_num, page, page,
+ len, offs, GFP_NOFS);
}
EXPORT_SYMBOL(fscrypt_decrypt_page);
-int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk,
- sector_t pblk, unsigned int len)
-{
- struct fscrypt_ctx *ctx;
- struct page *ciphertext_page = NULL;
- struct bio *bio;
- int ret, err = 0;
-
- BUG_ON(inode->i_sb->s_blocksize != PAGE_SIZE);
-
- ctx = fscrypt_get_ctx(inode, GFP_NOFS);
- if (IS_ERR(ctx))
- return PTR_ERR(ctx);
-
- ciphertext_page = alloc_bounce_page(ctx, GFP_NOWAIT);
- if (IS_ERR(ciphertext_page)) {
- err = PTR_ERR(ciphertext_page);
- goto errout;
- }
-
- while (len--) {
- err = do_page_crypto(inode, FS_ENCRYPT, lblk,
- ZERO_PAGE(0), ciphertext_page,
- PAGE_SIZE, 0, GFP_NOFS);
- if (err)
- goto errout;
-
- bio = bio_alloc(GFP_NOWAIT, 1);
- if (!bio) {
- err = -ENOMEM;
- goto errout;
- }
- bio->bi_bdev = inode->i_sb->s_bdev;
- bio->bi_iter.bi_sector =
- pblk << (inode->i_sb->s_blocksize_bits - 9);
- bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
- ret = bio_add_page(bio, ciphertext_page,
- inode->i_sb->s_blocksize, 0);
- if (ret != inode->i_sb->s_blocksize) {
- /* should never happen! */
- WARN_ON(1);
- bio_put(bio);
- err = -EIO;
- goto errout;
- }
- err = submit_bio_wait(bio);
- if ((err == 0) && bio->bi_error)
- err = -EIO;
- bio_put(bio);
- if (err)
- goto errout;
- lblk++;
- pblk++;
- }
- err = 0;
-errout:
- fscrypt_release_ctx(ctx);
- return err;
-}
-EXPORT_SYMBOL(fscrypt_zeroout_range);
-
/*
* Validate dentries for encrypted directories to make sure we aren't
* potentially caching stale data after a key has been added or
@@ -442,64 +375,6 @@ const struct dentry_operations fscrypt_d_ops = {
};
EXPORT_SYMBOL(fscrypt_d_ops);
-/*
- * Call fscrypt_decrypt_page on every single page, reusing the encryption
- * context.
- */
-static void completion_pages(struct work_struct *work)
-{
- struct fscrypt_ctx *ctx =
- container_of(work, struct fscrypt_ctx, r.work);
- struct bio *bio = ctx->r.bio;
- struct bio_vec *bv;
- int i;
-
- bio_for_each_segment_all(bv, bio, i) {
- struct page *page = bv->bv_page;
- int ret = fscrypt_decrypt_page(page->mapping->host, page,
- PAGE_SIZE, 0, page->index);
-
- if (ret) {
- WARN_ON_ONCE(1);
- SetPageError(page);
- } else {
- SetPageUptodate(page);
- }
- unlock_page(page);
- }
- fscrypt_release_ctx(ctx);
- bio_put(bio);
-}
-
-void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *ctx, struct bio *bio)
-{
- INIT_WORK(&ctx->r.work, completion_pages);
- ctx->r.bio = bio;
- queue_work(fscrypt_read_workqueue, &ctx->r.work);
-}
-EXPORT_SYMBOL(fscrypt_decrypt_bio_pages);
-
-void fscrypt_pullback_bio_page(struct page **page, bool restore)
-{
- struct fscrypt_ctx *ctx;
- struct page *bounce_page;
-
- /* The bounce data pages are unmapped. */
- if ((*page)->mapping)
- return;
-
- /* The bounce data page is unmapped. */
- bounce_page = *page;
- ctx = (struct fscrypt_ctx *)page_private(bounce_page);
-
- /* restore control page */
- *page = ctx->w.control_page;
-
- if (restore)
- fscrypt_restore_control_page(bounce_page);
-}
-EXPORT_SYMBOL(fscrypt_pullback_bio_page);
-
void fscrypt_restore_control_page(struct page *page)
{
struct fscrypt_ctx *ctx;
diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c
index 56ad9d195f18..13052b85c393 100644
--- a/fs/crypto/fname.c
+++ b/fs/crypto/fname.c
@@ -332,7 +332,7 @@ int fscrypt_fname_usr_to_disk(struct inode *inode,
* in a directory. Consequently, a user space name cannot be mapped to
* a disk-space name
*/
- return -EACCES;
+ return -ENOKEY;
}
EXPORT_SYMBOL(fscrypt_fname_usr_to_disk);
@@ -367,7 +367,7 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname,
return 0;
}
if (!lookup)
- return -EACCES;
+ return -ENOKEY;
/*
* We don't have the key and we are doing a lookup; decode the
diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h
index aeab032d7d35..fdbb8af32eaf 100644
--- a/fs/crypto/fscrypt_private.h
+++ b/fs/crypto/fscrypt_private.h
@@ -11,7 +11,7 @@
#ifndef _FSCRYPT_PRIVATE_H
#define _FSCRYPT_PRIVATE_H
-#include <linux/fscrypto.h>
+#include <linux/fscrypt_supp.h>
#define FS_FNAME_CRYPTO_DIGEST_SIZE 32
@@ -71,6 +71,11 @@ struct fscrypt_info {
u8 ci_master_key[FS_KEY_DESCRIPTOR_SIZE];
};
+typedef enum {
+ FS_DECRYPT = 0,
+ FS_ENCRYPT,
+} fscrypt_direction_t;
+
#define FS_CTX_REQUIRES_FREE_ENCRYPT_FL 0x00000001
#define FS_CTX_HAS_BOUNCE_BUFFER_FL 0x00000002
@@ -81,11 +86,20 @@ struct fscrypt_completion_result {
#define DECLARE_FS_COMPLETION_RESULT(ecr) \
struct fscrypt_completion_result ecr = { \
- COMPLETION_INITIALIZER((ecr).completion), 0 }
+ COMPLETION_INITIALIZER_ONSTACK((ecr).completion), 0 }
/* crypto.c */
-int fscrypt_initialize(unsigned int cop_flags);
+extern int fscrypt_initialize(unsigned int cop_flags);
+extern struct workqueue_struct *fscrypt_read_workqueue;
+extern int fscrypt_do_page_crypto(const struct inode *inode,
+ fscrypt_direction_t rw, u64 lblk_num,
+ struct page *src_page,
+ struct page *dest_page,
+ unsigned int len, unsigned int offs,
+ gfp_t gfp_flags);
+extern struct page *fscrypt_alloc_bounce_page(struct fscrypt_ctx *ctx,
+ gfp_t gfp_flags);
/* keyinfo.c */
extern int fscrypt_get_crypt_info(struct inode *);
diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c
index 6eeea1dcba41..02eb6b9e4438 100644
--- a/fs/crypto/keyinfo.c
+++ b/fs/crypto/keyinfo.c
@@ -77,26 +77,22 @@ out:
static int validate_user_key(struct fscrypt_info *crypt_info,
struct fscrypt_context *ctx, u8 *raw_key,
- u8 *prefix, int prefix_size)
+ const char *prefix)
{
- u8 *full_key_descriptor;
+ char *description;
struct key *keyring_key;
struct fscrypt_key *master_key;
const struct user_key_payload *ukp;
- int full_key_len = prefix_size + (FS_KEY_DESCRIPTOR_SIZE * 2) + 1;
int res;
- full_key_descriptor = kmalloc(full_key_len, GFP_NOFS);
- if (!full_key_descriptor)
+ description = kasprintf(GFP_NOFS, "%s%*phN", prefix,
+ FS_KEY_DESCRIPTOR_SIZE,
+ ctx->master_key_descriptor);
+ if (!description)
return -ENOMEM;
- memcpy(full_key_descriptor, prefix, prefix_size);
- sprintf(full_key_descriptor + prefix_size,
- "%*phN", FS_KEY_DESCRIPTOR_SIZE,
- ctx->master_key_descriptor);
- full_key_descriptor[full_key_len - 1] = '\0';
- keyring_key = request_key(&key_type_logon, full_key_descriptor, NULL);
- kfree(full_key_descriptor);
+ keyring_key = request_key(&key_type_logon, description, NULL);
+ kfree(description);
if (IS_ERR(keyring_key))
return PTR_ERR(keyring_key);
@@ -206,12 +202,15 @@ retry:
res = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx));
if (res < 0) {
- if (!fscrypt_dummy_context_enabled(inode))
+ if (!fscrypt_dummy_context_enabled(inode) ||
+ inode->i_sb->s_cop->is_encrypted(inode))
return res;
+ /* Fake up a context for an unencrypted directory */
+ memset(&ctx, 0, sizeof(ctx));
ctx.format = FS_ENCRYPTION_CONTEXT_FORMAT_V1;
ctx.contents_encryption_mode = FS_ENCRYPTION_MODE_AES_256_XTS;
ctx.filenames_encryption_mode = FS_ENCRYPTION_MODE_AES_256_CTS;
- ctx.flags = 0;
+ memset(ctx.master_key_descriptor, 0x42, FS_KEY_DESCRIPTOR_SIZE);
} else if (res != sizeof(ctx)) {
return -EINVAL;
}
@@ -247,20 +246,10 @@ retry:
if (!raw_key)
goto out;
- if (fscrypt_dummy_context_enabled(inode)) {
- memset(raw_key, 0x42, FS_AES_256_XTS_KEY_SIZE);
- goto got_key;
- }
-
- res = validate_user_key(crypt_info, &ctx, raw_key,
- FS_KEY_DESC_PREFIX, FS_KEY_DESC_PREFIX_SIZE);
+ res = validate_user_key(crypt_info, &ctx, raw_key, FS_KEY_DESC_PREFIX);
if (res && inode->i_sb->s_cop->key_prefix) {
- u8 *prefix = NULL;
- int prefix_size, res2;
-
- prefix_size = inode->i_sb->s_cop->key_prefix(inode, &prefix);
- res2 = validate_user_key(crypt_info, &ctx, raw_key,
- prefix, prefix_size);
+ int res2 = validate_user_key(crypt_info, &ctx, raw_key,
+ inode->i_sb->s_cop->key_prefix);
if (res2) {
if (res2 == -ENOKEY)
res = -ENOKEY;
@@ -269,7 +258,6 @@ retry:
} else if (res) {
goto out;
}
-got_key:
ctfm = crypto_alloc_skcipher(cipher_str, 0, 0);
if (!ctfm || IS_ERR(ctfm)) {
res = ctfm ? PTR_ERR(ctfm) : -ENOMEM;
diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c
index 6ed7c2eebeec..14b76da71269 100644
--- a/fs/crypto/policy.c
+++ b/fs/crypto/policy.c
@@ -13,37 +13,20 @@
#include <linux/mount.h>
#include "fscrypt_private.h"
-static int inode_has_encryption_context(struct inode *inode)
-{
- if (!inode->i_sb->s_cop->get_context)
- return 0;
- return (inode->i_sb->s_cop->get_context(inode, NULL, 0L) > 0);
-}
-
/*
- * check whether the policy is consistent with the encryption context
- * for the inode
+ * check whether an encryption policy is consistent with an encryption context
*/
-static int is_encryption_context_consistent_with_policy(struct inode *inode,
+static bool is_encryption_context_consistent_with_policy(
+ const struct fscrypt_context *ctx,
const struct fscrypt_policy *policy)
{
- struct fscrypt_context ctx;
- int res;
-
- if (!inode->i_sb->s_cop->get_context)
- return 0;
-
- res = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx));
- if (res != sizeof(ctx))
- return 0;
-
- return (memcmp(ctx.master_key_descriptor, policy->master_key_descriptor,
- FS_KEY_DESCRIPTOR_SIZE) == 0 &&
- (ctx.flags == policy->flags) &&
- (ctx.contents_encryption_mode ==
- policy->contents_encryption_mode) &&
- (ctx.filenames_encryption_mode ==
- policy->filenames_encryption_mode));
+ return memcmp(ctx->master_key_descriptor, policy->master_key_descriptor,
+ FS_KEY_DESCRIPTOR_SIZE) == 0 &&
+ (ctx->flags == policy->flags) &&
+ (ctx->contents_encryption_mode ==
+ policy->contents_encryption_mode) &&
+ (ctx->filenames_encryption_mode ==
+ policy->filenames_encryption_mode);
}
static int create_encryption_context_from_policy(struct inode *inode,
@@ -66,20 +49,12 @@ static int create_encryption_context_from_policy(struct inode *inode,
FS_KEY_DESCRIPTOR_SIZE);
if (!fscrypt_valid_contents_enc_mode(
- policy->contents_encryption_mode)) {
- printk(KERN_WARNING
- "%s: Invalid contents encryption mode %d\n", __func__,
- policy->contents_encryption_mode);
+ policy->contents_encryption_mode))
return -EINVAL;
- }
if (!fscrypt_valid_filenames_enc_mode(
- policy->filenames_encryption_mode)) {
- printk(KERN_WARNING
- "%s: Invalid filenames encryption mode %d\n", __func__,
- policy->filenames_encryption_mode);
+ policy->filenames_encryption_mode))
return -EINVAL;
- }
if (policy->flags & ~FS_POLICY_FLAGS_VALID)
return -EINVAL;
@@ -98,6 +73,7 @@ int fscrypt_ioctl_set_policy(struct file *filp, const void __user *arg)
struct fscrypt_policy policy;
struct inode *inode = file_inode(filp);
int ret;
+ struct fscrypt_context ctx;
if (copy_from_user(&policy, arg, sizeof(policy)))
return -EFAULT;
@@ -114,9 +90,10 @@ int fscrypt_ioctl_set_policy(struct file *filp, const void __user *arg)
inode_lock(inode);
- if (!inode_has_encryption_context(inode)) {
+ ret = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx));
+ if (ret == -ENODATA) {
if (!S_ISDIR(inode->i_mode))
- ret = -EINVAL;
+ ret = -ENOTDIR;
else if (!inode->i_sb->s_cop->empty_dir)
ret = -EOPNOTSUPP;
else if (!inode->i_sb->s_cop->empty_dir(inode))
@@ -124,12 +101,14 @@ int fscrypt_ioctl_set_policy(struct file *filp, const void __user *arg)
else
ret = create_encryption_context_from_policy(inode,
&policy);
- } else if (!is_encryption_context_consistent_with_policy(inode,
- &policy)) {
- printk(KERN_WARNING
- "%s: Policy inconsistent with encryption context\n",
- __func__);
- ret = -EINVAL;
+ } else if (ret == sizeof(ctx) &&
+ is_encryption_context_consistent_with_policy(&ctx,
+ &policy)) {
+ /* The file already uses the same encryption policy. */
+ ret = 0;
+ } else if (ret >= 0 || ret == -ERANGE) {
+ /* The file already uses a different encryption policy. */
+ ret = -EEXIST;
}
inode_unlock(inode);
@@ -151,8 +130,10 @@ int fscrypt_ioctl_get_policy(struct file *filp, void __user *arg)
return -ENODATA;
res = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx));
+ if (res < 0 && res != -ERANGE)
+ return res;
if (res != sizeof(ctx))
- return -ENODATA;
+ return -EINVAL;
if (ctx.format != FS_ENCRYPTION_CONTEXT_FORMAT_V1)
return -EINVAL;
@@ -179,6 +160,11 @@ int fscrypt_has_permitted_context(struct inode *parent, struct inode *child)
BUG_ON(1);
}
+ /* No restrictions on file types which are never encrypted */
+ if (!S_ISREG(child->i_mode) && !S_ISDIR(child->i_mode) &&
+ !S_ISLNK(child->i_mode))
+ return 1;
+
/* no restrictions if the parent directory is not encrypted */
if (!parent->i_sb->s_cop->is_encrypted(parent))
return 1;
@@ -212,9 +198,9 @@ EXPORT_SYMBOL(fscrypt_has_permitted_context);
* @parent: Parent inode from which the context is inherited.
* @child: Child inode that inherits the context from @parent.
* @fs_data: private data given by FS.
- * @preload: preload child i_crypt_info
+ * @preload: preload child i_crypt_info if true
*
- * Return: Zero on success, non-zero otherwise
+ * Return: 0 on success, -errno on failure
*/
int fscrypt_inherit_context(struct inode *parent, struct inode *child,
void *fs_data, bool preload)
@@ -235,19 +221,11 @@ int fscrypt_inherit_context(struct inode *parent, struct inode *child,
return -ENOKEY;
ctx.format = FS_ENCRYPTION_CONTEXT_FORMAT_V1;
- if (fscrypt_dummy_context_enabled(parent)) {
- ctx.contents_encryption_mode = FS_ENCRYPTION_MODE_AES_256_XTS;
- ctx.filenames_encryption_mode = FS_ENCRYPTION_MODE_AES_256_CTS;
- ctx.flags = 0;
- memset(ctx.master_key_descriptor, 0x42, FS_KEY_DESCRIPTOR_SIZE);
- res = 0;
- } else {
- ctx.contents_encryption_mode = ci->ci_data_mode;
- ctx.filenames_encryption_mode = ci->ci_filename_mode;
- ctx.flags = ci->ci_flags;
- memcpy(ctx.master_key_descriptor, ci->ci_master_key,
- FS_KEY_DESCRIPTOR_SIZE);
- }
+ ctx.contents_encryption_mode = ci->ci_data_mode;
+ ctx.filenames_encryption_mode = ci->ci_filename_mode;
+ ctx.flags = ci->ci_flags;
+ memcpy(ctx.master_key_descriptor, ci->ci_master_key,
+ FS_KEY_DESCRIPTOR_SIZE);
get_random_bytes(ctx.nonce, FS_KEY_DERIVATION_NONCE_SIZE);
res = parent->i_sb->s_cop->set_context(child, &ctx,
sizeof(ctx), fs_data);
diff --git a/fs/dax.c b/fs/dax.c
index 5c74f60d0a50..99b5b4458a78 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -691,8 +691,8 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping,
pgoff_t index, unsigned long pfn)
{
struct vm_area_struct *vma;
- pte_t *ptep;
- pte_t pte;
+ pte_t pte, *ptep = NULL;
+ pmd_t *pmdp = NULL;
spinlock_t *ptl;
bool changed;
@@ -707,21 +707,42 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping,
address = pgoff_address(index, vma);
changed = false;
- if (follow_pte(vma->vm_mm, address, &ptep, &ptl))
+ if (follow_pte_pmd(vma->vm_mm, address, &ptep, &pmdp, &ptl))
continue;
- if (pfn != pte_pfn(*ptep))
- goto unlock;
- if (!pte_dirty(*ptep) && !pte_write(*ptep))
- goto unlock;
- flush_cache_page(vma, address, pfn);
- pte = ptep_clear_flush(vma, address, ptep);
- pte = pte_wrprotect(pte);
- pte = pte_mkclean(pte);
- set_pte_at(vma->vm_mm, address, ptep, pte);
- changed = true;
-unlock:
- pte_unmap_unlock(ptep, ptl);
+ if (pmdp) {
+#ifdef CONFIG_FS_DAX_PMD
+ pmd_t pmd;
+
+ if (pfn != pmd_pfn(*pmdp))
+ goto unlock_pmd;
+ if (!pmd_dirty(*pmdp) && !pmd_write(*pmdp))
+ goto unlock_pmd;
+
+ flush_cache_page(vma, address, pfn);
+ pmd = pmdp_huge_clear_flush(vma, address, pmdp);
+ pmd = pmd_wrprotect(pmd);
+ pmd = pmd_mkclean(pmd);
+ set_pmd_at(vma->vm_mm, address, pmdp, pmd);
+ changed = true;
+unlock_pmd:
+ spin_unlock(ptl);
+#endif
+ } else {
+ if (pfn != pte_pfn(*ptep))
+ goto unlock_pte;
+ if (!pte_dirty(*ptep) && !pte_write(*ptep))
+ goto unlock_pte;
+
+ flush_cache_page(vma, address, pfn);
+ pte = ptep_clear_flush(vma, address, ptep);
+ pte = pte_wrprotect(pte);
+ pte = pte_mkclean(pte);
+ set_pte_at(vma->vm_mm, address, ptep, pte);
+ changed = true;
+unlock_pte:
+ pte_unmap_unlock(ptep, ptl);
+ }
if (changed)
mmu_notifier_invalidate_page(vma->vm_mm, address);
@@ -969,7 +990,6 @@ int __dax_zero_page_range(struct block_device *bdev, sector_t sector,
}
EXPORT_SYMBOL_GPL(__dax_zero_page_range);
-#ifdef CONFIG_FS_IOMAP
static sector_t dax_iomap_sector(struct iomap *iomap, loff_t pos)
{
return iomap->blkno + (((pos & PAGE_MASK) - iomap->offset) >> 9);
@@ -1011,6 +1031,11 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
struct blk_dax_ctl dax = { 0 };
ssize_t map_len;
+ if (fatal_signal_pending(current)) {
+ ret = -EINTR;
+ break;
+ }
+
dax.sector = dax_iomap_sector(iomap, pos);
dax.size = (length + offset + PAGE_SIZE - 1) & PAGE_MASK;
map_len = dax_map_atomic(iomap->bdev, &dax);
@@ -1054,15 +1079,19 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
*/
ssize_t
dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
- struct iomap_ops *ops)
+ const struct iomap_ops *ops)
{
struct address_space *mapping = iocb->ki_filp->f_mapping;
struct inode *inode = mapping->host;
loff_t pos = iocb->ki_pos, ret = 0, done = 0;
unsigned flags = 0;
- if (iov_iter_rw(iter) == WRITE)
+ if (iov_iter_rw(iter) == WRITE) {
+ lockdep_assert_held_exclusive(&inode->i_rwsem);
flags |= IOMAP_WRITE;
+ } else {
+ lockdep_assert_held(&inode->i_rwsem);
+ }
while (iov_iter_count(iter)) {
ret = iomap_apply(inode, pos, iov_iter_count(iter), flags, ops,
@@ -1098,7 +1127,7 @@ static int dax_fault_return(int error)
* necessary locking for the page fault to proceed successfully.
*/
int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
- struct iomap_ops *ops)
+ const struct iomap_ops *ops)
{
struct address_space *mapping = vma->vm_file->f_mapping;
struct inode *inode = mapping->host;
@@ -1297,7 +1326,7 @@ static int dax_pmd_load_hole(struct vm_area_struct *vma, pmd_t *pmd,
}
int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
- pmd_t *pmd, unsigned int flags, struct iomap_ops *ops)
+ pmd_t *pmd, unsigned int flags, const struct iomap_ops *ops)
{
struct address_space *mapping = vma->vm_file->f_mapping;
unsigned long pmd_addr = address & PMD_MASK;
@@ -1407,4 +1436,3 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
}
EXPORT_SYMBOL_GPL(dax_iomap_pmd_fault);
#endif /* CONFIG_FS_DAX_PMD */
-#endif /* CONFIG_FS_IOMAP */
diff --git a/fs/dcache.c b/fs/dcache.c
index 769903dbc19d..95d71eda8142 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1336,8 +1336,11 @@ int d_set_mounted(struct dentry *dentry)
}
spin_lock(&dentry->d_lock);
if (!d_unlinked(dentry)) {
- dentry->d_flags |= DCACHE_MOUNTED;
- ret = 0;
+ ret = -EBUSY;
+ if (!d_mountpoint(dentry)) {
+ dentry->d_flags |= DCACHE_MOUNTED;
+ ret = 0;
+ }
}
spin_unlock(&dentry->d_lock);
out:
diff --git a/fs/direct-io.c b/fs/direct-io.c
index aeae8c063451..c87bae4376b8 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -906,6 +906,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
struct buffer_head *map_bh)
{
const unsigned blkbits = sdio->blkbits;
+ const unsigned i_blkbits = blkbits + sdio->blkfactor;
int ret = 0;
while (sdio->block_in_file < sdio->final_block_in_request) {
@@ -949,7 +950,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
clean_bdev_aliases(
map_bh->b_bdev,
map_bh->b_blocknr,
- map_bh->b_size >> blkbits);
+ map_bh->b_size >> i_blkbits);
}
if (!sdio->blkfactor)
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index e5e29f8c920b..7acd57da4f14 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -42,21 +42,6 @@
#define ENCRYPT 1
/**
- * ecryptfs_to_hex
- * @dst: Buffer to take hex character representation of contents of
- * src; must be at least of size (src_size * 2)
- * @src: Buffer to be converted to a hex string representation
- * @src_size: number of bytes to convert
- */
-void ecryptfs_to_hex(char *dst, char *src, size_t src_size)
-{
- int x;
-
- for (x = 0; x < src_size; x++)
- sprintf(&dst[x * 2], "%.2x", (unsigned char)src[x]);
-}
-
-/**
* ecryptfs_from_hex
* @dst: Buffer to take the bytes from src hex; must be at least of
* size (src_size / 2)
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 599a29237cfe..8d988012979c 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -31,6 +31,7 @@
#include <crypto/skcipher.h>
#include <keys/user-type.h>
#include <keys/encrypted-type.h>
+#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/fs_stack.h>
#include <linux/namei.h>
@@ -51,7 +52,13 @@
#define ECRYPTFS_XATTR_NAME "user.ecryptfs"
void ecryptfs_dump_auth_tok(struct ecryptfs_auth_tok *auth_tok);
-extern void ecryptfs_to_hex(char *dst, char *src, size_t src_size);
+static inline void
+ecryptfs_to_hex(char *dst, char *src, size_t src_size)
+{
+ char *end = bin2hex(dst, src, src_size);
+ *end = '\0';
+}
+
extern void ecryptfs_from_hex(char *dst, char *src, int dst_size);
struct ecryptfs_key_record {
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index 3cf1546dca82..11d39ab7774e 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -1326,7 +1326,7 @@ parse_tag_1_packet(struct ecryptfs_crypt_stat *crypt_stat,
if ((*new_auth_tok)->session_key.encrypted_key_size
> ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES) {
printk(KERN_WARNING "Tag 1 packet contains key larger "
- "than ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES");
+ "than ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES\n");
rc = -EINVAL;
goto out;
}
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 151872dcc1f4..6af4fa0aa6b9 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -426,7 +426,7 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options,
mount_crypt_stat->global_default_cipher_key_size);
if (!cipher_code) {
ecryptfs_printk(KERN_ERR,
- "eCryptfs doesn't support cipher: %s",
+ "eCryptfs doesn't support cipher: %s\n",
mount_crypt_stat->global_default_cipher_name);
rc = -EINVAL;
goto out;
diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c
index e4141f257495..595e684a95a5 100644
--- a/fs/ecryptfs/miscdev.c
+++ b/fs/ecryptfs/miscdev.c
@@ -383,7 +383,7 @@ ecryptfs_miscdev_write(struct file *file, const char __user *buf,
goto memdup;
} else if (count < MIN_MSG_PKT_SIZE || count > MAX_MSG_PKT_SIZE) {
printk(KERN_WARNING "%s: Acceptable packet size range is "
- "[%d-%zu], but amount of data written is [%zu].",
+ "[%d-%zu], but amount of data written is [%zu].\n",
__func__, MIN_MSG_PKT_SIZE, MAX_MSG_PKT_SIZE, count);
return -EINVAL;
}
diff --git a/fs/ext2/Kconfig b/fs/ext2/Kconfig
index 36bea5adcaba..c634874e12d9 100644
--- a/fs/ext2/Kconfig
+++ b/fs/ext2/Kconfig
@@ -1,6 +1,5 @@
config EXT2_FS
tristate "Second extended fs support"
- select FS_IOMAP if FS_DAX
help
Ext2 is a standard Linux file system for hard disks.
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 37e2be784ac7..5e64de9c5093 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -814,7 +814,7 @@ extern const struct file_operations ext2_file_operations;
/* inode.c */
extern const struct address_space_operations ext2_aops;
extern const struct address_space_operations ext2_nobh_aops;
-extern struct iomap_ops ext2_iomap_ops;
+extern const struct iomap_ops ext2_iomap_ops;
/* namei.c */
extern const struct inode_operations ext2_dir_inode_operations;
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index f073bfca694b..128cce540645 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -842,13 +842,13 @@ ext2_iomap_end(struct inode *inode, loff_t offset, loff_t length,
return 0;
}
-struct iomap_ops ext2_iomap_ops = {
+const struct iomap_ops ext2_iomap_ops = {
.iomap_begin = ext2_iomap_begin,
.iomap_end = ext2_iomap_end,
};
#else
/* Define empty ops for !CONFIG_FS_DAX case to avoid ugly ifdefs */
-struct iomap_ops ext2_iomap_ops;
+const struct iomap_ops ext2_iomap_ops;
#endif /* CONFIG_FS_DAX */
int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index 7b90691e98c4..e38039fd96ff 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
@@ -37,7 +37,6 @@ config EXT4_FS
select CRC16
select CRYPTO
select CRYPTO_CRC32C
- select FS_IOMAP if FS_DAX
help
This is the next generation of the ext3 filesystem.
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 2163c1e69f2a..212ceb5cd83e 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -32,7 +32,11 @@
#include <linux/percpu_counter.h>
#include <linux/ratelimit.h>
#include <crypto/hash.h>
-#include <linux/fscrypto.h>
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
+#include <linux/fscrypt_supp.h>
+#else
+#include <linux/fscrypt_notsupp.h>
+#endif
#include <linux/falloc.h>
#include <linux/percpu-rwsem.h>
#ifdef __KERNEL__
@@ -679,6 +683,16 @@ struct fsxattr {
#define EXT4_IOC_FSGETXATTR FS_IOC_FSGETXATTR
#define EXT4_IOC_FSSETXATTR FS_IOC_FSSETXATTR
+#define EXT4_IOC_GOINGDOWN _IOR ('X', 125, __u32)
+
+/*
+ * Flags for going down operation
+ */
+#define EXT4_GOING_FLAGS_DEFAULT 0x0 /* going down */
+#define EXT4_GOING_FLAGS_LOGFLUSH 0x1 /* flush log but not data */
+#define EXT4_GOING_FLAGS_NOLOGFLUSH 0x2 /* don't flush log nor data */
+
+
#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
/*
* ioctl commands in 32 bit emulation
@@ -1343,11 +1357,6 @@ struct ext4_super_block {
/* Number of quota types we support */
#define EXT4_MAXQUOTAS 3
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
-#define EXT4_KEY_DESC_PREFIX "ext4:"
-#define EXT4_KEY_DESC_PREFIX_SIZE 5
-#endif
-
/*
* fourth extended-fs super-block data in memory
*/
@@ -1404,8 +1413,7 @@ struct ext4_sb_info {
struct journal_s *s_journal;
struct list_head s_orphan;
struct mutex s_orphan_lock;
- unsigned long s_resize_flags; /* Flags indicating if there
- is a resizer */
+ unsigned long s_ext4_flags; /* Ext4 superblock flags */
unsigned long s_commit_interval;
u32 s_max_batch_time;
u32 s_min_batch_time;
@@ -1517,12 +1525,6 @@ struct ext4_sb_info {
/* Barrier between changing inodes' journal flags and writepages ops. */
struct percpu_rw_semaphore s_journal_flag_rwsem;
-
- /* Encryption support */
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
- u8 key_prefix[EXT4_KEY_DESC_PREFIX_SIZE];
- u8 key_prefix_size;
-#endif
};
static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
@@ -1845,6 +1847,18 @@ static inline bool ext4_has_incompat_features(struct super_block *sb)
}
/*
+ * Superblock flags
+ */
+#define EXT4_FLAGS_RESIZING 0
+#define EXT4_FLAGS_SHUTDOWN 1
+
+static inline int ext4_forced_shutdown(struct ext4_sb_info *sbi)
+{
+ return test_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags);
+}
+
+
+/*
* Default values for user and/or group using reserved blocks
*/
#define EXT4_DEF_RESUID 0
@@ -2320,28 +2334,6 @@ static inline int ext4_fname_setup_filename(struct inode *dir,
}
static inline void ext4_fname_free_filename(struct ext4_filename *fname) { }
-#define fscrypt_set_d_op(i)
-#define fscrypt_get_ctx fscrypt_notsupp_get_ctx
-#define fscrypt_release_ctx fscrypt_notsupp_release_ctx
-#define fscrypt_encrypt_page fscrypt_notsupp_encrypt_page
-#define fscrypt_decrypt_page fscrypt_notsupp_decrypt_page
-#define fscrypt_decrypt_bio_pages fscrypt_notsupp_decrypt_bio_pages
-#define fscrypt_pullback_bio_page fscrypt_notsupp_pullback_bio_page
-#define fscrypt_restore_control_page fscrypt_notsupp_restore_control_page
-#define fscrypt_zeroout_range fscrypt_notsupp_zeroout_range
-#define fscrypt_ioctl_set_policy fscrypt_notsupp_ioctl_set_policy
-#define fscrypt_ioctl_get_policy fscrypt_notsupp_ioctl_get_policy
-#define fscrypt_has_permitted_context fscrypt_notsupp_has_permitted_context
-#define fscrypt_inherit_context fscrypt_notsupp_inherit_context
-#define fscrypt_get_encryption_info fscrypt_notsupp_get_encryption_info
-#define fscrypt_put_encryption_info fscrypt_notsupp_put_encryption_info
-#define fscrypt_setup_filename fscrypt_notsupp_setup_filename
-#define fscrypt_free_filename fscrypt_notsupp_free_filename
-#define fscrypt_fname_encrypted_size fscrypt_notsupp_fname_encrypted_size
-#define fscrypt_fname_alloc_buffer fscrypt_notsupp_fname_alloc_buffer
-#define fscrypt_fname_free_buffer fscrypt_notsupp_fname_free_buffer
-#define fscrypt_fname_disk_to_usr fscrypt_notsupp_fname_disk_to_usr
-#define fscrypt_fname_usr_to_disk fscrypt_notsupp_fname_usr_to_disk
#endif
/* dir.c */
@@ -3034,7 +3026,7 @@ extern int ext4_inline_data_fiemap(struct inode *inode,
extern int ext4_try_to_evict_inline_data(handle_t *handle,
struct inode *inode,
int needed);
-extern void ext4_inline_data_truncate(struct inode *inode, int *has_inline);
+extern int ext4_inline_data_truncate(struct inode *inode, int *has_inline);
extern int ext4_convert_inline_data(struct inode *inode);
@@ -3228,7 +3220,6 @@ static inline void ext4_inode_resume_unlocked_dio(struct inode *inode)
EXT4_WQ_HASH_SZ])
extern wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
-#define EXT4_RESIZING 0
extern int ext4_resize_begin(struct super_block *sb);
extern void ext4_resize_end(struct super_block *sb);
@@ -3253,7 +3244,7 @@ static inline void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end)
}
}
-extern struct iomap_ops ext4_iomap_ops;
+extern const struct iomap_ops ext4_iomap_ops;
#endif /* __KERNEL__ */
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index e770c1ee4613..dd106b1d5d89 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -43,6 +43,10 @@ static int ext4_journal_check_start(struct super_block *sb)
journal_t *journal;
might_sleep();
+
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
+ return -EIO;
+
if (sb->s_flags & MS_RDONLY)
return -EROFS;
WARN_ON(sb->s_writers.frozen == SB_FREEZE_COMPLETE);
@@ -161,6 +165,13 @@ int __ext4_journal_get_write_access(const char *where, unsigned int line,
might_sleep();
if (ext4_handle_valid(handle)) {
+ struct super_block *sb;
+
+ sb = handle->h_transaction->t_journal->j_private;
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(sb)))) {
+ jbd2_journal_abort_handle(handle);
+ return -EIO;
+ }
err = jbd2_journal_get_write_access(handle, bh);
if (err)
ext4_journal_abort_handle(where, line, __func__, bh,
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 3e295d3350a9..2a97dff87b96 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -5334,7 +5334,8 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
ext4_lblk_t stop, *iterator, ex_start, ex_end;
/* Let path point to the last extent */
- path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0);
+ path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL,
+ EXT4_EX_NOCACHE);
if (IS_ERR(path))
return PTR_ERR(path);
@@ -5343,15 +5344,15 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
if (!extent)
goto out;
- stop = le32_to_cpu(extent->ee_block) +
- ext4_ext_get_actual_len(extent);
+ stop = le32_to_cpu(extent->ee_block);
/*
* In case of left shift, Don't start shifting extents until we make
* sure the hole is big enough to accommodate the shift.
*/
if (SHIFT == SHIFT_LEFT) {
- path = ext4_find_extent(inode, start - 1, &path, 0);
+ path = ext4_find_extent(inode, start - 1, &path,
+ EXT4_EX_NOCACHE);
if (IS_ERR(path))
return PTR_ERR(path);
depth = path->p_depth;
@@ -5383,9 +5384,14 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
else
iterator = &stop;
- /* Its safe to start updating extents */
- while (start < stop) {
- path = ext4_find_extent(inode, *iterator, &path, 0);
+ /*
+ * Its safe to start updating extents. Start and stop are unsigned, so
+ * in case of right shift if extent with 0 block is reached, iterator
+ * becomes NULL to indicate the end of the loop.
+ */
+ while (iterator && start <= stop) {
+ path = ext4_find_extent(inode, *iterator, &path,
+ EXT4_EX_NOCACHE);
if (IS_ERR(path))
return PTR_ERR(path);
depth = path->p_depth;
@@ -5412,8 +5418,11 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
ext4_ext_get_actual_len(extent);
} else {
extent = EXT_FIRST_EXTENT(path[depth].p_hdr);
- *iterator = le32_to_cpu(extent->ee_block) > 0 ?
- le32_to_cpu(extent->ee_block) - 1 : 0;
+ if (le32_to_cpu(extent->ee_block) > 0)
+ *iterator = le32_to_cpu(extent->ee_block) - 1;
+ else
+ /* Beginning is reached, end of the loop */
+ iterator = NULL;
/* Update path extent in case we need to stop */
while (le32_to_cpu(extent->ee_block) < start)
extent++;
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index d663d3d7c81c..87e11dfe3cde 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -57,6 +57,9 @@ static ssize_t ext4_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
static ssize_t ext4_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(file_inode(iocb->ki_filp)->i_sb))))
+ return -EIO;
+
if (!iov_iter_count(to))
return 0; /* skip atime */
@@ -175,7 +178,6 @@ ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct inode *inode = file_inode(iocb->ki_filp);
ssize_t ret;
- bool overwrite = false;
inode_lock(inode);
ret = ext4_write_checks(iocb, from);
@@ -188,16 +190,9 @@ ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (ret)
goto out;
- if (ext4_overwrite_io(inode, iocb->ki_pos, iov_iter_count(from))) {
- overwrite = true;
- downgrade_write(&inode->i_rwsem);
- }
ret = dax_iomap_rw(iocb, from, &ext4_iomap_ops);
out:
- if (!overwrite)
- inode_unlock(inode);
- else
- inode_unlock_shared(inode);
+ inode_unlock(inode);
if (ret > 0)
ret = generic_write_sync(iocb, ret);
return ret;
@@ -213,6 +208,9 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
int overwrite = 0;
ssize_t ret;
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ return -EIO;
+
#ifdef CONFIG_FS_DAX
if (IS_DAX(inode))
return ext4_dax_write_iter(iocb, from);
@@ -348,6 +346,9 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
{
struct inode *inode = file->f_mapping->host;
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ return -EIO;
+
if (ext4_encrypted_inode(inode)) {
int err = fscrypt_get_encryption_info(inode);
if (err)
@@ -375,6 +376,9 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
char buf[64], *cp;
int ret;
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ return -EIO;
+
if (unlikely(!(sbi->s_mount_flags & EXT4_MF_MNTDIR_SAMPLED) &&
!(sb->s_flags & MS_RDONLY))) {
sbi->s_mount_flags |= EXT4_MF_MNTDIR_SAMPLED;
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 88effb1053c7..9d549608fd30 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -100,6 +100,9 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
tid_t commit_tid;
bool needs_barrier = false;
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ return -EIO;
+
J_ASSERT(ext4_journal_current_handle() == NULL);
trace_ext4_sync_file_enter(file, datasync);
diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c
index e026aa941fd5..38b8a96eb97c 100644
--- a/fs/ext4/hash.c
+++ b/fs/ext4/hash.c
@@ -10,7 +10,8 @@
*/
#include <linux/fs.h>
-#include <linux/cryptohash.h>
+#include <linux/compiler.h>
+#include <linux/bitops.h>
#include "ext4.h"
#define DELTA 0x9E3779B9
@@ -32,6 +33,74 @@ static void TEA_transform(__u32 buf[4], __u32 const in[])
buf[1] += b1;
}
+/* F, G and H are basic MD4 functions: selection, majority, parity */
+#define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
+#define G(x, y, z) (((x) & (y)) + (((x) ^ (y)) & (z)))
+#define H(x, y, z) ((x) ^ (y) ^ (z))
+
+/*
+ * The generic round function. The application is so specific that
+ * we don't bother protecting all the arguments with parens, as is generally
+ * good macro practice, in favor of extra legibility.
+ * Rotation is separate from addition to prevent recomputation
+ */
+#define ROUND(f, a, b, c, d, x, s) \
+ (a += f(b, c, d) + x, a = rol32(a, s))
+#define K1 0
+#define K2 013240474631UL
+#define K3 015666365641UL
+
+/*
+ * Basic cut-down MD4 transform. Returns only 32 bits of result.
+ */
+static __u32 half_md4_transform(__u32 buf[4], __u32 const in[8])
+{
+ __u32 a = buf[0], b = buf[1], c = buf[2], d = buf[3];
+
+ /* Round 1 */
+ ROUND(F, a, b, c, d, in[0] + K1, 3);
+ ROUND(F, d, a, b, c, in[1] + K1, 7);
+ ROUND(F, c, d, a, b, in[2] + K1, 11);
+ ROUND(F, b, c, d, a, in[3] + K1, 19);
+ ROUND(F, a, b, c, d, in[4] + K1, 3);
+ ROUND(F, d, a, b, c, in[5] + K1, 7);
+ ROUND(F, c, d, a, b, in[6] + K1, 11);
+ ROUND(F, b, c, d, a, in[7] + K1, 19);
+
+ /* Round 2 */
+ ROUND(G, a, b, c, d, in[1] + K2, 3);
+ ROUND(G, d, a, b, c, in[3] + K2, 5);
+ ROUND(G, c, d, a, b, in[5] + K2, 9);
+ ROUND(G, b, c, d, a, in[7] + K2, 13);
+ ROUND(G, a, b, c, d, in[0] + K2, 3);
+ ROUND(G, d, a, b, c, in[2] + K2, 5);
+ ROUND(G, c, d, a, b, in[4] + K2, 9);
+ ROUND(G, b, c, d, a, in[6] + K2, 13);
+
+ /* Round 3 */
+ ROUND(H, a, b, c, d, in[3] + K3, 3);
+ ROUND(H, d, a, b, c, in[7] + K3, 9);
+ ROUND(H, c, d, a, b, in[2] + K3, 11);
+ ROUND(H, b, c, d, a, in[6] + K3, 15);
+ ROUND(H, a, b, c, d, in[1] + K3, 3);
+ ROUND(H, d, a, b, c, in[5] + K3, 9);
+ ROUND(H, c, d, a, b, in[0] + K3, 11);
+ ROUND(H, b, c, d, a, in[4] + K3, 15);
+
+ buf[0] += a;
+ buf[1] += b;
+ buf[2] += c;
+ buf[3] += d;
+
+ return buf[1]; /* "most hashed" word */
+}
+#undef ROUND
+#undef K1
+#undef K2
+#undef K3
+#undef F
+#undef G
+#undef H
/* The old legacy hash */
static __u32 dx_hack_hash_unsigned(const char *name, int len)
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index e57e8d90ea54..b14bae2598bc 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -764,6 +764,9 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
if (!dir || !dir->i_nlink)
return ERR_PTR(-EPERM);
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb))))
+ return ERR_PTR(-EIO);
+
if ((ext4_encrypted_inode(dir) ||
DUMMY_ENCRYPTION_ENABLED(EXT4_SB(dir->i_sb))) &&
(S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) {
@@ -771,7 +774,7 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
if (err)
return ERR_PTR(err);
if (!fscrypt_has_encryption_key(dir))
- return ERR_PTR(-EPERM);
+ return ERR_PTR(-ENOKEY);
if (!handle)
nblocks += EXT4_DATA_TRANS_BLOCKS(dir->i_sb);
encrypt = 1;
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 437df6a1a841..30a9f210d1e3 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -215,6 +215,9 @@ static void ext4_write_inline_data(struct inode *inode, struct ext4_iloc *iloc,
struct ext4_inode *raw_inode;
int cp_len = 0;
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ return;
+
BUG_ON(!EXT4_I(inode)->i_inline_off);
BUG_ON(pos + len > EXT4_I(inode)->i_inline_size);
@@ -381,7 +384,7 @@ out:
static int ext4_prepare_inline_data(handle_t *handle, struct inode *inode,
unsigned int len)
{
- int ret, size;
+ int ret, size, no_expand;
struct ext4_inode_info *ei = EXT4_I(inode);
if (!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA))
@@ -391,15 +394,14 @@ static int ext4_prepare_inline_data(handle_t *handle, struct inode *inode,
if (size < len)
return -ENOSPC;
- down_write(&EXT4_I(inode)->xattr_sem);
+ ext4_write_lock_xattr(inode, &no_expand);
if (ei->i_inline_off)
ret = ext4_update_inline_data(handle, inode, len);
else
ret = ext4_create_inline_data(handle, inode, len);
- up_write(&EXT4_I(inode)->xattr_sem);
-
+ ext4_write_unlock_xattr(inode, &no_expand);
return ret;
}
@@ -533,7 +535,7 @@ static int ext4_convert_inline_data_to_extent(struct address_space *mapping,
struct inode *inode,
unsigned flags)
{
- int ret, needed_blocks;
+ int ret, needed_blocks, no_expand;
handle_t *handle = NULL;
int retries = 0, sem_held = 0;
struct page *page = NULL;
@@ -573,7 +575,7 @@ retry:
goto out;
}
- down_write(&EXT4_I(inode)->xattr_sem);
+ ext4_write_lock_xattr(inode, &no_expand);
sem_held = 1;
/* If some one has already done this for us, just exit. */
if (!ext4_has_inline_data(inode)) {
@@ -610,7 +612,7 @@ retry:
put_page(page);
page = NULL;
ext4_orphan_add(handle, inode);
- up_write(&EXT4_I(inode)->xattr_sem);
+ ext4_write_unlock_xattr(inode, &no_expand);
sem_held = 0;
ext4_journal_stop(handle);
handle = NULL;
@@ -636,7 +638,7 @@ out:
put_page(page);
}
if (sem_held)
- up_write(&EXT4_I(inode)->xattr_sem);
+ ext4_write_unlock_xattr(inode, &no_expand);
if (handle)
ext4_journal_stop(handle);
brelse(iloc.bh);
@@ -729,7 +731,7 @@ convert:
int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len,
unsigned copied, struct page *page)
{
- int ret;
+ int ret, no_expand;
void *kaddr;
struct ext4_iloc iloc;
@@ -747,7 +749,7 @@ int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len,
goto out;
}
- down_write(&EXT4_I(inode)->xattr_sem);
+ ext4_write_lock_xattr(inode, &no_expand);
BUG_ON(!ext4_has_inline_data(inode));
kaddr = kmap_atomic(page);
@@ -757,7 +759,7 @@ int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len,
/* clear page dirty so that writepages wouldn't work for us. */
ClearPageDirty(page);
- up_write(&EXT4_I(inode)->xattr_sem);
+ ext4_write_unlock_xattr(inode, &no_expand);
brelse(iloc.bh);
out:
return copied;
@@ -768,7 +770,7 @@ ext4_journalled_write_inline_data(struct inode *inode,
unsigned len,
struct page *page)
{
- int ret;
+ int ret, no_expand;
void *kaddr;
struct ext4_iloc iloc;
@@ -778,11 +780,11 @@ ext4_journalled_write_inline_data(struct inode *inode,
return NULL;
}
- down_write(&EXT4_I(inode)->xattr_sem);
+ ext4_write_lock_xattr(inode, &no_expand);
kaddr = kmap_atomic(page);
ext4_write_inline_data(inode, &iloc, kaddr, 0, len);
kunmap_atomic(kaddr);
- up_write(&EXT4_I(inode)->xattr_sem);
+ ext4_write_unlock_xattr(inode, &no_expand);
return iloc.bh;
}
@@ -944,8 +946,15 @@ int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos,
struct page *page)
{
int i_size_changed = 0;
+ int ret;
- copied = ext4_write_inline_data_end(inode, pos, len, copied, page);
+ ret = ext4_write_inline_data_end(inode, pos, len, copied, page);
+ if (ret < 0) {
+ unlock_page(page);
+ put_page(page);
+ return ret;
+ }
+ copied = ret;
/*
* No need to use i_size_read() here, the i_size
@@ -1043,7 +1052,6 @@ static int ext4_add_dirent_to_inline(handle_t *handle,
dir->i_mtime = dir->i_ctime = current_time(dir);
ext4_update_dx_flag(dir);
dir->i_version++;
- ext4_mark_inode_dirty(handle, dir);
return 1;
}
@@ -1259,7 +1267,7 @@ out:
int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname,
struct inode *dir, struct inode *inode)
{
- int ret, inline_size;
+ int ret, inline_size, no_expand;
void *inline_start;
struct ext4_iloc iloc;
@@ -1267,7 +1275,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname,
if (ret)
return ret;
- down_write(&EXT4_I(dir)->xattr_sem);
+ ext4_write_lock_xattr(dir, &no_expand);
if (!ext4_has_inline_data(dir))
goto out;
@@ -1312,8 +1320,8 @@ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname,
ret = ext4_convert_inline_data_nolock(handle, dir, &iloc);
out:
+ ext4_write_unlock_xattr(dir, &no_expand);
ext4_mark_inode_dirty(handle, dir);
- up_write(&EXT4_I(dir)->xattr_sem);
brelse(iloc.bh);
return ret;
}
@@ -1673,7 +1681,7 @@ int ext4_delete_inline_entry(handle_t *handle,
struct buffer_head *bh,
int *has_inline_data)
{
- int err, inline_size;
+ int err, inline_size, no_expand;
struct ext4_iloc iloc;
void *inline_start;
@@ -1681,7 +1689,7 @@ int ext4_delete_inline_entry(handle_t *handle,
if (err)
return err;
- down_write(&EXT4_I(dir)->xattr_sem);
+ ext4_write_lock_xattr(dir, &no_expand);
if (!ext4_has_inline_data(dir)) {
*has_inline_data = 0;
goto out;
@@ -1709,13 +1717,11 @@ int ext4_delete_inline_entry(handle_t *handle,
if (err)
goto out;
- err = ext4_mark_inode_dirty(handle, dir);
- if (unlikely(err))
- goto out;
-
ext4_show_inline_dir(dir, iloc.bh, inline_start, inline_size);
out:
- up_write(&EXT4_I(dir)->xattr_sem);
+ ext4_write_unlock_xattr(dir, &no_expand);
+ if (likely(err == 0))
+ err = ext4_mark_inode_dirty(handle, dir);
brelse(iloc.bh);
if (err != -ENOENT)
ext4_std_error(dir->i_sb, err);
@@ -1814,11 +1820,11 @@ out:
int ext4_destroy_inline_data(handle_t *handle, struct inode *inode)
{
- int ret;
+ int ret, no_expand;
- down_write(&EXT4_I(inode)->xattr_sem);
+ ext4_write_lock_xattr(inode, &no_expand);
ret = ext4_destroy_inline_data_nolock(handle, inode);
- up_write(&EXT4_I(inode)->xattr_sem);
+ ext4_write_unlock_xattr(inode, &no_expand);
return ret;
}
@@ -1900,10 +1906,10 @@ out:
return error;
}
-void ext4_inline_data_truncate(struct inode *inode, int *has_inline)
+int ext4_inline_data_truncate(struct inode *inode, int *has_inline)
{
handle_t *handle;
- int inline_size, value_len, needed_blocks;
+ int inline_size, value_len, needed_blocks, no_expand, err = 0;
size_t i_size;
void *value = NULL;
struct ext4_xattr_ibody_find is = {
@@ -1918,19 +1924,19 @@ void ext4_inline_data_truncate(struct inode *inode, int *has_inline)
needed_blocks = ext4_writepage_trans_blocks(inode);
handle = ext4_journal_start(inode, EXT4_HT_INODE, needed_blocks);
if (IS_ERR(handle))
- return;
+ return PTR_ERR(handle);
- down_write(&EXT4_I(inode)->xattr_sem);
+ ext4_write_lock_xattr(inode, &no_expand);
if (!ext4_has_inline_data(inode)) {
*has_inline = 0;
ext4_journal_stop(handle);
- return;
+ return 0;
}
- if (ext4_orphan_add(handle, inode))
+ if ((err = ext4_orphan_add(handle, inode)) != 0)
goto out;
- if (ext4_get_inode_loc(inode, &is.iloc))
+ if ((err = ext4_get_inode_loc(inode, &is.iloc)) != 0)
goto out;
down_write(&EXT4_I(inode)->i_data_sem);
@@ -1941,24 +1947,29 @@ void ext4_inline_data_truncate(struct inode *inode, int *has_inline)
if (i_size < inline_size) {
/* Clear the content in the xattr space. */
if (inline_size > EXT4_MIN_INLINE_DATA_SIZE) {
- if (ext4_xattr_ibody_find(inode, &i, &is))
+ if ((err = ext4_xattr_ibody_find(inode, &i, &is)) != 0)
goto out_error;
BUG_ON(is.s.not_found);
value_len = le32_to_cpu(is.s.here->e_value_size);
value = kmalloc(value_len, GFP_NOFS);
- if (!value)
+ if (!value) {
+ err = -ENOMEM;
goto out_error;
+ }
- if (ext4_xattr_ibody_get(inode, i.name_index, i.name,
- value, value_len))
+ err = ext4_xattr_ibody_get(inode, i.name_index,
+ i.name, value, value_len);
+ if (err <= 0)
goto out_error;
i.value = value;
i.value_len = i_size > EXT4_MIN_INLINE_DATA_SIZE ?
i_size - EXT4_MIN_INLINE_DATA_SIZE : 0;
- if (ext4_xattr_ibody_inline_set(handle, inode, &i, &is))
+ err = ext4_xattr_ibody_inline_set(handle, inode,
+ &i, &is);
+ if (err)
goto out_error;
}
@@ -1978,23 +1989,24 @@ out_error:
up_write(&EXT4_I(inode)->i_data_sem);
out:
brelse(is.iloc.bh);
- up_write(&EXT4_I(inode)->xattr_sem);
+ ext4_write_unlock_xattr(inode, &no_expand);
kfree(value);
if (inode->i_nlink)
ext4_orphan_del(handle, inode);
- inode->i_mtime = inode->i_ctime = current_time(inode);
- ext4_mark_inode_dirty(handle, inode);
- if (IS_SYNC(inode))
- ext4_handle_sync(handle);
-
+ if (err == 0) {
+ inode->i_mtime = inode->i_ctime = current_time(inode);
+ err = ext4_mark_inode_dirty(handle, inode);
+ if (IS_SYNC(inode))
+ ext4_handle_sync(handle);
+ }
ext4_journal_stop(handle);
- return;
+ return err;
}
int ext4_convert_inline_data(struct inode *inode)
{
- int error, needed_blocks;
+ int error, needed_blocks, no_expand;
handle_t *handle;
struct ext4_iloc iloc;
@@ -2016,15 +2028,10 @@ int ext4_convert_inline_data(struct inode *inode)
goto out_free;
}
- down_write(&EXT4_I(inode)->xattr_sem);
- if (!ext4_has_inline_data(inode)) {
- up_write(&EXT4_I(inode)->xattr_sem);
- goto out;
- }
-
- error = ext4_convert_inline_data_nolock(handle, inode, &iloc);
- up_write(&EXT4_I(inode)->xattr_sem);
-out:
+ ext4_write_lock_xattr(inode, &no_expand);
+ if (ext4_has_inline_data(inode))
+ error = ext4_convert_inline_data_nolock(handle, inode, &iloc);
+ ext4_write_unlock_xattr(inode, &no_expand);
ext4_journal_stop(handle);
out_free:
brelse(iloc.bh);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 88d57af1b516..75212a6e69f8 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1189,6 +1189,9 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
pgoff_t index;
unsigned from, to;
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ return -EIO;
+
trace_ext4_write_begin(inode, pos, len, flags);
/*
* Reserve one block more for addition to orphan list in case
@@ -1330,8 +1333,11 @@ static int ext4_write_end(struct file *file,
if (ext4_has_inline_data(inode)) {
ret = ext4_write_inline_data_end(inode, pos, len,
copied, page);
- if (ret < 0)
+ if (ret < 0) {
+ unlock_page(page);
+ put_page(page);
goto errout;
+ }
copied = ret;
} else
copied = block_write_end(file, mapping, pos,
@@ -1385,7 +1391,9 @@ errout:
* set the buffer to be dirty, since in data=journalled mode we need
* to call ext4_handle_dirty_metadata() instead.
*/
-static void zero_new_buffers(struct page *page, unsigned from, unsigned to)
+static void ext4_journalled_zero_new_buffers(handle_t *handle,
+ struct page *page,
+ unsigned from, unsigned to)
{
unsigned int block_start = 0, block_end;
struct buffer_head *head, *bh;
@@ -1402,7 +1410,7 @@ static void zero_new_buffers(struct page *page, unsigned from, unsigned to)
size = min(to, block_end) - start;
zero_user(page, start, size);
- set_buffer_uptodate(bh);
+ write_end_fn(handle, bh);
}
clear_buffer_new(bh);
}
@@ -1431,18 +1439,25 @@ static int ext4_journalled_write_end(struct file *file,
BUG_ON(!ext4_handle_valid(handle));
- if (ext4_has_inline_data(inode))
- copied = ext4_write_inline_data_end(inode, pos, len,
- copied, page);
- else {
- if (copied < len) {
- if (!PageUptodate(page))
- copied = 0;
- zero_new_buffers(page, from+copied, to);
+ if (ext4_has_inline_data(inode)) {
+ ret = ext4_write_inline_data_end(inode, pos, len,
+ copied, page);
+ if (ret < 0) {
+ unlock_page(page);
+ put_page(page);
+ goto errout;
}
-
+ copied = ret;
+ } else if (unlikely(copied < len) && !PageUptodate(page)) {
+ copied = 0;
+ ext4_journalled_zero_new_buffers(handle, page, from, to);
+ } else {
+ if (unlikely(copied < len))
+ ext4_journalled_zero_new_buffers(handle, page,
+ from + copied, to);
ret = ext4_walk_page_buffers(handle, page_buffers(page), from,
- to, &partial, write_end_fn);
+ from + copied, &partial,
+ write_end_fn);
if (!partial)
SetPageUptodate(page);
}
@@ -1468,6 +1483,7 @@ static int ext4_journalled_write_end(struct file *file,
*/
ext4_orphan_add(handle, inode);
+errout:
ret2 = ext4_journal_stop(handle);
if (!ret)
ret = ret2;
@@ -2034,6 +2050,12 @@ static int ext4_writepage(struct page *page,
struct ext4_io_submit io_submit;
bool keep_towrite = false;
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) {
+ ext4_invalidatepage(page, 0, PAGE_SIZE);
+ unlock_page(page);
+ return -EIO;
+ }
+
trace_ext4_writepage(page);
size = i_size_read(inode);
if (page->index == size >> PAGE_SHIFT)
@@ -2409,7 +2431,8 @@ static int mpage_map_and_submit_extent(handle_t *handle,
if (err < 0) {
struct super_block *sb = inode->i_sb;
- if (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)
+ if (ext4_forced_shutdown(EXT4_SB(sb)) ||
+ EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)
goto invalidate_dirty_pages;
/*
* Let the uper layers retry transient errors.
@@ -2464,8 +2487,8 @@ update_disksize:
disksize = i_size;
if (disksize > EXT4_I(inode)->i_disksize)
EXT4_I(inode)->i_disksize = disksize;
- err2 = ext4_mark_inode_dirty(handle, inode);
up_write(&EXT4_I(inode)->i_data_sem);
+ err2 = ext4_mark_inode_dirty(handle, inode);
if (err2)
ext4_error(inode->i_sb,
"Failed to mark inode %lu dirty",
@@ -2631,6 +2654,9 @@ static int ext4_writepages(struct address_space *mapping,
struct blk_plug plug;
bool give_up_on_write = false;
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ return -EIO;
+
percpu_down_read(&sbi->s_journal_flag_rwsem);
trace_ext4_writepages(inode, wbc);
@@ -2667,7 +2693,8 @@ static int ext4_writepages(struct address_space *mapping,
* *never* be called, so if that ever happens, we would want
* the stack trace.
*/
- if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED)) {
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(mapping->host->i_sb)) ||
+ sbi->s_mount_flags & EXT4_MF_FS_ABORTED)) {
ret = -EROFS;
goto out_writepages;
}
@@ -2892,6 +2919,9 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
struct inode *inode = mapping->host;
handle_t *handle;
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ return -EIO;
+
index = pos >> PAGE_SHIFT;
if (ext4_nonda_switch(inode->i_sb) ||
@@ -3420,7 +3450,7 @@ orphan_del:
return ret;
}
-struct iomap_ops ext4_iomap_ops = {
+const struct iomap_ops ext4_iomap_ops = {
.iomap_begin = ext4_iomap_begin,
.iomap_end = ext4_iomap_end,
};
@@ -3914,6 +3944,10 @@ static int ext4_block_truncate_page(handle_t *handle,
unsigned blocksize;
struct inode *inode = mapping->host;
+ /* If we are processing an encrypted inode during orphan list handling */
+ if (ext4_encrypted_inode(inode) && !fscrypt_has_encryption_key(inode))
+ return 0;
+
blocksize = inode->i_sb->s_blocksize;
length = blocksize - (offset & (blocksize - 1));
@@ -4222,7 +4256,9 @@ int ext4_truncate(struct inode *inode)
if (ext4_has_inline_data(inode)) {
int has_inline = 1;
- ext4_inline_data_truncate(inode, &has_inline);
+ err = ext4_inline_data_truncate(inode, &has_inline);
+ if (err)
+ return err;
if (has_inline)
return 0;
}
@@ -5197,6 +5233,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
int orphan = 0;
const unsigned int ia_valid = attr->ia_valid;
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ return -EIO;
+
error = setattr_prepare(dentry, attr);
if (error)
return error;
@@ -5483,6 +5522,9 @@ int ext4_mark_iloc_dirty(handle_t *handle,
{
int err = 0;
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ return -EIO;
+
if (IS_I_VERSION(inode))
inode_inc_iversion(inode);
@@ -5506,6 +5548,9 @@ ext4_reserve_inode_write(handle_t *handle, struct inode *inode,
{
int err;
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ return -EIO;
+
err = ext4_get_inode_loc(inode, iloc);
if (!err) {
BUFFER_TRACE(iloc->bh, "get_write_access");
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index d534399cf607..b383ebf4020c 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -16,6 +16,7 @@
#include <linux/quotaops.h>
#include <linux/uuid.h>
#include <linux/uaccess.h>
+#include <linux/delay.h>
#include "ext4_jbd2.h"
#include "ext4.h"
@@ -442,6 +443,52 @@ static inline unsigned long ext4_xflags_to_iflags(__u32 xflags)
return iflags;
}
+int ext4_goingdown(struct super_block *sb, unsigned long arg)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ __u32 flags;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (get_user(flags, (__u32 __user *)arg))
+ return -EFAULT;
+
+ if (flags > EXT4_GOING_FLAGS_NOLOGFLUSH)
+ return -EINVAL;
+
+ if (ext4_forced_shutdown(sbi))
+ return 0;
+
+ ext4_msg(sb, KERN_ALERT, "shut down requested (%d)", flags);
+
+ switch (flags) {
+ case EXT4_GOING_FLAGS_DEFAULT:
+ freeze_bdev(sb->s_bdev);
+ set_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags);
+ thaw_bdev(sb->s_bdev, sb);
+ break;
+ case EXT4_GOING_FLAGS_LOGFLUSH:
+ set_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags);
+ if (sbi->s_journal && !is_journal_aborted(sbi->s_journal)) {
+ (void) ext4_force_commit(sb);
+ jbd2_journal_abort(sbi->s_journal, 0);
+ }
+ break;
+ case EXT4_GOING_FLAGS_NOLOGFLUSH:
+ set_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags);
+ if (sbi->s_journal && !is_journal_aborted(sbi->s_journal)) {
+ msleep(100);
+ jbd2_journal_abort(sbi->s_journal, 0);
+ }
+ break;
+ default:
+ return -EINVAL;
+ }
+ clear_opt(sb, DISCARD);
+ return 0;
+}
+
long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
struct inode *inode = file_inode(filp);
@@ -893,6 +940,8 @@ resizefs_out:
return 0;
}
+ case EXT4_IOC_GOINGDOWN:
+ return ext4_goingdown(sb, arg);
default:
return -ENOTTY;
}
@@ -959,6 +1008,7 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
case EXT4_IOC_SET_ENCRYPTION_POLICY:
case EXT4_IOC_GET_ENCRYPTION_PWSALT:
case EXT4_IOC_GET_ENCRYPTION_POLICY:
+ case EXT4_IOC_GOINGDOWN:
break;
default:
return -ENOIOCTLCMD;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 7ae43c59bc79..10c62de642c6 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -1556,7 +1556,17 @@ static int mb_find_extent(struct ext4_buddy *e4b, int block,
ex->fe_len += 1 << order;
}
- BUG_ON(ex->fe_start + ex->fe_len > (1 << (e4b->bd_blkbits + 3)));
+ if (ex->fe_start + ex->fe_len > (1 << (e4b->bd_blkbits + 3))) {
+ /* Should never happen! (but apparently sometimes does?!?) */
+ WARN_ON(1);
+ ext4_error(e4b->bd_sb, "corruption or bug in mb_find_extent "
+ "block=%d, order=%d needed=%d ex=%u/%d/%d@%u",
+ block, order, needed, ex->fe_group, ex->fe_start,
+ ex->fe_len, ex->fe_logical);
+ ex->fe_len = 0;
+ ex->fe_start = 0;
+ ex->fe_group = 0;
+ }
return ex->fe_len;
}
@@ -2136,8 +2146,10 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
* We search using buddy data only if the order of the request
* is greater than equal to the sbi_s_mb_order2_reqs
* You can tune it via /sys/fs/ext4/<partition>/mb_order2_req
+ * We also support searching for power-of-two requests only for
+ * requests upto maximum buddy size we have constructed.
*/
- if (i >= sbi->s_mb_order2_reqs) {
+ if (i >= sbi->s_mb_order2_reqs && i <= sb->s_blocksize_bits + 2) {
/*
* This should tell if fe_len is exactly power of 2
*/
@@ -2207,7 +2219,7 @@ repeat:
}
ac->ac_groups_scanned++;
- if (cr == 0 && ac->ac_2order < sb->s_blocksize_bits+2)
+ if (cr == 0)
ext4_mb_simple_scan_group(ac, &e4b);
else if (cr == 1 && sbi->s_stripe &&
!(ac->ac_g_ex.fe_len % sbi->s_stripe))
@@ -3123,6 +3135,13 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
if (ar->pright && start + size - 1 >= ar->lright)
size -= start + size - ar->lright;
+ /*
+ * Trim allocation request for filesystems with artificially small
+ * groups.
+ */
+ if (size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb))
+ size = EXT4_BLOCKS_PER_GROUP(ac->ac_sb);
+
end = start + size;
/* check we don't cross already preallocated blocks */
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index eadba919f26b..6ad612c576fc 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1378,6 +1378,8 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
return NULL;
retval = ext4_fname_setup_filename(dir, d_name, 1, &fname);
+ if (retval == -ENOENT)
+ return NULL;
if (retval)
return ERR_PTR(retval);
@@ -1616,13 +1618,15 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
!fscrypt_has_permitted_context(dir, inode)) {
int nokey = ext4_encrypted_inode(inode) &&
!fscrypt_has_encryption_key(inode);
- iput(inode);
- if (nokey)
+ if (nokey) {
+ iput(inode);
return ERR_PTR(-ENOKEY);
+ }
ext4_warning(inode->i_sb,
"Inconsistent encryption contexts: %lu/%lu",
(unsigned long) dir->i_ino,
(unsigned long) inode->i_ino);
+ iput(inode);
return ERR_PTR(-EPERM);
}
}
@@ -2935,6 +2939,9 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
struct ext4_dir_entry_2 *de;
handle_t *handle = NULL;
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb))))
+ return -EIO;
+
/* Initialize quotas before so that eventual writes go in
* separate transaction */
retval = dquot_initialize(dir);
@@ -3008,6 +3015,9 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
struct ext4_dir_entry_2 *de;
handle_t *handle = NULL;
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb))))
+ return -EIO;
+
trace_ext4_unlink_enter(dir, dentry);
/* Initialize quotas before so that eventual writes go
* in separate transaction */
@@ -3078,6 +3088,9 @@ static int ext4_symlink(struct inode *dir,
struct fscrypt_str disk_link;
struct fscrypt_symlink_data *sd = NULL;
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb))))
+ return -EIO;
+
disk_link.len = len + 1;
disk_link.name = (char *) symname;
@@ -3088,7 +3101,7 @@ static int ext4_symlink(struct inode *dir,
if (err)
return err;
if (!fscrypt_has_encryption_key(dir))
- return -EPERM;
+ return -ENOKEY;
disk_link.len = (fscrypt_fname_encrypted_size(dir, len) +
sizeof(struct fscrypt_symlink_data));
sd = kzalloc(disk_link.len, GFP_KERNEL);
@@ -3525,6 +3538,12 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
EXT4_I(old_dentry->d_inode)->i_projid)))
return -EXDEV;
+ if ((ext4_encrypted_inode(old_dir) &&
+ !fscrypt_has_encryption_key(old_dir)) ||
+ (ext4_encrypted_inode(new_dir) &&
+ !fscrypt_has_encryption_key(new_dir)))
+ return -ENOKEY;
+
retval = dquot_initialize(old.dir);
if (retval)
return retval;
@@ -3725,6 +3744,12 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
int retval;
struct timespec ctime;
+ if ((ext4_encrypted_inode(old_dir) &&
+ !fscrypt_has_encryption_key(old_dir)) ||
+ (ext4_encrypted_inode(new_dir) &&
+ !fscrypt_has_encryption_key(new_dir)))
+ return -ENOKEY;
+
if ((ext4_encrypted_inode(old_dir) ||
ext4_encrypted_inode(new_dir)) &&
(old_dir != new_dir) &&
@@ -3858,6 +3883,9 @@ static int ext4_rename2(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry,
unsigned int flags)
{
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(old_dir->i_sb))))
+ return -EIO;
+
if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
return -EINVAL;
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index d83b0f3c5fe9..208241b06662 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -24,7 +24,6 @@
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/backing-dev.h>
-#include <linux/fscrypto.h>
#include "ext4_jbd2.h"
#include "xattr.h"
@@ -158,7 +157,7 @@ static int ext4_end_io(ext4_io_end_t *io)
io->handle = NULL; /* Following call will use up the handle */
ret = ext4_convert_unwritten_extents(handle, inode, offset, size);
- if (ret < 0) {
+ if (ret < 0 && !ext4_forced_shutdown(EXT4_SB(inode->i_sb))) {
ext4_msg(inode->i_sb, KERN_EMERG,
"failed to convert unwritten extents to written "
"extents -- potential data loss! "
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index cf681004b196..c3ed9021b781 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -45,7 +45,8 @@ int ext4_resize_begin(struct super_block *sb)
return -EPERM;
}
- if (test_and_set_bit_lock(EXT4_RESIZING, &EXT4_SB(sb)->s_resize_flags))
+ if (test_and_set_bit_lock(EXT4_FLAGS_RESIZING,
+ &EXT4_SB(sb)->s_ext4_flags))
ret = -EBUSY;
return ret;
@@ -53,7 +54,7 @@ int ext4_resize_begin(struct super_block *sb)
void ext4_resize_end(struct super_block *sb)
{
- clear_bit_unlock(EXT4_RESIZING, &EXT4_SB(sb)->s_resize_flags);
+ clear_bit_unlock(EXT4_FLAGS_RESIZING, &EXT4_SB(sb)->s_ext4_flags);
smp_mb__after_atomic();
}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 66845a08a87a..a673558fe5f8 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -438,6 +438,9 @@ void __ext4_error(struct super_block *sb, const char *function,
struct va_format vaf;
va_list args;
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
+ return;
+
if (ext4_error_ratelimit(sb)) {
va_start(args, fmt);
vaf.fmt = fmt;
@@ -459,6 +462,9 @@ void __ext4_error_inode(struct inode *inode, const char *function,
struct va_format vaf;
struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ return;
+
es->s_last_error_ino = cpu_to_le32(inode->i_ino);
es->s_last_error_block = cpu_to_le64(block);
if (ext4_error_ratelimit(inode->i_sb)) {
@@ -491,6 +497,9 @@ void __ext4_error_file(struct file *file, const char *function,
struct inode *inode = file_inode(file);
char pathname[80], *path;
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ return;
+
es = EXT4_SB(inode->i_sb)->s_es;
es->s_last_error_ino = cpu_to_le32(inode->i_ino);
if (ext4_error_ratelimit(inode->i_sb)) {
@@ -567,6 +576,9 @@ void __ext4_std_error(struct super_block *sb, const char *function,
char nbuf[16];
const char *errstr;
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
+ return;
+
/* Special case: if the error is EROFS, and we're not already
* inside a transaction, then there's really no point in logging
* an error. */
@@ -600,6 +612,9 @@ void __ext4_abort(struct super_block *sb, const char *function,
struct va_format vaf;
va_list args;
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
+ return;
+
save_error_info(sb, function, line);
va_start(args, fmt);
vaf.fmt = fmt;
@@ -695,6 +710,9 @@ __acquires(bitlock)
va_list args;
struct ext4_super_block *es = EXT4_SB(sb)->s_es;
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
+ return;
+
es->s_last_error_ino = cpu_to_le32(ino);
es->s_last_error_block = cpu_to_le64(block);
__save_error_info(sb, function, line);
@@ -825,6 +843,7 @@ static void ext4_put_super(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es;
+ int aborted = 0;
int i, err;
ext4_unregister_li_request(sb);
@@ -834,9 +853,10 @@ static void ext4_put_super(struct super_block *sb)
destroy_workqueue(sbi->rsv_conversion_wq);
if (sbi->s_journal) {
+ aborted = is_journal_aborted(sbi->s_journal);
err = jbd2_journal_destroy(sbi->s_journal);
sbi->s_journal = NULL;
- if (err < 0)
+ if ((err < 0) && !aborted)
ext4_abort(sb, "Couldn't clean up the journal");
}
@@ -847,7 +867,7 @@ static void ext4_put_super(struct super_block *sb)
ext4_mb_release(sb);
ext4_ext_release(sb);
- if (!(sb->s_flags & MS_RDONLY)) {
+ if (!(sb->s_flags & MS_RDONLY) && !aborted) {
ext4_clear_feature_journal_needs_recovery(sb);
es->s_state = cpu_to_le16(sbi->s_mount_state);
}
@@ -1100,12 +1120,6 @@ static int ext4_get_context(struct inode *inode, void *ctx, size_t len)
EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, ctx, len);
}
-static int ext4_key_prefix(struct inode *inode, u8 **key)
-{
- *key = EXT4_SB(inode->i_sb)->key_prefix;
- return EXT4_SB(inode->i_sb)->key_prefix_size;
-}
-
static int ext4_prepare_context(struct inode *inode)
{
return ext4_convert_inline_data(inode);
@@ -1180,8 +1194,8 @@ static unsigned ext4_max_namelen(struct inode *inode)
}
static struct fscrypt_operations ext4_cryptops = {
+ .key_prefix = "ext4:",
.get_context = ext4_get_context,
- .key_prefix = ext4_key_prefix,
.prepare_context = ext4_prepare_context,
.set_context = ext4_set_context,
.dummy_context = ext4_dummy_context,
@@ -1290,7 +1304,7 @@ enum {
Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_i_version, Opt_dax,
Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit,
- Opt_lazytime, Opt_nolazytime,
+ Opt_lazytime, Opt_nolazytime, Opt_debug_want_extra_isize,
Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
Opt_inode_readahead_blks, Opt_journal_ioprio,
Opt_dioread_nolock, Opt_dioread_lock,
@@ -1358,6 +1372,7 @@ static const match_table_t tokens = {
{Opt_delalloc, "delalloc"},
{Opt_lazytime, "lazytime"},
{Opt_nolazytime, "nolazytime"},
+ {Opt_debug_want_extra_isize, "debug_want_extra_isize=%u"},
{Opt_nodelalloc, "nodelalloc"},
{Opt_removed, "mblk_io_submit"},
{Opt_removed, "nomblk_io_submit"},
@@ -1563,6 +1578,7 @@ static const struct mount_opts {
#endif
{Opt_nouid32, EXT4_MOUNT_NO_UID32, MOPT_SET},
{Opt_debug, EXT4_MOUNT_DEBUG, MOPT_SET},
+ {Opt_debug_want_extra_isize, 0, MOPT_GTE0},
{Opt_quota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA, MOPT_SET | MOPT_Q},
{Opt_usrquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA,
MOPT_SET | MOPT_Q},
@@ -1676,6 +1692,8 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
if (arg == 0)
arg = JBD2_DEFAULT_MAX_COMMIT_AGE;
sbi->s_commit_interval = HZ * arg;
+ } else if (token == Opt_debug_want_extra_isize) {
+ sbi->s_want_extra_isize = arg;
} else if (token == Opt_max_batch_time) {
sbi->s_max_batch_time = arg;
} else if (token == Opt_min_batch_time) {
@@ -2619,9 +2637,9 @@ static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group)
ret = sbi->s_stripe;
- else if (stripe_width <= sbi->s_blocks_per_group)
+ else if (stripe_width && stripe_width <= sbi->s_blocks_per_group)
ret = stripe_width;
- else if (stride <= sbi->s_blocks_per_group)
+ else if (stride && stride <= sbi->s_blocks_per_group)
ret = stride;
else
ret = 0;
@@ -3842,7 +3860,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
EXT4_DESC_PER_BLOCK(sb);
if (ext4_has_feature_meta_bg(sb)) {
- if (le32_to_cpu(es->s_first_meta_bg) >= db_count) {
+ if (le32_to_cpu(es->s_first_meta_bg) > db_count) {
ext4_msg(sb, KERN_WARNING,
"first meta block group too large: %u "
"(group descriptor block count %u)",
@@ -3925,7 +3943,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
* root first: it may be modified in the journal!
*/
if (!test_opt(sb, NOLOAD) && ext4_has_feature_journal(sb)) {
- if (ext4_load_journal(sb, es, journal_devnum))
+ err = ext4_load_journal(sb, es, journal_devnum);
+ if (err)
goto failed_mount3a;
} else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) &&
ext4_has_feature_journal_needs_recovery(sb)) {
@@ -4087,7 +4106,8 @@ no_journal:
sb->s_flags |= MS_RDONLY;
/* determine the minimum size of new large inodes, if present */
- if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
+ if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE &&
+ sbi->s_want_extra_isize == 0) {
sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
EXT4_GOOD_OLD_INODE_SIZE;
if (ext4_has_feature_extra_isize(sb)) {
@@ -4218,11 +4238,6 @@ no_journal:
ratelimit_state_init(&sbi->s_msg_ratelimit_state, 5 * HZ, 10);
kfree(orig_data);
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
- memcpy(sbi->key_prefix, EXT4_KEY_DESC_PREFIX,
- EXT4_KEY_DESC_PREFIX_SIZE);
- sbi->key_prefix_size = EXT4_KEY_DESC_PREFIX_SIZE;
-#endif
return 0;
cantfind_ext4:
@@ -4720,6 +4735,9 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
bool needs_barrier = false;
struct ext4_sb_info *sbi = EXT4_SB(sb);
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
+ return 0;
+
trace_ext4_sync_fs(sb, wait);
flush_workqueue(sbi->rsv_conversion_wq);
/*
@@ -4803,7 +4821,7 @@ out:
*/
static int ext4_unfreeze(struct super_block *sb)
{
- if (sb->s_flags & MS_RDONLY)
+ if ((sb->s_flags & MS_RDONLY) || ext4_forced_shutdown(EXT4_SB(sb)))
return 0;
if (EXT4_SB(sb)->s_journal) {
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 5a94fa52b74f..67636acf7624 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -411,6 +411,9 @@ ext4_xattr_get(struct inode *inode, int name_index, const char *name,
{
int error;
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ return -EIO;
+
if (strlen(name) > 255)
return -ERANGE;
@@ -1188,16 +1191,14 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
struct ext4_xattr_block_find bs = {
.s = { .not_found = -ENODATA, },
};
- unsigned long no_expand;
+ int no_expand;
int error;
if (!name)
return -EINVAL;
if (strlen(name) > 255)
return -ERANGE;
- down_write(&EXT4_I(inode)->xattr_sem);
- no_expand = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND);
- ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND);
+ ext4_write_lock_xattr(inode, &no_expand);
error = ext4_reserve_inode_write(handle, inode, &is.iloc);
if (error)
@@ -1264,7 +1265,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
ext4_xattr_update_super_block(handle, inode->i_sb);
inode->i_ctime = current_time(inode);
if (!value)
- ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
+ no_expand = 0;
error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);
/*
* The bh is consumed by ext4_mark_iloc_dirty, even with
@@ -1278,9 +1279,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
cleanup:
brelse(is.iloc.bh);
brelse(bs.bh);
- if (no_expand == 0)
- ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
- up_write(&EXT4_I(inode)->xattr_sem);
+ ext4_write_unlock_xattr(inode, &no_expand);
return error;
}
@@ -1497,12 +1496,11 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
int error = 0, tried_min_extra_isize = 0;
int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize);
int isize_diff; /* How much do we need to grow i_extra_isize */
+ int no_expand;
+
+ if (ext4_write_trylock_xattr(inode, &no_expand) == 0)
+ return 0;
- down_write(&EXT4_I(inode)->xattr_sem);
- /*
- * Set EXT4_STATE_NO_EXPAND to avoid recursion when marking inode dirty
- */
- ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND);
retry:
isize_diff = new_extra_isize - EXT4_I(inode)->i_extra_isize;
if (EXT4_I(inode)->i_extra_isize >= new_extra_isize)
@@ -1584,17 +1582,16 @@ shift:
EXT4_I(inode)->i_extra_isize = new_extra_isize;
brelse(bh);
out:
- ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
- up_write(&EXT4_I(inode)->xattr_sem);
+ ext4_write_unlock_xattr(inode, &no_expand);
return 0;
cleanup:
brelse(bh);
/*
- * We deliberately leave EXT4_STATE_NO_EXPAND set here since inode
- * size expansion failed.
+ * Inode size expansion failed; don't try again
*/
- up_write(&EXT4_I(inode)->xattr_sem);
+ no_expand = 1;
+ ext4_write_unlock_xattr(inode, &no_expand);
return error;
}
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index a92e783fa057..099c8b670ef5 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -102,6 +102,38 @@ extern const struct xattr_handler ext4_xattr_security_handler;
#define EXT4_XATTR_NAME_ENCRYPTION_CONTEXT "c"
+/*
+ * The EXT4_STATE_NO_EXPAND is overloaded and used for two purposes.
+ * The first is to signal that there the inline xattrs and data are
+ * taking up so much space that we might as well not keep trying to
+ * expand it. The second is that xattr_sem is taken for writing, so
+ * we shouldn't try to recurse into the inode expansion. For this
+ * second case, we need to make sure that we take save and restore the
+ * NO_EXPAND state flag appropriately.
+ */
+static inline void ext4_write_lock_xattr(struct inode *inode, int *save)
+{
+ down_write(&EXT4_I(inode)->xattr_sem);
+ *save = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND);
+ ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND);
+}
+
+static inline int ext4_write_trylock_xattr(struct inode *inode, int *save)
+{
+ if (down_write_trylock(&EXT4_I(inode)->xattr_sem) == 0)
+ return 0;
+ *save = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND);
+ ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND);
+ return 1;
+}
+
+static inline void ext4_write_unlock_xattr(struct inode *inode, int *save)
+{
+ if (*save == 0)
+ ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
+ up_write(&EXT4_I(inode)->xattr_sem);
+}
+
extern ssize_t ext4_listxattr(struct dentry *, char *, size_t);
extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t);
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index f73ee9534d83..e6e42a4b1344 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -249,7 +249,8 @@ static int f2fs_write_meta_page(struct page *page,
dec_page_count(sbi, F2FS_DIRTY_META);
if (wbc->for_reclaim)
- f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, META, WRITE);
+ f2fs_submit_merged_bio_cond(sbi, page->mapping->host,
+ 0, page->index, META, WRITE);
unlock_page(page);
@@ -891,7 +892,7 @@ retry:
F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA));
return 0;
}
- fi = list_entry(head->next, struct f2fs_inode_info, dirty_list);
+ fi = list_first_entry(head, struct f2fs_inode_info, dirty_list);
inode = igrab(&fi->vfs_inode);
spin_unlock(&sbi->inode_lock[type]);
if (inode) {
@@ -924,7 +925,7 @@ int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi)
spin_unlock(&sbi->inode_lock[DIRTY_META]);
return 0;
}
- fi = list_entry(head->next, struct f2fs_inode_info,
+ fi = list_first_entry(head, struct f2fs_inode_info,
gdirty_list);
inode = igrab(&fi->vfs_inode);
spin_unlock(&sbi->inode_lock[DIRTY_META]);
@@ -1248,15 +1249,20 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
f2fs_flush_merged_bios(sbi);
/* this is the case of multiple fstrims without any changes */
- if (cpc->reason == CP_DISCARD && !is_sbi_flag_set(sbi, SBI_IS_DIRTY)) {
- f2fs_bug_on(sbi, NM_I(sbi)->dirty_nat_cnt);
- f2fs_bug_on(sbi, SIT_I(sbi)->dirty_sentries);
- f2fs_bug_on(sbi, prefree_segments(sbi));
- flush_sit_entries(sbi, cpc);
- clear_prefree_segments(sbi, cpc);
- f2fs_wait_all_discard_bio(sbi);
- unblock_operations(sbi);
- goto out;
+ if (cpc->reason == CP_DISCARD) {
+ if (!exist_trim_candidates(sbi, cpc)) {
+ unblock_operations(sbi);
+ goto out;
+ }
+
+ if (NM_I(sbi)->dirty_nat_cnt == 0 &&
+ SIT_I(sbi)->dirty_sentries == 0 &&
+ prefree_segments(sbi) == 0) {
+ flush_sit_entries(sbi, cpc);
+ clear_prefree_segments(sbi, cpc);
+ unblock_operations(sbi);
+ goto out;
+ }
}
/*
@@ -1273,12 +1279,10 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
/* unlock all the fs_lock[] in do_checkpoint() */
err = do_checkpoint(sbi, cpc);
- if (err) {
+ if (err)
release_discard_addrs(sbi);
- } else {
+ else
clear_prefree_segments(sbi, cpc);
- f2fs_wait_all_discard_bio(sbi);
- }
unblock_operations(sbi);
stat_inc_cp_count(sbi->stat_info);
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 9ac262564fa6..8c61fa7fd27d 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -93,6 +93,17 @@ static void f2fs_write_end_io(struct bio *bio)
struct page *page = bvec->bv_page;
enum count_type type = WB_DATA_TYPE(page);
+ if (IS_DUMMY_WRITTEN_PAGE(page)) {
+ set_page_private(page, (unsigned long)NULL);
+ ClearPagePrivate(page);
+ unlock_page(page);
+ mempool_free(page, sbi->write_io_dummy);
+
+ if (unlikely(bio->bi_error))
+ f2fs_stop_checkpoint(sbi, true);
+ continue;
+ }
+
fscrypt_pullback_bio_page(&page, true);
if (unlikely(bio->bi_error)) {
@@ -171,10 +182,46 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi,
struct bio *bio, enum page_type type)
{
if (!is_read_io(bio_op(bio))) {
+ unsigned int start;
+
if (f2fs_sb_mounted_blkzoned(sbi->sb) &&
current->plug && (type == DATA || type == NODE))
blk_finish_plug(current->plug);
+
+ if (type != DATA && type != NODE)
+ goto submit_io;
+
+ start = bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS;
+ start %= F2FS_IO_SIZE(sbi);
+
+ if (start == 0)
+ goto submit_io;
+
+ /* fill dummy pages */
+ for (; start < F2FS_IO_SIZE(sbi); start++) {
+ struct page *page =
+ mempool_alloc(sbi->write_io_dummy,
+ GFP_NOIO | __GFP_ZERO | __GFP_NOFAIL);
+ f2fs_bug_on(sbi, !page);
+
+ SetPagePrivate(page);
+ set_page_private(page, (unsigned long)DUMMY_WRITTEN_PAGE);
+ lock_page(page);
+ if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE)
+ f2fs_bug_on(sbi, 1);
+ }
+ /*
+ * In the NODE case, we lose next block address chain. So, we
+ * need to do checkpoint in f2fs_sync_file.
+ */
+ if (type == NODE)
+ set_sbi_flag(sbi, SBI_NEED_CP);
}
+submit_io:
+ if (is_read_io(bio_op(bio)))
+ trace_f2fs_submit_read_bio(sbi->sb, type, bio);
+ else
+ trace_f2fs_submit_write_bio(sbi->sb, type, bio);
submit_bio(bio);
}
@@ -185,19 +232,19 @@ static void __submit_merged_bio(struct f2fs_bio_info *io)
if (!io->bio)
return;
+ bio_set_op_attrs(io->bio, fio->op, fio->op_flags);
+
if (is_read_io(fio->op))
- trace_f2fs_submit_read_bio(io->sbi->sb, fio, io->bio);
+ trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio);
else
- trace_f2fs_submit_write_bio(io->sbi->sb, fio, io->bio);
-
- bio_set_op_attrs(io->bio, fio->op, fio->op_flags);
+ trace_f2fs_prepare_write_bio(io->sbi->sb, fio->type, io->bio);
__submit_bio(io->sbi, io->bio, fio->type);
io->bio = NULL;
}
-static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode,
- struct page *page, nid_t ino)
+static bool __has_merged_page(struct f2fs_bio_info *io,
+ struct inode *inode, nid_t ino, pgoff_t idx)
{
struct bio_vec *bvec;
struct page *target;
@@ -206,7 +253,7 @@ static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode,
if (!io->bio)
return false;
- if (!inode && !page && !ino)
+ if (!inode && !ino)
return true;
bio_for_each_segment_all(bvec, io->bio, i) {
@@ -216,10 +263,11 @@ static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode,
else
target = fscrypt_control_page(bvec->bv_page);
+ if (idx != target->index)
+ continue;
+
if (inode && inode == target->mapping->host)
return true;
- if (page && page == target)
- return true;
if (ino && ino == ino_of_node(target))
return true;
}
@@ -228,22 +276,21 @@ static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode,
}
static bool has_merged_page(struct f2fs_sb_info *sbi, struct inode *inode,
- struct page *page, nid_t ino,
- enum page_type type)
+ nid_t ino, pgoff_t idx, enum page_type type)
{
enum page_type btype = PAGE_TYPE_OF_BIO(type);
struct f2fs_bio_info *io = &sbi->write_io[btype];
bool ret;
down_read(&io->io_rwsem);
- ret = __has_merged_page(io, inode, page, ino);
+ ret = __has_merged_page(io, inode, ino, idx);
up_read(&io->io_rwsem);
return ret;
}
static void __f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
- struct inode *inode, struct page *page,
- nid_t ino, enum page_type type, int rw)
+ struct inode *inode, nid_t ino, pgoff_t idx,
+ enum page_type type, int rw)
{
enum page_type btype = PAGE_TYPE_OF_BIO(type);
struct f2fs_bio_info *io;
@@ -252,16 +299,16 @@ static void __f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
down_write(&io->io_rwsem);
- if (!__has_merged_page(io, inode, page, ino))
+ if (!__has_merged_page(io, inode, ino, idx))
goto out;
/* change META to META_FLUSH in the checkpoint procedure */
if (type >= META_FLUSH) {
io->fio.type = META_FLUSH;
io->fio.op = REQ_OP_WRITE;
- io->fio.op_flags = REQ_PREFLUSH | REQ_META | REQ_PRIO;
+ io->fio.op_flags = REQ_META | REQ_PRIO;
if (!test_opt(sbi, NOBARRIER))
- io->fio.op_flags |= REQ_FUA;
+ io->fio.op_flags |= REQ_PREFLUSH | REQ_FUA;
}
__submit_merged_bio(io);
out:
@@ -271,15 +318,15 @@ out:
void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, enum page_type type,
int rw)
{
- __f2fs_submit_merged_bio(sbi, NULL, NULL, 0, type, rw);
+ __f2fs_submit_merged_bio(sbi, NULL, 0, 0, type, rw);
}
void f2fs_submit_merged_bio_cond(struct f2fs_sb_info *sbi,
- struct inode *inode, struct page *page,
- nid_t ino, enum page_type type, int rw)
+ struct inode *inode, nid_t ino, pgoff_t idx,
+ enum page_type type, int rw)
{
- if (has_merged_page(sbi, inode, page, ino, type))
- __f2fs_submit_merged_bio(sbi, inode, page, ino, type, rw);
+ if (has_merged_page(sbi, inode, ino, idx, type))
+ __f2fs_submit_merged_bio(sbi, inode, ino, idx, type, rw);
}
void f2fs_flush_merged_bios(struct f2fs_sb_info *sbi)
@@ -315,13 +362,14 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
return 0;
}
-void f2fs_submit_page_mbio(struct f2fs_io_info *fio)
+int f2fs_submit_page_mbio(struct f2fs_io_info *fio)
{
struct f2fs_sb_info *sbi = fio->sbi;
enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
struct f2fs_bio_info *io;
bool is_read = is_read_io(fio->op);
struct page *bio_page;
+ int err = 0;
io = is_read ? &sbi->read_io : &sbi->write_io[btype];
@@ -331,6 +379,9 @@ void f2fs_submit_page_mbio(struct f2fs_io_info *fio)
bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page;
+ /* set submitted = 1 as a return value */
+ fio->submitted = 1;
+
if (!is_read)
inc_page_count(sbi, WB_DATA_TYPE(bio_page));
@@ -342,6 +393,12 @@ void f2fs_submit_page_mbio(struct f2fs_io_info *fio)
__submit_merged_bio(io);
alloc_new:
if (io->bio == NULL) {
+ if ((fio->type == DATA || fio->type == NODE) &&
+ fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) {
+ err = -EAGAIN;
+ dec_page_count(sbi, WB_DATA_TYPE(bio_page));
+ goto out_fail;
+ }
io->bio = __bio_alloc(sbi, fio->new_blkaddr,
BIO_MAX_PAGES, is_read);
io->fio = *fio;
@@ -355,9 +412,10 @@ alloc_new:
io->last_block_in_bio = fio->new_blkaddr;
f2fs_trace_ios(fio, 0);
-
+out_fail:
up_write(&io->io_rwsem);
trace_f2fs_submit_page_mbio(fio->page, fio);
+ return err;
}
static void __set_data_blkaddr(struct dnode_of_data *dn)
@@ -694,6 +752,9 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
struct f2fs_map_blocks map;
int err = 0;
+ if (is_inode_flag_set(inode, FI_NO_PREALLOC))
+ return 0;
+
map.m_lblk = F2FS_BLK_ALIGN(iocb->ki_pos);
map.m_len = F2FS_BYTES_TO_BLK(iocb->ki_pos + iov_iter_count(from));
if (map.m_len > map.m_lblk)
@@ -906,7 +967,7 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
if (!err) {
map_bh(bh, inode->i_sb, map.m_pblk);
bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags;
- bh->b_size = map.m_len << inode->i_blkbits;
+ bh->b_size = (u64)map.m_len << inode->i_blkbits;
}
return err;
}
@@ -1088,7 +1149,7 @@ static int f2fs_mpage_readpages(struct address_space *mapping,
prefetchw(&page->flags);
if (pages) {
- page = list_entry(pages->prev, struct page, lru);
+ page = list_last_entry(pages, struct page, lru);
list_del(&page->lru);
if (add_to_page_cache_lru(page, mapping,
page->index,
@@ -1207,7 +1268,7 @@ static int f2fs_read_data_pages(struct file *file,
struct list_head *pages, unsigned nr_pages)
{
struct inode *inode = file->f_mapping->host;
- struct page *page = list_entry(pages->prev, struct page, lru);
+ struct page *page = list_last_entry(pages, struct page, lru);
trace_f2fs_readpages(inode, page, nr_pages);
@@ -1288,8 +1349,8 @@ out_writepage:
return err;
}
-static int f2fs_write_data_page(struct page *page,
- struct writeback_control *wbc)
+static int __write_data_page(struct page *page, bool *submitted,
+ struct writeback_control *wbc)
{
struct inode *inode = page->mapping->host;
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
@@ -1307,6 +1368,7 @@ static int f2fs_write_data_page(struct page *page,
.op_flags = wbc_to_write_flags(wbc),
.page = page,
.encrypted_page = NULL,
+ .submitted = false,
};
trace_f2fs_writepage(page, DATA);
@@ -1370,15 +1432,22 @@ out:
ClearPageUptodate(page);
if (wbc->for_reclaim) {
- f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, DATA, WRITE);
+ f2fs_submit_merged_bio_cond(sbi, inode, 0, page->index,
+ DATA, WRITE);
remove_dirty_inode(inode);
+ submitted = NULL;
}
unlock_page(page);
f2fs_balance_fs(sbi, need_balance_fs);
- if (unlikely(f2fs_cp_error(sbi)))
+ if (unlikely(f2fs_cp_error(sbi))) {
f2fs_submit_merged_bio(sbi, DATA, WRITE);
+ submitted = NULL;
+ }
+
+ if (submitted)
+ *submitted = fio.submitted;
return 0;
@@ -1390,6 +1459,12 @@ redirty_out:
return err;
}
+static int f2fs_write_data_page(struct page *page,
+ struct writeback_control *wbc)
+{
+ return __write_data_page(page, NULL, wbc);
+}
+
/*
* This function was copied from write_cche_pages from mm/page-writeback.c.
* The major change is making write step of cold data page separately from
@@ -1406,10 +1481,10 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
pgoff_t index;
pgoff_t end; /* Inclusive */
pgoff_t done_index;
+ pgoff_t last_idx = ULONG_MAX;
int cycled;
int range_whole = 0;
int tag;
- int nwritten = 0;
pagevec_init(&pvec, 0);
@@ -1446,6 +1521,7 @@ retry:
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
+ bool submitted = false;
if (page->index > end) {
done = 1;
@@ -1479,7 +1555,7 @@ continue_unlock:
if (!clear_page_dirty_for_io(page))
goto continue_unlock;
- ret = mapping->a_ops->writepage(page, wbc);
+ ret = __write_data_page(page, &submitted, wbc);
if (unlikely(ret)) {
/*
* keep nr_to_write, since vfs uses this to
@@ -1493,8 +1569,8 @@ continue_unlock:
done_index = page->index + 1;
done = 1;
break;
- } else {
- nwritten++;
+ } else if (submitted) {
+ last_idx = page->index;
}
if (--wbc->nr_to_write <= 0 &&
@@ -1516,9 +1592,9 @@ continue_unlock:
if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
mapping->writeback_index = done_index;
- if (nwritten)
+ if (last_idx != ULONG_MAX)
f2fs_submit_merged_bio_cond(F2FS_M_SB(mapping), mapping->host,
- NULL, 0, DATA, WRITE);
+ 0, last_idx, DATA, WRITE);
return ret;
}
@@ -1598,7 +1674,8 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
* we already allocated all the blocks, so we don't need to get
* the block addresses when there is no need to fill the page.
*/
- if (!f2fs_has_inline_data(inode) && len == PAGE_SIZE)
+ if (!f2fs_has_inline_data(inode) && len == PAGE_SIZE &&
+ !is_inode_flag_set(inode, FI_NO_PREALLOC))
return 0;
if (f2fs_has_inline_data(inode) ||
@@ -1715,6 +1792,11 @@ repeat:
if (len == PAGE_SIZE || PageUptodate(page))
return 0;
+ if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode)) {
+ zero_user_segment(page, len, PAGE_SIZE);
+ return 0;
+ }
+
if (blkaddr == NEW_ADDR) {
zero_user_segment(page, 0, PAGE_SIZE);
SetPageUptodate(page);
@@ -1768,7 +1850,7 @@ static int f2fs_write_end(struct file *file,
* let generic_perform_write() try to copy data again through copied=0.
*/
if (!PageUptodate(page)) {
- if (unlikely(copied != PAGE_SIZE))
+ if (unlikely(copied != len))
copied = 0;
else
SetPageUptodate(page);
@@ -1917,7 +1999,7 @@ static int f2fs_set_data_page_dirty(struct page *page)
if (!PageUptodate(page))
SetPageUptodate(page);
- if (f2fs_is_atomic_file(inode)) {
+ if (f2fs_is_atomic_file(inode) && !f2fs_is_commit_atomic_write(inode)) {
if (!IS_ATOMIC_WRITTEN_PAGE(page)) {
register_inmem_page(inode, page);
return 1;
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index fbd5184140d0..de8da9fc5c99 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -50,8 +50,16 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->ndirty_files = sbi->ndirty_inode[FILE_INODE];
si->ndirty_all = sbi->ndirty_inode[DIRTY_META];
si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES);
+ si->aw_cnt = atomic_read(&sbi->aw_cnt);
+ si->max_aw_cnt = atomic_read(&sbi->max_aw_cnt);
si->nr_wb_cp_data = get_pages(sbi, F2FS_WB_CP_DATA);
si->nr_wb_data = get_pages(sbi, F2FS_WB_DATA);
+ if (SM_I(sbi) && SM_I(sbi)->fcc_info)
+ si->nr_flush =
+ atomic_read(&SM_I(sbi)->fcc_info->submit_flush);
+ if (SM_I(sbi) && SM_I(sbi)->dcc_info)
+ si->nr_discard =
+ atomic_read(&SM_I(sbi)->dcc_info->submit_discard);
si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg;
si->rsvd_segs = reserved_segments(sbi);
si->overp_segs = overprovision_segments(sbi);
@@ -62,6 +70,8 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->inline_xattr = atomic_read(&sbi->inline_xattr);
si->inline_inode = atomic_read(&sbi->inline_inode);
si->inline_dir = atomic_read(&sbi->inline_dir);
+ si->append = sbi->im[APPEND_INO].ino_num;
+ si->update = sbi->im[UPDATE_INO].ino_num;
si->orphans = sbi->im[ORPHAN_INO].ino_num;
si->utilization = utilization(sbi);
@@ -192,8 +202,10 @@ get_cache:
si->cache_mem += sizeof(struct f2fs_gc_kthread);
/* build merge flush thread */
- if (SM_I(sbi)->cmd_control_info)
+ if (SM_I(sbi)->fcc_info)
si->cache_mem += sizeof(struct flush_cmd_control);
+ if (SM_I(sbi)->dcc_info)
+ si->cache_mem += sizeof(struct discard_cmd_control);
/* free nids */
si->cache_mem += (NM_I(sbi)->nid_cnt[FREE_NID_LIST] +
@@ -254,8 +266,8 @@ static int stat_show(struct seq_file *s, void *v)
si->inline_inode);
seq_printf(s, " - Inline_dentry Inode: %u\n",
si->inline_dir);
- seq_printf(s, " - Orphan Inode: %u\n",
- si->orphans);
+ seq_printf(s, " - Orphan/Append/Update Inode: %u, %u, %u\n",
+ si->orphans, si->append, si->update);
seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n",
si->main_area_segs, si->main_area_sections,
si->main_area_zones);
@@ -314,8 +326,11 @@ static int stat_show(struct seq_file *s, void *v)
seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n",
si->ext_tree, si->zombie_tree, si->ext_node);
seq_puts(s, "\nBalancing F2FS Async:\n");
- seq_printf(s, " - inmem: %4d, wb_cp_data: %4d, wb_data: %4d\n",
- si->inmem_pages, si->nr_wb_cp_data, si->nr_wb_data);
+ seq_printf(s, " - IO (CP: %4d, Data: %4d, Flush: %4d, Discard: %4d)\n",
+ si->nr_wb_cp_data, si->nr_wb_data,
+ si->nr_flush, si->nr_discard);
+ seq_printf(s, " - inmem: %4d, atomic IO: %4d (Max. %4d)\n",
+ si->inmem_pages, si->aw_cnt, si->max_aw_cnt);
seq_printf(s, " - nodes: %4d in %4d\n",
si->ndirty_node, si->node_pages);
seq_printf(s, " - dents: %4d in dirs:%4d (%4d)\n",
@@ -414,6 +429,9 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
atomic_set(&sbi->inline_dir, 0);
atomic_set(&sbi->inplace_count, 0);
+ atomic_set(&sbi->aw_cnt, 0);
+ atomic_set(&sbi->max_aw_cnt, 0);
+
mutex_lock(&f2fs_stat_mutex);
list_add_tail(&si->stat_list, &f2fs_stat_list);
mutex_unlock(&f2fs_stat_mutex);
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 827c5daef4fc..18607fc5240d 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -268,7 +268,10 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
err = fscrypt_setup_filename(dir, child, 1, &fname);
if (err) {
- *res_page = ERR_PTR(err);
+ if (err == -ENOENT)
+ *res_page = NULL;
+ else
+ *res_page = ERR_PTR(err);
return NULL;
}
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index 4db44da7ef69..6ed6424807b6 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -311,28 +311,24 @@ static struct extent_node *__lookup_extent_tree_ret(struct extent_tree *et,
tmp_node = parent;
if (parent && fofs > en->ei.fofs)
tmp_node = rb_next(parent);
- *next_ex = tmp_node ?
- rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
+ *next_ex = rb_entry_safe(tmp_node, struct extent_node, rb_node);
tmp_node = parent;
if (parent && fofs < en->ei.fofs)
tmp_node = rb_prev(parent);
- *prev_ex = tmp_node ?
- rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
+ *prev_ex = rb_entry_safe(tmp_node, struct extent_node, rb_node);
return NULL;
lookup_neighbors:
if (fofs == en->ei.fofs) {
/* lookup prev node for merging backward later */
tmp_node = rb_prev(&en->rb_node);
- *prev_ex = tmp_node ?
- rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
+ *prev_ex = rb_entry_safe(tmp_node, struct extent_node, rb_node);
}
if (fofs == en->ei.fofs + en->ei.len - 1) {
/* lookup next node for merging frontward later */
tmp_node = rb_next(&en->rb_node);
- *next_ex = tmp_node ?
- rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
+ *next_ex = rb_entry_safe(tmp_node, struct extent_node, rb_node);
}
return en;
}
@@ -352,11 +348,12 @@ static struct extent_node *__try_merge_extent_node(struct inode *inode,
}
if (next_ex && __is_front_mergeable(ei, &next_ex->ei)) {
- if (en)
- __release_extent_node(sbi, et, prev_ex);
next_ex->ei.fofs = ei->fofs;
next_ex->ei.blk = ei->blk;
next_ex->ei.len += ei->len;
+ if (en)
+ __release_extent_node(sbi, et, prev_ex);
+
en = next_ex;
}
@@ -492,9 +489,8 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
if (!next_en) {
struct rb_node *node = rb_next(&en->rb_node);
- next_en = node ?
- rb_entry(node, struct extent_node, rb_node)
- : NULL;
+ next_en = rb_entry_safe(node, struct extent_node,
+ rb_node);
}
if (parts)
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 2da8c3aa0ce5..975879f0f50b 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -22,7 +22,11 @@
#include <linux/vmalloc.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
-#include <linux/fscrypto.h>
+#ifdef CONFIG_F2FS_FS_ENCRYPTION
+#include <linux/fscrypt_supp.h>
+#else
+#include <linux/fscrypt_notsupp.h>
+#endif
#include <crypto/hash.h>
#ifdef CONFIG_F2FS_CHECK_FS
@@ -108,9 +112,9 @@ struct f2fs_mount_info {
#define F2FS_HAS_FEATURE(sb, mask) \
((F2FS_SB(sb)->raw_super->feature & cpu_to_le32(mask)) != 0)
#define F2FS_SET_FEATURE(sb, mask) \
- F2FS_SB(sb)->raw_super->feature |= cpu_to_le32(mask)
+ (F2FS_SB(sb)->raw_super->feature |= cpu_to_le32(mask))
#define F2FS_CLEAR_FEATURE(sb, mask) \
- F2FS_SB(sb)->raw_super->feature &= ~cpu_to_le32(mask)
+ (F2FS_SB(sb)->raw_super->feature &= ~cpu_to_le32(mask))
/*
* For checkpoint manager
@@ -128,11 +132,8 @@ enum {
CP_DISCARD,
};
-#define DEF_BATCHED_TRIM_SECTIONS 2
-#define BATCHED_TRIM_SEGMENTS(sbi) \
- (SM_I(sbi)->trim_sections * (sbi)->segs_per_sec)
-#define BATCHED_TRIM_BLOCKS(sbi) \
- (BATCHED_TRIM_SEGMENTS(sbi) << (sbi)->log_blocks_per_seg)
+#define MAX_DISCARD_BLOCKS(sbi) (1 << (sbi)->log_blocks_per_seg)
+#define DISCARD_ISSUE_RATE 8
#define DEF_CP_INTERVAL 60 /* 60 secs */
#define DEF_IDLE_INTERVAL 5 /* 5 secs */
@@ -181,11 +182,30 @@ struct discard_entry {
int len; /* # of consecutive blocks of the discard */
};
-struct bio_entry {
- struct list_head list;
- struct bio *bio;
- struct completion event;
- int error;
+enum {
+ D_PREP,
+ D_SUBMIT,
+ D_DONE,
+};
+
+struct discard_cmd {
+ struct list_head list; /* command list */
+ struct completion wait; /* compleation */
+ block_t lstart; /* logical start address */
+ block_t len; /* length */
+ struct bio *bio; /* bio */
+ int state; /* state */
+};
+
+struct discard_cmd_control {
+ struct task_struct *f2fs_issue_discard; /* discard thread */
+ struct list_head discard_entry_list; /* 4KB discard entry list */
+ int nr_discards; /* # of discards in the list */
+ struct list_head discard_cmd_list; /* discard cmd list */
+ wait_queue_head_t discard_wait_queue; /* waiting queue for wake-up */
+ struct mutex cmd_lock;
+ int max_discards; /* max. discards to be issued */
+ atomic_t submit_discard; /* # of issued discard */
};
/* for the list of fsync inodes, used only during recovery */
@@ -210,6 +230,7 @@ struct fsync_inode_entry {
static inline int update_nats_in_cursum(struct f2fs_journal *journal, int i)
{
int before = nats_in_cursum(journal);
+
journal->n_nats = cpu_to_le16(before + i);
return before;
}
@@ -217,6 +238,7 @@ static inline int update_nats_in_cursum(struct f2fs_journal *journal, int i)
static inline int update_sits_in_cursum(struct f2fs_journal *journal, int i)
{
int before = sits_in_cursum(journal);
+
journal->n_sits = cpu_to_le16(before + i);
return before;
}
@@ -302,12 +324,14 @@ static inline void make_dentry_ptr(struct inode *inode,
if (type == 1) {
struct f2fs_dentry_block *t = (struct f2fs_dentry_block *)src;
+
d->max = NR_DENTRY_IN_BLOCK;
d->bitmap = &t->dentry_bitmap;
d->dentry = t->dentry;
d->filename = t->filename;
} else {
struct f2fs_inline_dentry *t = (struct f2fs_inline_dentry *)src;
+
d->max = NR_INLINE_DENTRY;
d->bitmap = &t->dentry_bitmap;
d->dentry = t->dentry;
@@ -496,7 +520,7 @@ static inline bool __is_front_mergeable(struct extent_info *cur,
return __is_extent_mergeable(cur, front);
}
-extern void f2fs_mark_inode_dirty_sync(struct inode *, bool);
+extern void f2fs_mark_inode_dirty_sync(struct inode *inode, bool sync);
static inline void __try_update_largest_extent(struct inode *inode,
struct extent_tree *et, struct extent_node *en)
{
@@ -538,6 +562,9 @@ struct f2fs_nm_info {
/* for checkpoint */
char *nat_bitmap; /* NAT bitmap pointer */
+#ifdef CONFIG_F2FS_CHECK_FS
+ char *nat_bitmap_mir; /* NAT bitmap mirror */
+#endif
int bitmap_size; /* bitmap size */
};
@@ -628,15 +655,6 @@ struct f2fs_sm_info {
/* a threshold to reclaim prefree segments */
unsigned int rec_prefree_segments;
- /* for small discard management */
- struct list_head discard_list; /* 4KB discard list */
- struct list_head wait_list; /* linked with issued discard bio */
- int nr_discards; /* # of discards in the list */
- int max_discards; /* max. discards to be issued */
-
- /* for batched trimming */
- unsigned int trim_sections; /* # of sections to trim */
-
struct list_head sit_entry_set; /* sit entry set list */
unsigned int ipu_policy; /* in-place-update policy */
@@ -644,8 +662,10 @@ struct f2fs_sm_info {
unsigned int min_fsync_blocks; /* threshold for fsync */
/* for flush command control */
- struct flush_cmd_control *cmd_control_info;
+ struct flush_cmd_control *fcc_info;
+ /* for discard command control */
+ struct discard_cmd_control *dcc_info;
};
/*
@@ -704,6 +724,7 @@ struct f2fs_io_info {
block_t old_blkaddr; /* old block address before Cow */
struct page *page; /* page to be written */
struct page *encrypted_page; /* encrypted page */
+ bool submitted; /* indicate IO submission */
};
#define is_read_io(rw) (rw == READ)
@@ -760,10 +781,6 @@ enum {
MAX_TIME,
};
-#ifdef CONFIG_F2FS_FS_ENCRYPTION
-#define F2FS_KEY_DESC_PREFIX "f2fs:"
-#define F2FS_KEY_DESC_PREFIX_SIZE 5
-#endif
struct f2fs_sb_info {
struct super_block *sb; /* pointer to VFS super block */
struct proc_dir_entry *s_proc; /* proc entry */
@@ -771,11 +788,6 @@ struct f2fs_sb_info {
int valid_super_block; /* valid super block no */
unsigned long s_flag; /* flags for sbi */
-#ifdef CONFIG_F2FS_FS_ENCRYPTION
- u8 key_prefix[F2FS_KEY_DESC_PREFIX_SIZE];
- u8 key_prefix_size;
-#endif
-
#ifdef CONFIG_BLK_DEV_ZONED
unsigned int blocks_per_blkz; /* F2FS blocks per zone */
unsigned int log_blocks_per_blkz; /* log2 F2FS blocks per zone */
@@ -792,6 +804,8 @@ struct f2fs_sb_info {
struct f2fs_bio_info read_io; /* for read bios */
struct f2fs_bio_info write_io[NR_PAGE_TYPE]; /* for write bios */
struct mutex wio_mutex[NODE + 1]; /* bio ordering for NODE/DATA */
+ int write_io_size_bits; /* Write IO size bits */
+ mempool_t *write_io_dummy; /* Dummy pages */
/* for checkpoint */
struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */
@@ -882,6 +896,8 @@ struct f2fs_sb_info {
atomic_t inline_xattr; /* # of inline_xattr inodes */
atomic_t inline_inode; /* # of inline_data inodes */
atomic_t inline_dir; /* # of inline_dentry inodes */
+ atomic_t aw_cnt; /* # of atomic writes */
+ atomic_t max_aw_cnt; /* max # of atomic writes */
int bg_gc; /* background gc calls */
unsigned int ndirty_inode[NR_INODE_TYPE]; /* # of dirty inodes */
#endif
@@ -1454,6 +1470,7 @@ static inline struct page *f2fs_grab_cache_page(struct address_space *mapping,
{
#ifdef CONFIG_F2FS_FAULT_INJECTION
struct page *page = find_lock_page(mapping, index);
+
if (page)
return page;
@@ -1537,6 +1554,7 @@ static inline void f2fs_radix_tree_insert(struct radix_tree_root *root,
static inline bool IS_INODE(struct page *page)
{
struct f2fs_node *p = F2FS_NODE(page);
+
return RAW_IS_INODE(p);
}
@@ -1550,6 +1568,7 @@ static inline block_t datablock_addr(struct page *node_page,
{
struct f2fs_node *raw_node;
__le32 *addr_array;
+
raw_node = F2FS_NODE(node_page);
addr_array = blkaddr_in_node(raw_node);
return le32_to_cpu(addr_array[offset]);
@@ -1633,6 +1652,7 @@ enum {
FI_UPDATE_WRITE, /* inode has in-place-update data */
FI_NEED_IPU, /* used for ipu per file */
FI_ATOMIC_FILE, /* indicate atomic file */
+ FI_ATOMIC_COMMIT, /* indicate the state of atomical committing */
FI_VOLATILE_FILE, /* indicate volatile file */
FI_FIRST_BLOCK_WRITTEN, /* indicate #0 data block was written */
FI_DROP_CACHE, /* drop dirty page cache */
@@ -1640,6 +1660,7 @@ enum {
FI_INLINE_DOTS, /* indicate inline dot dentries */
FI_DO_DEFRAG, /* indicate defragment is running */
FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */
+ FI_NO_PREALLOC, /* indicate skipped preallocated blocks */
};
static inline void __mark_inode_dirty_flag(struct inode *inode,
@@ -1784,6 +1805,7 @@ static inline unsigned int addrs_per_inode(struct inode *inode)
static inline void *inline_xattr_addr(struct page *page)
{
struct f2fs_inode *ri = F2FS_INODE(page);
+
return (void *)&(ri->i_addr[DEF_ADDRS_PER_INODE -
F2FS_INLINE_XATTR_ADDRS]);
}
@@ -1822,6 +1844,11 @@ static inline bool f2fs_is_atomic_file(struct inode *inode)
return is_inode_flag_set(inode, FI_ATOMIC_FILE);
}
+static inline bool f2fs_is_commit_atomic_write(struct inode *inode)
+{
+ return is_inode_flag_set(inode, FI_ATOMIC_COMMIT);
+}
+
static inline bool f2fs_is_volatile_file(struct inode *inode)
{
return is_inode_flag_set(inode, FI_VOLATILE_FILE);
@@ -1840,6 +1867,7 @@ static inline bool f2fs_is_drop_cache(struct inode *inode)
static inline void *inline_data_addr(struct page *page)
{
struct f2fs_inode *ri = F2FS_INODE(page);
+
return (void *)&(ri->i_addr[1]);
}
@@ -1962,29 +1990,30 @@ static inline void *f2fs_kvzalloc(size_t size, gfp_t flags)
/*
* file.c
*/
-int f2fs_sync_file(struct file *, loff_t, loff_t, int);
-void truncate_data_blocks(struct dnode_of_data *);
-int truncate_blocks(struct inode *, u64, bool);
-int f2fs_truncate(struct inode *);
-int f2fs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
-int f2fs_setattr(struct dentry *, struct iattr *);
-int truncate_hole(struct inode *, pgoff_t, pgoff_t);
-int truncate_data_blocks_range(struct dnode_of_data *, int);
-long f2fs_ioctl(struct file *, unsigned int, unsigned long);
-long f2fs_compat_ioctl(struct file *, unsigned int, unsigned long);
+int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync);
+void truncate_data_blocks(struct dnode_of_data *dn);
+int truncate_blocks(struct inode *inode, u64 from, bool lock);
+int f2fs_truncate(struct inode *inode);
+int f2fs_getattr(struct vfsmount *mnt, struct dentry *dentry,
+ struct kstat *stat);
+int f2fs_setattr(struct dentry *dentry, struct iattr *attr);
+int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end);
+int truncate_data_blocks_range(struct dnode_of_data *dn, int count);
+long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
+long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
/*
* inode.c
*/
-void f2fs_set_inode_flags(struct inode *);
-struct inode *f2fs_iget(struct super_block *, unsigned long);
-struct inode *f2fs_iget_retry(struct super_block *, unsigned long);
-int try_to_free_nats(struct f2fs_sb_info *, int);
-int update_inode(struct inode *, struct page *);
-int update_inode_page(struct inode *);
-int f2fs_write_inode(struct inode *, struct writeback_control *);
-void f2fs_evict_inode(struct inode *);
-void handle_failed_inode(struct inode *);
+void f2fs_set_inode_flags(struct inode *inode);
+struct inode *f2fs_iget(struct super_block *sb, unsigned long ino);
+struct inode *f2fs_iget_retry(struct super_block *sb, unsigned long ino);
+int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink);
+int update_inode(struct inode *inode, struct page *node_page);
+int update_inode_page(struct inode *inode);
+int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc);
+void f2fs_evict_inode(struct inode *inode);
+void handle_failed_inode(struct inode *inode);
/*
* namei.c
@@ -1994,40 +2023,47 @@ struct dentry *f2fs_get_parent(struct dentry *child);
/*
* dir.c
*/
-void set_de_type(struct f2fs_dir_entry *, umode_t);
-unsigned char get_de_type(struct f2fs_dir_entry *);
-struct f2fs_dir_entry *find_target_dentry(struct fscrypt_name *,
- f2fs_hash_t, int *, struct f2fs_dentry_ptr *);
-int f2fs_fill_dentries(struct dir_context *, struct f2fs_dentry_ptr *,
- unsigned int, struct fscrypt_str *);
-void do_make_empty_dir(struct inode *, struct inode *,
- struct f2fs_dentry_ptr *);
-struct page *init_inode_metadata(struct inode *, struct inode *,
- const struct qstr *, const struct qstr *, struct page *);
-void update_parent_metadata(struct inode *, struct inode *, unsigned int);
-int room_for_filename(const void *, int, int);
-void f2fs_drop_nlink(struct inode *, struct inode *);
-struct f2fs_dir_entry *__f2fs_find_entry(struct inode *, struct fscrypt_name *,
- struct page **);
-struct f2fs_dir_entry *f2fs_find_entry(struct inode *, const struct qstr *,
- struct page **);
-struct f2fs_dir_entry *f2fs_parent_dir(struct inode *, struct page **);
-ino_t f2fs_inode_by_name(struct inode *, const struct qstr *, struct page **);
-void f2fs_set_link(struct inode *, struct f2fs_dir_entry *,
- struct page *, struct inode *);
-int update_dent_inode(struct inode *, struct inode *, const struct qstr *);
-void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *,
- const struct qstr *, f2fs_hash_t , unsigned int);
-int f2fs_add_regular_entry(struct inode *, const struct qstr *,
- const struct qstr *, struct inode *, nid_t, umode_t);
-int __f2fs_do_add_link(struct inode *, struct fscrypt_name*, struct inode *,
- nid_t, umode_t);
-int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *, nid_t,
- umode_t);
-void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *,
- struct inode *);
-int f2fs_do_tmpfile(struct inode *, struct inode *);
-bool f2fs_empty_dir(struct inode *);
+void set_de_type(struct f2fs_dir_entry *de, umode_t mode);
+unsigned char get_de_type(struct f2fs_dir_entry *de);
+struct f2fs_dir_entry *find_target_dentry(struct fscrypt_name *fname,
+ f2fs_hash_t namehash, int *max_slots,
+ struct f2fs_dentry_ptr *d);
+int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
+ unsigned int start_pos, struct fscrypt_str *fstr);
+void do_make_empty_dir(struct inode *inode, struct inode *parent,
+ struct f2fs_dentry_ptr *d);
+struct page *init_inode_metadata(struct inode *inode, struct inode *dir,
+ const struct qstr *new_name,
+ const struct qstr *orig_name, struct page *dpage);
+void update_parent_metadata(struct inode *dir, struct inode *inode,
+ unsigned int current_depth);
+int room_for_filename(const void *bitmap, int slots, int max_slots);
+void f2fs_drop_nlink(struct inode *dir, struct inode *inode);
+struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
+ struct fscrypt_name *fname, struct page **res_page);
+struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
+ const struct qstr *child, struct page **res_page);
+struct f2fs_dir_entry *f2fs_parent_dir(struct inode *dir, struct page **p);
+ino_t f2fs_inode_by_name(struct inode *dir, const struct qstr *qstr,
+ struct page **page);
+void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
+ struct page *page, struct inode *inode);
+int update_dent_inode(struct inode *inode, struct inode *to,
+ const struct qstr *name);
+void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d,
+ const struct qstr *name, f2fs_hash_t name_hash,
+ unsigned int bit_pos);
+int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name,
+ const struct qstr *orig_name,
+ struct inode *inode, nid_t ino, umode_t mode);
+int __f2fs_do_add_link(struct inode *dir, struct fscrypt_name *fname,
+ struct inode *inode, nid_t ino, umode_t mode);
+int __f2fs_add_link(struct inode *dir, const struct qstr *name,
+ struct inode *inode, nid_t ino, umode_t mode);
+void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
+ struct inode *dir, struct inode *inode);
+int f2fs_do_tmpfile(struct inode *inode, struct inode *dir);
+bool f2fs_empty_dir(struct inode *dir);
static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode)
{
@@ -2038,18 +2074,18 @@ static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode)
/*
* super.c
*/
-int f2fs_inode_dirtied(struct inode *, bool);
-void f2fs_inode_synced(struct inode *);
-int f2fs_commit_super(struct f2fs_sb_info *, bool);
-int f2fs_sync_fs(struct super_block *, int);
+int f2fs_inode_dirtied(struct inode *inode, bool sync);
+void f2fs_inode_synced(struct inode *inode);
+int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover);
+int f2fs_sync_fs(struct super_block *sb, int sync);
extern __printf(3, 4)
-void f2fs_msg(struct super_block *, const char *, const char *, ...);
+void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...);
int sanity_check_ckpt(struct f2fs_sb_info *sbi);
/*
* hash.c
*/
-f2fs_hash_t f2fs_dentry_hash(const struct qstr *);
+f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info);
/*
* node.c
@@ -2057,163 +2093,183 @@ f2fs_hash_t f2fs_dentry_hash(const struct qstr *);
struct dnode_of_data;
struct node_info;
-bool available_free_memory(struct f2fs_sb_info *, int);
-int need_dentry_mark(struct f2fs_sb_info *, nid_t);
-bool is_checkpointed_node(struct f2fs_sb_info *, nid_t);
-bool need_inode_block_update(struct f2fs_sb_info *, nid_t);
-void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *);
-pgoff_t get_next_page_offset(struct dnode_of_data *, pgoff_t);
-int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int);
-int truncate_inode_blocks(struct inode *, pgoff_t);
-int truncate_xattr_node(struct inode *, struct page *);
-int wait_on_node_pages_writeback(struct f2fs_sb_info *, nid_t);
-int remove_inode_page(struct inode *);
-struct page *new_inode_page(struct inode *);
-struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *);
-void ra_node_page(struct f2fs_sb_info *, nid_t);
-struct page *get_node_page(struct f2fs_sb_info *, pgoff_t);
-struct page *get_node_page_ra(struct page *, int);
-void move_node_page(struct page *, int);
-int fsync_node_pages(struct f2fs_sb_info *, struct inode *,
- struct writeback_control *, bool);
-int sync_node_pages(struct f2fs_sb_info *, struct writeback_control *);
-void build_free_nids(struct f2fs_sb_info *, bool);
-bool alloc_nid(struct f2fs_sb_info *, nid_t *);
-void alloc_nid_done(struct f2fs_sb_info *, nid_t);
-void alloc_nid_failed(struct f2fs_sb_info *, nid_t);
-int try_to_free_nids(struct f2fs_sb_info *, int);
-void recover_inline_xattr(struct inode *, struct page *);
-void recover_xattr_data(struct inode *, struct page *, block_t);
-int recover_inode_page(struct f2fs_sb_info *, struct page *);
-int restore_node_summary(struct f2fs_sb_info *, unsigned int,
- struct f2fs_summary_block *);
-void flush_nat_entries(struct f2fs_sb_info *);
-int build_node_manager(struct f2fs_sb_info *);
-void destroy_node_manager(struct f2fs_sb_info *);
+bool available_free_memory(struct f2fs_sb_info *sbi, int type);
+int need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid);
+bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid);
+bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino);
+void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni);
+pgoff_t get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs);
+int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode);
+int truncate_inode_blocks(struct inode *inode, pgoff_t from);
+int truncate_xattr_node(struct inode *inode, struct page *page);
+int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino);
+int remove_inode_page(struct inode *inode);
+struct page *new_inode_page(struct inode *inode);
+struct page *new_node_page(struct dnode_of_data *dn,
+ unsigned int ofs, struct page *ipage);
+void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid);
+struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid);
+struct page *get_node_page_ra(struct page *parent, int start);
+void move_node_page(struct page *node_page, int gc_type);
+int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
+ struct writeback_control *wbc, bool atomic);
+int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc);
+void build_free_nids(struct f2fs_sb_info *sbi, bool sync);
+bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid);
+void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid);
+void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid);
+int try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink);
+void recover_inline_xattr(struct inode *inode, struct page *page);
+void recover_xattr_data(struct inode *inode, struct page *page,
+ block_t blkaddr);
+int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page);
+int restore_node_summary(struct f2fs_sb_info *sbi,
+ unsigned int segno, struct f2fs_summary_block *sum);
+void flush_nat_entries(struct f2fs_sb_info *sbi);
+int build_node_manager(struct f2fs_sb_info *sbi);
+void destroy_node_manager(struct f2fs_sb_info *sbi);
int __init create_node_manager_caches(void);
void destroy_node_manager_caches(void);
/*
* segment.c
*/
-void register_inmem_page(struct inode *, struct page *);
-void drop_inmem_pages(struct inode *);
-int commit_inmem_pages(struct inode *);
-void f2fs_balance_fs(struct f2fs_sb_info *, bool);
-void f2fs_balance_fs_bg(struct f2fs_sb_info *);
-int f2fs_issue_flush(struct f2fs_sb_info *);
-int create_flush_cmd_control(struct f2fs_sb_info *);
-void destroy_flush_cmd_control(struct f2fs_sb_info *, bool);
-void invalidate_blocks(struct f2fs_sb_info *, block_t);
-bool is_checkpointed_data(struct f2fs_sb_info *, block_t);
-void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t);
-void f2fs_wait_all_discard_bio(struct f2fs_sb_info *);
-void clear_prefree_segments(struct f2fs_sb_info *, struct cp_control *);
-void release_discard_addrs(struct f2fs_sb_info *);
-int npages_for_summary_flush(struct f2fs_sb_info *, bool);
-void allocate_new_segments(struct f2fs_sb_info *);
-int f2fs_trim_fs(struct f2fs_sb_info *, struct fstrim_range *);
-struct page *get_sum_page(struct f2fs_sb_info *, unsigned int);
-void update_meta_page(struct f2fs_sb_info *, void *, block_t);
-void write_meta_page(struct f2fs_sb_info *, struct page *);
-void write_node_page(unsigned int, struct f2fs_io_info *);
-void write_data_page(struct dnode_of_data *, struct f2fs_io_info *);
-void rewrite_data_page(struct f2fs_io_info *);
-void __f2fs_replace_block(struct f2fs_sb_info *, struct f2fs_summary *,
- block_t, block_t, bool, bool);
-void f2fs_replace_block(struct f2fs_sb_info *, struct dnode_of_data *,
- block_t, block_t, unsigned char, bool, bool);
-void allocate_data_block(struct f2fs_sb_info *, struct page *,
- block_t, block_t *, struct f2fs_summary *, int);
-void f2fs_wait_on_page_writeback(struct page *, enum page_type, bool);
-void f2fs_wait_on_encrypted_page_writeback(struct f2fs_sb_info *, block_t);
-void write_data_summaries(struct f2fs_sb_info *, block_t);
-void write_node_summaries(struct f2fs_sb_info *, block_t);
-int lookup_journal_in_cursum(struct f2fs_journal *, int, unsigned int, int);
-void flush_sit_entries(struct f2fs_sb_info *, struct cp_control *);
-int build_segment_manager(struct f2fs_sb_info *);
-void destroy_segment_manager(struct f2fs_sb_info *);
+void register_inmem_page(struct inode *inode, struct page *page);
+void drop_inmem_pages(struct inode *inode);
+int commit_inmem_pages(struct inode *inode);
+void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need);
+void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi);
+int f2fs_issue_flush(struct f2fs_sb_info *sbi);
+int create_flush_cmd_control(struct f2fs_sb_info *sbi);
+void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free);
+void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr);
+bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr);
+void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new);
+void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr);
+void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc);
+void release_discard_addrs(struct f2fs_sb_info *sbi);
+int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra);
+void allocate_new_segments(struct f2fs_sb_info *sbi);
+int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range);
+bool exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc);
+struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno);
+void update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr);
+void write_meta_page(struct f2fs_sb_info *sbi, struct page *page);
+void write_node_page(unsigned int nid, struct f2fs_io_info *fio);
+void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio);
+void rewrite_data_page(struct f2fs_io_info *fio);
+void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
+ block_t old_blkaddr, block_t new_blkaddr,
+ bool recover_curseg, bool recover_newaddr);
+void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
+ block_t old_addr, block_t new_addr,
+ unsigned char version, bool recover_curseg,
+ bool recover_newaddr);
+void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
+ block_t old_blkaddr, block_t *new_blkaddr,
+ struct f2fs_summary *sum, int type);
+void f2fs_wait_on_page_writeback(struct page *page,
+ enum page_type type, bool ordered);
+void f2fs_wait_on_encrypted_page_writeback(struct f2fs_sb_info *sbi,
+ block_t blkaddr);
+void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk);
+void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk);
+int lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
+ unsigned int val, int alloc);
+void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc);
+int build_segment_manager(struct f2fs_sb_info *sbi);
+void destroy_segment_manager(struct f2fs_sb_info *sbi);
int __init create_segment_manager_caches(void);
void destroy_segment_manager_caches(void);
/*
* checkpoint.c
*/
-void f2fs_stop_checkpoint(struct f2fs_sb_info *, bool);
-struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t);
-struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t);
-struct page *get_tmp_page(struct f2fs_sb_info *, pgoff_t);
-bool is_valid_blkaddr(struct f2fs_sb_info *, block_t, int);
-int ra_meta_pages(struct f2fs_sb_info *, block_t, int, int, bool);
-void ra_meta_pages_cond(struct f2fs_sb_info *, pgoff_t);
-long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long);
-void add_ino_entry(struct f2fs_sb_info *, nid_t, int type);
-void remove_ino_entry(struct f2fs_sb_info *, nid_t, int type);
-void release_ino_entry(struct f2fs_sb_info *, bool);
-bool exist_written_data(struct f2fs_sb_info *, nid_t, int);
-int f2fs_sync_inode_meta(struct f2fs_sb_info *);
-int acquire_orphan_inode(struct f2fs_sb_info *);
-void release_orphan_inode(struct f2fs_sb_info *);
-void add_orphan_inode(struct inode *);
-void remove_orphan_inode(struct f2fs_sb_info *, nid_t);
-int recover_orphan_inodes(struct f2fs_sb_info *);
-int get_valid_checkpoint(struct f2fs_sb_info *);
-void update_dirty_page(struct inode *, struct page *);
-void remove_dirty_inode(struct inode *);
-int sync_dirty_inodes(struct f2fs_sb_info *, enum inode_type);
-int write_checkpoint(struct f2fs_sb_info *, struct cp_control *);
-void init_ino_entry_info(struct f2fs_sb_info *);
+void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io);
+struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index);
+struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index);
+struct page *get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index);
+bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type);
+int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
+ int type, bool sync);
+void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index);
+long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
+ long nr_to_write);
+void add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type);
+void remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type);
+void release_ino_entry(struct f2fs_sb_info *sbi, bool all);
+bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode);
+int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi);
+int acquire_orphan_inode(struct f2fs_sb_info *sbi);
+void release_orphan_inode(struct f2fs_sb_info *sbi);
+void add_orphan_inode(struct inode *inode);
+void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino);
+int recover_orphan_inodes(struct f2fs_sb_info *sbi);
+int get_valid_checkpoint(struct f2fs_sb_info *sbi);
+void update_dirty_page(struct inode *inode, struct page *page);
+void remove_dirty_inode(struct inode *inode);
+int sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type);
+int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc);
+void init_ino_entry_info(struct f2fs_sb_info *sbi);
int __init create_checkpoint_caches(void);
void destroy_checkpoint_caches(void);
/*
* data.c
*/
-void f2fs_submit_merged_bio(struct f2fs_sb_info *, enum page_type, int);
-void f2fs_submit_merged_bio_cond(struct f2fs_sb_info *, struct inode *,
- struct page *, nid_t, enum page_type, int);
-void f2fs_flush_merged_bios(struct f2fs_sb_info *);
-int f2fs_submit_page_bio(struct f2fs_io_info *);
-void f2fs_submit_page_mbio(struct f2fs_io_info *);
-struct block_device *f2fs_target_device(struct f2fs_sb_info *,
- block_t, struct bio *);
-int f2fs_target_device_index(struct f2fs_sb_info *, block_t);
-void set_data_blkaddr(struct dnode_of_data *);
-void f2fs_update_data_blkaddr(struct dnode_of_data *, block_t);
-int reserve_new_blocks(struct dnode_of_data *, blkcnt_t);
-int reserve_new_block(struct dnode_of_data *);
-int f2fs_get_block(struct dnode_of_data *, pgoff_t);
-int f2fs_preallocate_blocks(struct kiocb *, struct iov_iter *);
-int f2fs_reserve_block(struct dnode_of_data *, pgoff_t);
-struct page *get_read_data_page(struct inode *, pgoff_t, int, bool);
-struct page *find_data_page(struct inode *, pgoff_t);
-struct page *get_lock_data_page(struct inode *, pgoff_t, bool);
-struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool);
-int do_write_data_page(struct f2fs_io_info *);
-int f2fs_map_blocks(struct inode *, struct f2fs_map_blocks *, int, int);
-int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64);
-void f2fs_set_page_dirty_nobuffers(struct page *);
-void f2fs_invalidate_page(struct page *, unsigned int, unsigned int);
-int f2fs_release_page(struct page *, gfp_t);
+void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, enum page_type type,
+ int rw);
+void f2fs_submit_merged_bio_cond(struct f2fs_sb_info *sbi,
+ struct inode *inode, nid_t ino, pgoff_t idx,
+ enum page_type type, int rw);
+void f2fs_flush_merged_bios(struct f2fs_sb_info *sbi);
+int f2fs_submit_page_bio(struct f2fs_io_info *fio);
+int f2fs_submit_page_mbio(struct f2fs_io_info *fio);
+struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
+ block_t blk_addr, struct bio *bio);
+int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr);
+void set_data_blkaddr(struct dnode_of_data *dn);
+void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr);
+int reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count);
+int reserve_new_block(struct dnode_of_data *dn);
+int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index);
+int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from);
+int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index);
+struct page *get_read_data_page(struct inode *inode, pgoff_t index,
+ int op_flags, bool for_write);
+struct page *find_data_page(struct inode *inode, pgoff_t index);
+struct page *get_lock_data_page(struct inode *inode, pgoff_t index,
+ bool for_write);
+struct page *get_new_data_page(struct inode *inode,
+ struct page *ipage, pgoff_t index, bool new_i_size);
+int do_write_data_page(struct f2fs_io_info *fio);
+int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
+ int create, int flag);
+int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
+ u64 start, u64 len);
+void f2fs_set_page_dirty_nobuffers(struct page *page);
+void f2fs_invalidate_page(struct page *page, unsigned int offset,
+ unsigned int length);
+int f2fs_release_page(struct page *page, gfp_t wait);
#ifdef CONFIG_MIGRATION
-int f2fs_migrate_page(struct address_space *, struct page *, struct page *,
- enum migrate_mode);
+int f2fs_migrate_page(struct address_space *mapping, struct page *newpage,
+ struct page *page, enum migrate_mode mode);
#endif
/*
* gc.c
*/
-int start_gc_thread(struct f2fs_sb_info *);
-void stop_gc_thread(struct f2fs_sb_info *);
-block_t start_bidx_of_node(unsigned int, struct inode *);
-int f2fs_gc(struct f2fs_sb_info *, bool, bool);
-void build_gc_manager(struct f2fs_sb_info *);
+int start_gc_thread(struct f2fs_sb_info *sbi);
+void stop_gc_thread(struct f2fs_sb_info *sbi);
+block_t start_bidx_of_node(unsigned int node_ofs, struct inode *inode);
+int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background);
+void build_gc_manager(struct f2fs_sb_info *sbi);
/*
* recovery.c
*/
-int recover_fsync_data(struct f2fs_sb_info *, bool);
-bool space_for_roll_forward(struct f2fs_sb_info *);
+int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only);
+bool space_for_roll_forward(struct f2fs_sb_info *sbi);
/*
* debug.c
@@ -2232,8 +2288,9 @@ struct f2fs_stat_info {
unsigned int ndirty_dirs, ndirty_files, ndirty_all;
int nats, dirty_nats, sits, dirty_sits, free_nids, alloc_nids;
int total_count, utilization;
- int bg_gc, nr_wb_cp_data, nr_wb_data;
- int inline_xattr, inline_inode, inline_dir, orphans;
+ int bg_gc, nr_wb_cp_data, nr_wb_data, nr_flush, nr_discard;
+ int inline_xattr, inline_inode, inline_dir, append, update, orphans;
+ int aw_cnt, max_aw_cnt;
unsigned int valid_count, valid_node_count, valid_inode_count, discard_blks;
unsigned int bimodal, avg_vblocks;
int util_free, util_valid, util_invalid;
@@ -2305,6 +2362,17 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
((sbi)->block_count[(curseg)->alloc_type]++)
#define stat_inc_inplace_blocks(sbi) \
(atomic_inc(&(sbi)->inplace_count))
+#define stat_inc_atomic_write(inode) \
+ (atomic_inc(&F2FS_I_SB(inode)->aw_cnt))
+#define stat_dec_atomic_write(inode) \
+ (atomic_dec(&F2FS_I_SB(inode)->aw_cnt))
+#define stat_update_max_atomic_write(inode) \
+ do { \
+ int cur = atomic_read(&F2FS_I_SB(inode)->aw_cnt); \
+ int max = atomic_read(&F2FS_I_SB(inode)->max_aw_cnt); \
+ if (cur > max) \
+ atomic_set(&F2FS_I_SB(inode)->max_aw_cnt, cur); \
+ } while (0)
#define stat_inc_seg_count(sbi, type, gc_type) \
do { \
struct f2fs_stat_info *si = F2FS_STAT(sbi); \
@@ -2337,8 +2405,8 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
si->bg_node_blks += (gc_type == BG_GC) ? (blks) : 0; \
} while (0)
-int f2fs_build_stats(struct f2fs_sb_info *);
-void f2fs_destroy_stats(struct f2fs_sb_info *);
+int f2fs_build_stats(struct f2fs_sb_info *sbi);
+void f2fs_destroy_stats(struct f2fs_sb_info *sbi);
int __init f2fs_create_root_stats(void);
void f2fs_destroy_root_stats(void);
#else
@@ -2358,6 +2426,9 @@ void f2fs_destroy_root_stats(void);
#define stat_dec_inline_inode(inode)
#define stat_inc_inline_dir(inode)
#define stat_dec_inline_dir(inode)
+#define stat_inc_atomic_write(inode)
+#define stat_dec_atomic_write(inode)
+#define stat_update_max_atomic_write(inode)
#define stat_inc_seg_type(sbi, curseg)
#define stat_inc_block_count(sbi, curseg)
#define stat_inc_inplace_blocks(sbi)
@@ -2387,49 +2458,55 @@ extern struct kmem_cache *inode_entry_slab;
/*
* inline.c
*/
-bool f2fs_may_inline_data(struct inode *);
-bool f2fs_may_inline_dentry(struct inode *);
-void read_inline_data(struct page *, struct page *);
-bool truncate_inline_inode(struct page *, u64);
-int f2fs_read_inline_data(struct inode *, struct page *);
-int f2fs_convert_inline_page(struct dnode_of_data *, struct page *);
-int f2fs_convert_inline_inode(struct inode *);
-int f2fs_write_inline_data(struct inode *, struct page *);
-bool recover_inline_data(struct inode *, struct page *);
-struct f2fs_dir_entry *find_in_inline_dir(struct inode *,
- struct fscrypt_name *, struct page **);
-int make_empty_inline_dir(struct inode *inode, struct inode *, struct page *);
-int f2fs_add_inline_entry(struct inode *, const struct qstr *,
- const struct qstr *, struct inode *, nid_t, umode_t);
-void f2fs_delete_inline_entry(struct f2fs_dir_entry *, struct page *,
- struct inode *, struct inode *);
-bool f2fs_empty_inline_dir(struct inode *);
-int f2fs_read_inline_dir(struct file *, struct dir_context *,
- struct fscrypt_str *);
-int f2fs_inline_data_fiemap(struct inode *,
- struct fiemap_extent_info *, __u64, __u64);
+bool f2fs_may_inline_data(struct inode *inode);
+bool f2fs_may_inline_dentry(struct inode *inode);
+void read_inline_data(struct page *page, struct page *ipage);
+bool truncate_inline_inode(struct page *ipage, u64 from);
+int f2fs_read_inline_data(struct inode *inode, struct page *page);
+int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page);
+int f2fs_convert_inline_inode(struct inode *inode);
+int f2fs_write_inline_data(struct inode *inode, struct page *page);
+bool recover_inline_data(struct inode *inode, struct page *npage);
+struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir,
+ struct fscrypt_name *fname, struct page **res_page);
+int make_empty_inline_dir(struct inode *inode, struct inode *parent,
+ struct page *ipage);
+int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name,
+ const struct qstr *orig_name,
+ struct inode *inode, nid_t ino, umode_t mode);
+void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page,
+ struct inode *dir, struct inode *inode);
+bool f2fs_empty_inline_dir(struct inode *dir);
+int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx,
+ struct fscrypt_str *fstr);
+int f2fs_inline_data_fiemap(struct inode *inode,
+ struct fiemap_extent_info *fieinfo,
+ __u64 start, __u64 len);
/*
* shrinker.c
*/
-unsigned long f2fs_shrink_count(struct shrinker *, struct shrink_control *);
-unsigned long f2fs_shrink_scan(struct shrinker *, struct shrink_control *);
-void f2fs_join_shrinker(struct f2fs_sb_info *);
-void f2fs_leave_shrinker(struct f2fs_sb_info *);
+unsigned long f2fs_shrink_count(struct shrinker *shrink,
+ struct shrink_control *sc);
+unsigned long f2fs_shrink_scan(struct shrinker *shrink,
+ struct shrink_control *sc);
+void f2fs_join_shrinker(struct f2fs_sb_info *sbi);
+void f2fs_leave_shrinker(struct f2fs_sb_info *sbi);
/*
* extent_cache.c
*/
-unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *, int);
-bool f2fs_init_extent_tree(struct inode *, struct f2fs_extent *);
-void f2fs_drop_extent_tree(struct inode *);
-unsigned int f2fs_destroy_extent_node(struct inode *);
-void f2fs_destroy_extent_tree(struct inode *);
-bool f2fs_lookup_extent_cache(struct inode *, pgoff_t, struct extent_info *);
-void f2fs_update_extent_cache(struct dnode_of_data *);
+unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink);
+bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext);
+void f2fs_drop_extent_tree(struct inode *inode);
+unsigned int f2fs_destroy_extent_node(struct inode *inode);
+void f2fs_destroy_extent_tree(struct inode *inode);
+bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs,
+ struct extent_info *ei);
+void f2fs_update_extent_cache(struct dnode_of_data *dn);
void f2fs_update_extent_cache_range(struct dnode_of_data *dn,
- pgoff_t, block_t, unsigned int);
-void init_extent_cache_info(struct f2fs_sb_info *);
+ pgoff_t fofs, block_t blkaddr, unsigned int len);
+void init_extent_cache_info(struct f2fs_sb_info *sbi);
int __init create_extent_cache(void);
void destroy_extent_cache(void);
@@ -2510,28 +2587,4 @@ static inline bool f2fs_may_encrypt(struct inode *inode)
#endif
}
-#ifndef CONFIG_F2FS_FS_ENCRYPTION
-#define fscrypt_set_d_op(i)
-#define fscrypt_get_ctx fscrypt_notsupp_get_ctx
-#define fscrypt_release_ctx fscrypt_notsupp_release_ctx
-#define fscrypt_encrypt_page fscrypt_notsupp_encrypt_page
-#define fscrypt_decrypt_page fscrypt_notsupp_decrypt_page
-#define fscrypt_decrypt_bio_pages fscrypt_notsupp_decrypt_bio_pages
-#define fscrypt_pullback_bio_page fscrypt_notsupp_pullback_bio_page
-#define fscrypt_restore_control_page fscrypt_notsupp_restore_control_page
-#define fscrypt_zeroout_range fscrypt_notsupp_zeroout_range
-#define fscrypt_ioctl_set_policy fscrypt_notsupp_ioctl_set_policy
-#define fscrypt_ioctl_get_policy fscrypt_notsupp_ioctl_get_policy
-#define fscrypt_has_permitted_context fscrypt_notsupp_has_permitted_context
-#define fscrypt_inherit_context fscrypt_notsupp_inherit_context
-#define fscrypt_get_encryption_info fscrypt_notsupp_get_encryption_info
-#define fscrypt_put_encryption_info fscrypt_notsupp_put_encryption_info
-#define fscrypt_setup_filename fscrypt_notsupp_setup_filename
-#define fscrypt_free_filename fscrypt_notsupp_free_filename
-#define fscrypt_fname_encrypted_size fscrypt_notsupp_fname_encrypted_size
-#define fscrypt_fname_alloc_buffer fscrypt_notsupp_fname_alloc_buffer
-#define fscrypt_fname_free_buffer fscrypt_notsupp_fname_free_buffer
-#define fscrypt_fname_disk_to_usr fscrypt_notsupp_fname_disk_to_usr
-#define fscrypt_fname_usr_to_disk fscrypt_notsupp_fname_usr_to_disk
-#endif
#endif
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 49f10dce817d..a3808c49e326 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -20,6 +20,7 @@
#include <linux/uaccess.h>
#include <linux/mount.h>
#include <linux/pagevec.h>
+#include <linux/uio.h>
#include <linux/uuid.h>
#include <linux/file.h>
@@ -277,7 +278,8 @@ sync_nodes:
flush_out:
remove_ino_entry(sbi, ino, UPDATE_INO);
clear_inode_flag(inode, FI_UPDATE_WRITE);
- ret = f2fs_issue_flush(sbi);
+ if (!atomic)
+ ret = f2fs_issue_flush(sbi);
f2fs_update_time(sbi, REQ_TIME);
out:
trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
@@ -570,6 +572,8 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock)
if (f2fs_has_inline_data(inode)) {
if (truncate_inline_inode(ipage, from))
set_page_dirty(ipage);
+ if (from == 0)
+ clear_inode_flag(inode, FI_DATA_EXIST);
f2fs_put_page(ipage, 1);
truncate_page = true;
goto out;
@@ -1542,6 +1546,8 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
if (ret)
clear_inode_flag(inode, FI_ATOMIC_FILE);
out:
+ stat_inc_atomic_write(inode);
+ stat_update_max_atomic_write(inode);
inode_unlock(inode);
mnt_drop_write_file(filp);
return ret;
@@ -1565,15 +1571,18 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
goto err_out;
if (f2fs_is_atomic_file(inode)) {
- clear_inode_flag(inode, FI_ATOMIC_FILE);
ret = commit_inmem_pages(inode);
- if (ret) {
- set_inode_flag(inode, FI_ATOMIC_FILE);
+ if (ret)
goto err_out;
+
+ ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
+ if (!ret) {
+ clear_inode_flag(inode, FI_ATOMIC_FILE);
+ stat_dec_atomic_write(inode);
}
+ } else {
+ ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
}
-
- ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
err_out:
inode_unlock(inode);
mnt_drop_write_file(filp);
@@ -2251,8 +2260,12 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
inode_lock(inode);
ret = generic_write_checks(iocb, from);
if (ret > 0) {
- int err = f2fs_preallocate_blocks(iocb, from);
+ int err;
+
+ if (iov_iter_fault_in_readable(from, iov_iter_count(from)))
+ set_inode_flag(inode, FI_NO_PREALLOC);
+ err = f2fs_preallocate_blocks(iocb, from);
if (err) {
inode_unlock(inode);
return err;
@@ -2260,6 +2273,7 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
blk_start_plug(&plug);
ret = __generic_file_write_iter(iocb, from);
blk_finish_plug(&plug);
+ clear_inode_flag(inode, FI_NO_PREALLOC);
}
inode_unlock(inode);
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 88bfc3dff496..88e5e7b10ab6 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -569,6 +569,9 @@ static void move_encrypted_block(struct inode *inode, block_t bidx,
if (!check_valid_map(F2FS_I_SB(inode), segno, off))
goto out;
+ if (f2fs_is_atomic_file(inode))
+ goto out;
+
set_new_dnode(&dn, inode, NULL, NULL, 0);
err = get_dnode_of_data(&dn, bidx, LOOKUP_NODE);
if (err)
@@ -661,6 +664,9 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type,
if (!check_valid_map(F2FS_I_SB(inode), segno, off))
goto out;
+ if (f2fs_is_atomic_file(inode))
+ goto out;
+
if (gc_type == BG_GC) {
if (PageWriteback(page))
goto out;
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 56c19b0610a8..98f00a3a7f50 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -321,9 +321,9 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
if (err)
goto err_out;
}
- if (!IS_ERR(inode) && f2fs_encrypted_inode(dir) &&
- (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) &&
- !fscrypt_has_permitted_context(dir, inode)) {
+ if (f2fs_encrypted_inode(dir) &&
+ (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) &&
+ !fscrypt_has_permitted_context(dir, inode)) {
bool nokey = f2fs_encrypted_inode(inode) &&
!fscrypt_has_encryption_key(inode);
err = nokey ? -ENOKEY : -EPERM;
@@ -403,7 +403,7 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
return err;
if (!fscrypt_has_encryption_key(dir))
- return -EPERM;
+ return -ENOKEY;
disk_link.len = (fscrypt_fname_encrypted_size(dir, len) +
sizeof(struct fscrypt_symlink_data));
@@ -447,7 +447,7 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
goto err_out;
if (!fscrypt_has_encryption_key(inode)) {
- err = -EPERM;
+ err = -ENOKEY;
goto err_out;
}
@@ -663,6 +663,12 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
bool is_old_inline = f2fs_has_inline_dentry(old_dir);
int err = -ENOENT;
+ if ((f2fs_encrypted_inode(old_dir) &&
+ !fscrypt_has_encryption_key(old_dir)) ||
+ (f2fs_encrypted_inode(new_dir) &&
+ !fscrypt_has_encryption_key(new_dir)))
+ return -ENOKEY;
+
if ((old_dir != new_dir) && f2fs_encrypted_inode(new_dir) &&
!fscrypt_has_permitted_context(new_dir, old_inode)) {
err = -EPERM;
@@ -843,6 +849,12 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
int old_nlink = 0, new_nlink = 0;
int err = -ENOENT;
+ if ((f2fs_encrypted_inode(old_dir) &&
+ !fscrypt_has_encryption_key(old_dir)) ||
+ (f2fs_encrypted_inode(new_dir) &&
+ !fscrypt_has_encryption_key(new_dir)))
+ return -ENOKEY;
+
if ((f2fs_encrypted_inode(old_dir) || f2fs_encrypted_inode(new_dir)) &&
(old_dir != new_dir) &&
(!fscrypt_has_permitted_context(new_dir, old_inode) ||
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index b9078fdb3743..8203a2f8b350 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -245,12 +245,24 @@ bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino)
return need_update;
}
-static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)
+static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid,
+ bool no_fail)
{
struct nat_entry *new;
- new = f2fs_kmem_cache_alloc(nat_entry_slab, GFP_NOFS);
- f2fs_radix_tree_insert(&nm_i->nat_root, nid, new);
+ if (no_fail) {
+ new = f2fs_kmem_cache_alloc(nat_entry_slab, GFP_NOFS);
+ f2fs_radix_tree_insert(&nm_i->nat_root, nid, new);
+ } else {
+ new = kmem_cache_alloc(nat_entry_slab, GFP_NOFS);
+ if (!new)
+ return NULL;
+ if (radix_tree_insert(&nm_i->nat_root, nid, new)) {
+ kmem_cache_free(nat_entry_slab, new);
+ return NULL;
+ }
+ }
+
memset(new, 0, sizeof(struct nat_entry));
nat_set_nid(new, nid);
nat_reset_flag(new);
@@ -267,8 +279,9 @@ static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid,
e = __lookup_nat_cache(nm_i, nid);
if (!e) {
- e = grab_nat_entry(nm_i, nid);
- node_info_from_raw_nat(&e->ni, ne);
+ e = grab_nat_entry(nm_i, nid, false);
+ if (e)
+ node_info_from_raw_nat(&e->ni, ne);
} else {
f2fs_bug_on(sbi, nat_get_ino(e) != le32_to_cpu(ne->ino) ||
nat_get_blkaddr(e) !=
@@ -286,7 +299,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
down_write(&nm_i->nat_tree_lock);
e = __lookup_nat_cache(nm_i, ni->nid);
if (!e) {
- e = grab_nat_entry(nm_i, ni->nid);
+ e = grab_nat_entry(nm_i, ni->nid, true);
copy_node_info(&e->ni, ni);
f2fs_bug_on(sbi, ni->blk_addr == NEW_ADDR);
} else if (new_blkaddr == NEW_ADDR) {
@@ -1305,16 +1318,99 @@ continue_unlock:
return last_page;
}
+static int __write_node_page(struct page *page, bool atomic, bool *submitted,
+ struct writeback_control *wbc)
+{
+ struct f2fs_sb_info *sbi = F2FS_P_SB(page);
+ nid_t nid;
+ struct node_info ni;
+ struct f2fs_io_info fio = {
+ .sbi = sbi,
+ .type = NODE,
+ .op = REQ_OP_WRITE,
+ .op_flags = wbc_to_write_flags(wbc),
+ .page = page,
+ .encrypted_page = NULL,
+ .submitted = false,
+ };
+
+ trace_f2fs_writepage(page, NODE);
+
+ if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
+ goto redirty_out;
+ if (unlikely(f2fs_cp_error(sbi)))
+ goto redirty_out;
+
+ /* get old block addr of this node page */
+ nid = nid_of_node(page);
+ f2fs_bug_on(sbi, page->index != nid);
+
+ if (wbc->for_reclaim) {
+ if (!down_read_trylock(&sbi->node_write))
+ goto redirty_out;
+ } else {
+ down_read(&sbi->node_write);
+ }
+
+ get_node_info(sbi, nid, &ni);
+
+ /* This page is already truncated */
+ if (unlikely(ni.blk_addr == NULL_ADDR)) {
+ ClearPageUptodate(page);
+ dec_page_count(sbi, F2FS_DIRTY_NODES);
+ up_read(&sbi->node_write);
+ unlock_page(page);
+ return 0;
+ }
+
+ if (atomic && !test_opt(sbi, NOBARRIER))
+ fio.op_flags |= REQ_PREFLUSH | REQ_FUA;
+
+ set_page_writeback(page);
+ fio.old_blkaddr = ni.blk_addr;
+ write_node_page(nid, &fio);
+ set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(page));
+ dec_page_count(sbi, F2FS_DIRTY_NODES);
+ up_read(&sbi->node_write);
+
+ if (wbc->for_reclaim) {
+ f2fs_submit_merged_bio_cond(sbi, page->mapping->host, 0,
+ page->index, NODE, WRITE);
+ submitted = NULL;
+ }
+
+ unlock_page(page);
+
+ if (unlikely(f2fs_cp_error(sbi))) {
+ f2fs_submit_merged_bio(sbi, NODE, WRITE);
+ submitted = NULL;
+ }
+ if (submitted)
+ *submitted = fio.submitted;
+
+ return 0;
+
+redirty_out:
+ redirty_page_for_writepage(wbc, page);
+ return AOP_WRITEPAGE_ACTIVATE;
+}
+
+static int f2fs_write_node_page(struct page *page,
+ struct writeback_control *wbc)
+{
+ return __write_node_page(page, false, NULL, wbc);
+}
+
int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
struct writeback_control *wbc, bool atomic)
{
pgoff_t index, end;
+ pgoff_t last_idx = ULONG_MAX;
struct pagevec pvec;
int ret = 0;
struct page *last_page = NULL;
bool marked = false;
nid_t ino = inode->i_ino;
- int nwritten = 0;
if (atomic) {
last_page = last_fsync_dnode(sbi, ino);
@@ -1336,6 +1432,7 @@ retry:
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
+ bool submitted = false;
if (unlikely(f2fs_cp_error(sbi))) {
f2fs_put_page(last_page, 0);
@@ -1384,13 +1481,15 @@ continue_unlock:
if (!clear_page_dirty_for_io(page))
goto continue_unlock;
- ret = NODE_MAPPING(sbi)->a_ops->writepage(page, wbc);
+ ret = __write_node_page(page, atomic &&
+ page == last_page,
+ &submitted, wbc);
if (ret) {
unlock_page(page);
f2fs_put_page(last_page, 0);
break;
- } else {
- nwritten++;
+ } else if (submitted) {
+ last_idx = page->index;
}
if (page == last_page) {
@@ -1416,8 +1515,9 @@ continue_unlock:
goto retry;
}
out:
- if (nwritten)
- f2fs_submit_merged_bio_cond(sbi, NULL, NULL, ino, NODE, WRITE);
+ if (last_idx != ULONG_MAX)
+ f2fs_submit_merged_bio_cond(sbi, NULL, ino, last_idx,
+ NODE, WRITE);
return ret ? -EIO: 0;
}
@@ -1445,6 +1545,7 @@ next_step:
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
+ bool submitted = false;
if (unlikely(f2fs_cp_error(sbi))) {
pagevec_release(&pvec);
@@ -1498,9 +1599,10 @@ continue_unlock:
set_fsync_mark(page, 0);
set_dentry_mark(page, 0);
- if (NODE_MAPPING(sbi)->a_ops->writepage(page, wbc))
+ ret = __write_node_page(page, false, &submitted, wbc);
+ if (ret)
unlock_page(page);
- else
+ else if (submitted)
nwritten++;
if (--wbc->nr_to_write == 0)
@@ -1564,72 +1666,6 @@ int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino)
return ret;
}
-static int f2fs_write_node_page(struct page *page,
- struct writeback_control *wbc)
-{
- struct f2fs_sb_info *sbi = F2FS_P_SB(page);
- nid_t nid;
- struct node_info ni;
- struct f2fs_io_info fio = {
- .sbi = sbi,
- .type = NODE,
- .op = REQ_OP_WRITE,
- .op_flags = wbc_to_write_flags(wbc),
- .page = page,
- .encrypted_page = NULL,
- };
-
- trace_f2fs_writepage(page, NODE);
-
- if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
- goto redirty_out;
- if (unlikely(f2fs_cp_error(sbi)))
- goto redirty_out;
-
- /* get old block addr of this node page */
- nid = nid_of_node(page);
- f2fs_bug_on(sbi, page->index != nid);
-
- if (wbc->for_reclaim) {
- if (!down_read_trylock(&sbi->node_write))
- goto redirty_out;
- } else {
- down_read(&sbi->node_write);
- }
-
- get_node_info(sbi, nid, &ni);
-
- /* This page is already truncated */
- if (unlikely(ni.blk_addr == NULL_ADDR)) {
- ClearPageUptodate(page);
- dec_page_count(sbi, F2FS_DIRTY_NODES);
- up_read(&sbi->node_write);
- unlock_page(page);
- return 0;
- }
-
- set_page_writeback(page);
- fio.old_blkaddr = ni.blk_addr;
- write_node_page(nid, &fio);
- set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(page));
- dec_page_count(sbi, F2FS_DIRTY_NODES);
- up_read(&sbi->node_write);
-
- if (wbc->for_reclaim)
- f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, NODE, WRITE);
-
- unlock_page(page);
-
- if (unlikely(f2fs_cp_error(sbi)))
- f2fs_submit_merged_bio(sbi, NODE, WRITE);
-
- return 0;
-
-redirty_out:
- redirty_page_for_writepage(wbc, page);
- return AOP_WRITEPAGE_ACTIVATE;
-}
-
static int f2fs_write_node_pages(struct address_space *mapping,
struct writeback_control *wbc)
{
@@ -2152,7 +2188,7 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
ne = __lookup_nat_cache(nm_i, nid);
if (!ne) {
- ne = grab_nat_entry(nm_i, nid);
+ ne = grab_nat_entry(nm_i, nid, true);
node_info_from_raw_nat(&ne->ni, &raw_ne);
}
@@ -2350,6 +2386,14 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
GFP_KERNEL);
if (!nm_i->nat_bitmap)
return -ENOMEM;
+
+#ifdef CONFIG_F2FS_CHECK_FS
+ nm_i->nat_bitmap_mir = kmemdup(version_bitmap, nm_i->bitmap_size,
+ GFP_KERNEL);
+ if (!nm_i->nat_bitmap_mir)
+ return -ENOMEM;
+#endif
+
return 0;
}
@@ -2424,6 +2468,9 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
up_write(&nm_i->nat_tree_lock);
kfree(nm_i->nat_bitmap);
+#ifdef CONFIG_F2FS_CHECK_FS
+ kfree(nm_i->nat_bitmap_mir);
+#endif
sbi->nm_info = NULL;
kfree(nm_i);
}
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index e7997e240366..29ff783eb9c3 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -174,7 +174,7 @@ static inline void next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid)
spin_unlock(&nm_i->nid_list_lock);
return;
}
- fnid = list_entry(nm_i->nid_list[FREE_NID_LIST].next,
+ fnid = list_first_entry(&nm_i->nid_list[FREE_NID_LIST],
struct free_nid, list);
*nid = fnid->nid;
spin_unlock(&nm_i->nid_list_lock);
@@ -186,6 +186,12 @@ static inline void next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid)
static inline void get_nat_bitmap(struct f2fs_sb_info *sbi, void *addr)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
+
+#ifdef CONFIG_F2FS_CHECK_FS
+ if (memcmp(nm_i->nat_bitmap, nm_i->nat_bitmap_mir,
+ nm_i->bitmap_size))
+ f2fs_bug_on(sbi, 1);
+#endif
memcpy(addr, nm_i->nat_bitmap, nm_i->bitmap_size);
}
@@ -203,6 +209,12 @@ static inline pgoff_t current_nat_addr(struct f2fs_sb_info *sbi, nid_t start)
(seg_off << sbi->log_blocks_per_seg << 1) +
(block_off & (sbi->blocks_per_seg - 1)));
+#ifdef CONFIG_F2FS_CHECK_FS
+ if (f2fs_test_bit(block_off, nm_i->nat_bitmap) !=
+ f2fs_test_bit(block_off, nm_i->nat_bitmap_mir))
+ f2fs_bug_on(sbi, 1);
+#endif
+
if (f2fs_test_bit(block_off, nm_i->nat_bitmap))
block_addr += sbi->blocks_per_seg;
@@ -228,6 +240,9 @@ static inline void set_to_next_nat(struct f2fs_nm_info *nm_i, nid_t start_nid)
unsigned int block_off = NAT_BLOCK_OFFSET(start_nid);
f2fs_change_bit(block_off, nm_i->nat_bitmap);
+#ifdef CONFIG_F2FS_CHECK_FS
+ f2fs_change_bit(block_off, nm_i->nat_bitmap_mir);
+#endif
}
static inline nid_t ino_of_node(struct page *node_page)
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 981a9584b62f..e93316ea8d1b 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -428,8 +428,9 @@ retry_dn:
}
if (!file_keep_isize(inode) &&
- (i_size_read(inode) <= (start << PAGE_SHIFT)))
- f2fs_i_size_write(inode, (start + 1) << PAGE_SHIFT);
+ (i_size_read(inode) <= ((loff_t)start << PAGE_SHIFT)))
+ f2fs_i_size_write(inode,
+ (loff_t)(start + 1) << PAGE_SHIFT);
/*
* dest is reserved block, invalidate src block
@@ -552,10 +553,8 @@ next:
int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
{
- struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
struct list_head inode_list;
struct list_head dir_list;
- block_t blkaddr;
int err;
int ret = 0;
bool need_writecp = false;
@@ -571,8 +570,6 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
/* prevent checkpoint */
mutex_lock(&sbi->cp_mutex);
- blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
-
/* step #1: find fsynced inode numbers */
err = find_fsync_dnodes(sbi, &inode_list);
if (err || list_empty(&inode_list))
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 0738f48293cc..df2ff5cfe8f4 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -26,7 +26,7 @@
#define __reverse_ffz(x) __reverse_ffs(~(x))
static struct kmem_cache *discard_entry_slab;
-static struct kmem_cache *bio_entry_slab;
+static struct kmem_cache *discard_cmd_slab;
static struct kmem_cache *sit_entry_set_slab;
static struct kmem_cache *inmem_entry_slab;
@@ -242,11 +242,12 @@ void drop_inmem_pages(struct inode *inode)
{
struct f2fs_inode_info *fi = F2FS_I(inode);
- clear_inode_flag(inode, FI_ATOMIC_FILE);
-
mutex_lock(&fi->inmem_lock);
__revoke_inmem_pages(inode, &fi->inmem_pages, true, false);
mutex_unlock(&fi->inmem_lock);
+
+ clear_inode_flag(inode, FI_ATOMIC_FILE);
+ stat_dec_atomic_write(inode);
}
static int __commit_inmem_pages(struct inode *inode,
@@ -262,7 +263,7 @@ static int __commit_inmem_pages(struct inode *inode,
.op_flags = REQ_SYNC | REQ_PRIO,
.encrypted_page = NULL,
};
- bool submit_bio = false;
+ pgoff_t last_idx = ULONG_MAX;
int err = 0;
list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) {
@@ -288,15 +289,15 @@ static int __commit_inmem_pages(struct inode *inode,
/* record old blkaddr for revoking */
cur->old_addr = fio.old_blkaddr;
-
- submit_bio = true;
+ last_idx = page->index;
}
unlock_page(page);
list_move_tail(&cur->list, revoke_list);
}
- if (submit_bio)
- f2fs_submit_merged_bio_cond(sbi, inode, NULL, 0, DATA, WRITE);
+ if (last_idx != ULONG_MAX)
+ f2fs_submit_merged_bio_cond(sbi, inode, 0, last_idx,
+ DATA, WRITE);
if (!err)
__revoke_inmem_pages(inode, revoke_list, false, false);
@@ -315,6 +316,8 @@ int commit_inmem_pages(struct inode *inode)
f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);
+ set_inode_flag(inode, FI_ATOMIC_COMMIT);
+
mutex_lock(&fi->inmem_lock);
err = __commit_inmem_pages(inode, &revoke_list);
if (err) {
@@ -336,6 +339,8 @@ int commit_inmem_pages(struct inode *inode)
}
mutex_unlock(&fi->inmem_lock);
+ clear_inode_flag(inode, FI_ATOMIC_COMMIT);
+
f2fs_unlock_op(sbi);
return err;
}
@@ -421,6 +426,9 @@ static int submit_flush_wait(struct f2fs_sb_info *sbi)
int ret = __submit_flush_wait(sbi->sb->s_bdev);
int i;
+ trace_f2fs_issue_flush(sbi->sb, test_opt(sbi, NOBARRIER),
+ test_opt(sbi, FLUSH_MERGE));
+
if (sbi->s_ndevs && !ret) {
for (i = 1; i < sbi->s_ndevs; i++) {
ret = __submit_flush_wait(FDEV(i).bdev);
@@ -434,7 +442,7 @@ static int submit_flush_wait(struct f2fs_sb_info *sbi)
static int issue_flush_thread(void *data)
{
struct f2fs_sb_info *sbi = data;
- struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
+ struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
wait_queue_head_t *q = &fcc->flush_wait_queue;
repeat:
if (kthread_should_stop())
@@ -463,16 +471,16 @@ repeat:
int f2fs_issue_flush(struct f2fs_sb_info *sbi)
{
- struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
+ struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
struct flush_cmd cmd;
- trace_f2fs_issue_flush(sbi->sb, test_opt(sbi, NOBARRIER),
- test_opt(sbi, FLUSH_MERGE));
-
if (test_opt(sbi, NOBARRIER))
return 0;
- if (!test_opt(sbi, FLUSH_MERGE) || !atomic_read(&fcc->submit_flush)) {
+ if (!test_opt(sbi, FLUSH_MERGE))
+ return submit_flush_wait(sbi);
+
+ if (!atomic_read(&fcc->submit_flush)) {
int ret;
atomic_inc(&fcc->submit_flush);
@@ -506,8 +514,8 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi)
struct flush_cmd_control *fcc;
int err = 0;
- if (SM_I(sbi)->cmd_control_info) {
- fcc = SM_I(sbi)->cmd_control_info;
+ if (SM_I(sbi)->fcc_info) {
+ fcc = SM_I(sbi)->fcc_info;
goto init_thread;
}
@@ -517,14 +525,14 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi)
atomic_set(&fcc->submit_flush, 0);
init_waitqueue_head(&fcc->flush_wait_queue);
init_llist_head(&fcc->issue_list);
- SM_I(sbi)->cmd_control_info = fcc;
+ SM_I(sbi)->fcc_info = fcc;
init_thread:
fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
"f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
if (IS_ERR(fcc->f2fs_issue_flush)) {
err = PTR_ERR(fcc->f2fs_issue_flush);
kfree(fcc);
- SM_I(sbi)->cmd_control_info = NULL;
+ SM_I(sbi)->fcc_info = NULL;
return err;
}
@@ -533,7 +541,7 @@ init_thread:
void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
{
- struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
+ struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
if (fcc && fcc->f2fs_issue_flush) {
struct task_struct *flush_thread = fcc->f2fs_issue_flush;
@@ -543,7 +551,7 @@ void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
}
if (free) {
kfree(fcc);
- SM_I(sbi)->cmd_control_info = NULL;
+ SM_I(sbi)->fcc_info = NULL;
}
}
@@ -623,57 +631,131 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
mutex_unlock(&dirty_i->seglist_lock);
}
-static struct bio_entry *__add_bio_entry(struct f2fs_sb_info *sbi,
- struct bio *bio)
+static void __add_discard_cmd(struct f2fs_sb_info *sbi,
+ struct bio *bio, block_t lstart, block_t len)
{
- struct list_head *wait_list = &(SM_I(sbi)->wait_list);
- struct bio_entry *be = f2fs_kmem_cache_alloc(bio_entry_slab, GFP_NOFS);
+ struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+ struct list_head *cmd_list = &(dcc->discard_cmd_list);
+ struct discard_cmd *dc;
- INIT_LIST_HEAD(&be->list);
- be->bio = bio;
- init_completion(&be->event);
- list_add_tail(&be->list, wait_list);
+ dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS);
+ INIT_LIST_HEAD(&dc->list);
+ dc->bio = bio;
+ bio->bi_private = dc;
+ dc->lstart = lstart;
+ dc->len = len;
+ dc->state = D_PREP;
+ init_completion(&dc->wait);
- return be;
+ mutex_lock(&dcc->cmd_lock);
+ list_add_tail(&dc->list, cmd_list);
+ mutex_unlock(&dcc->cmd_lock);
}
-void f2fs_wait_all_discard_bio(struct f2fs_sb_info *sbi)
+static void __remove_discard_cmd(struct f2fs_sb_info *sbi, struct discard_cmd *dc)
{
- struct list_head *wait_list = &(SM_I(sbi)->wait_list);
- struct bio_entry *be, *tmp;
+ int err = dc->bio->bi_error;
- list_for_each_entry_safe(be, tmp, wait_list, list) {
- struct bio *bio = be->bio;
- int err;
+ if (dc->state == D_DONE)
+ atomic_dec(&(SM_I(sbi)->dcc_info->submit_discard));
- wait_for_completion_io(&be->event);
- err = be->error;
- if (err == -EOPNOTSUPP)
- err = 0;
+ if (err == -EOPNOTSUPP)
+ err = 0;
- if (err)
- f2fs_msg(sbi->sb, KERN_INFO,
+ if (err)
+ f2fs_msg(sbi->sb, KERN_INFO,
"Issue discard failed, ret: %d", err);
+ bio_put(dc->bio);
+ list_del(&dc->list);
+ kmem_cache_free(discard_cmd_slab, dc);
+}
+
+/* This should be covered by global mutex, &sit_i->sentry_lock */
+void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
+{
+ struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+ struct list_head *wait_list = &(dcc->discard_cmd_list);
+ struct discard_cmd *dc, *tmp;
+
+ mutex_lock(&dcc->cmd_lock);
+ list_for_each_entry_safe(dc, tmp, wait_list, list) {
+
+ if (blkaddr == NULL_ADDR) {
+ if (dc->state == D_PREP) {
+ dc->state = D_SUBMIT;
+ submit_bio(dc->bio);
+ atomic_inc(&dcc->submit_discard);
+ }
+ wait_for_completion_io(&dc->wait);
+
+ __remove_discard_cmd(sbi, dc);
+ continue;
+ }
- bio_put(bio);
- list_del(&be->list);
- kmem_cache_free(bio_entry_slab, be);
+ if (dc->lstart <= blkaddr && blkaddr < dc->lstart + dc->len) {
+ if (dc->state == D_SUBMIT)
+ wait_for_completion_io(&dc->wait);
+ else
+ __remove_discard_cmd(sbi, dc);
+ }
}
+ mutex_unlock(&dcc->cmd_lock);
}
-static void f2fs_submit_bio_wait_endio(struct bio *bio)
+static void f2fs_submit_discard_endio(struct bio *bio)
{
- struct bio_entry *be = (struct bio_entry *)bio->bi_private;
+ struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
- be->error = bio->bi_error;
- complete(&be->event);
+ complete(&dc->wait);
+ dc->state = D_DONE;
}
+static int issue_discard_thread(void *data)
+{
+ struct f2fs_sb_info *sbi = data;
+ struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+ wait_queue_head_t *q = &dcc->discard_wait_queue;
+ struct list_head *cmd_list = &dcc->discard_cmd_list;
+ struct discard_cmd *dc, *tmp;
+ struct blk_plug plug;
+ int iter = 0;
+repeat:
+ if (kthread_should_stop())
+ return 0;
+
+ blk_start_plug(&plug);
+
+ mutex_lock(&dcc->cmd_lock);
+ list_for_each_entry_safe(dc, tmp, cmd_list, list) {
+ if (dc->state == D_PREP) {
+ dc->state = D_SUBMIT;
+ submit_bio(dc->bio);
+ atomic_inc(&dcc->submit_discard);
+ if (iter++ > DISCARD_ISSUE_RATE)
+ break;
+ } else if (dc->state == D_DONE) {
+ __remove_discard_cmd(sbi, dc);
+ }
+ }
+ mutex_unlock(&dcc->cmd_lock);
+
+ blk_finish_plug(&plug);
+
+ iter = 0;
+ congestion_wait(BLK_RW_SYNC, HZ/50);
+
+ wait_event_interruptible(*q,
+ kthread_should_stop() || !list_empty(&dcc->discard_cmd_list));
+ goto repeat;
+}
+
+
/* this function is copied from blkdev_issue_discard from block/blk-lib.c */
static int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi,
struct block_device *bdev, block_t blkstart, block_t blklen)
{
struct bio *bio = NULL;
+ block_t lblkstart = blkstart;
int err;
trace_f2fs_issue_discard(sbi->sb, blkstart, blklen);
@@ -688,14 +770,12 @@ static int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi,
SECTOR_FROM_BLOCK(blklen),
GFP_NOFS, 0, &bio);
if (!err && bio) {
- struct bio_entry *be = __add_bio_entry(sbi, bio);
-
- bio->bi_private = be;
- bio->bi_end_io = f2fs_submit_bio_wait_endio;
+ bio->bi_end_io = f2fs_submit_discard_endio;
bio->bi_opf |= REQ_SYNC;
- submit_bio(bio);
- }
+ __add_discard_cmd(sbi, bio, lblkstart, blklen);
+ wake_up(&SM_I(sbi)->dcc_info->discard_wait_queue);
+ }
return err;
}
@@ -713,8 +793,8 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
}
sector = SECTOR_FROM_BLOCK(blkstart);
- if (sector & (bdev_zone_size(bdev) - 1) ||
- nr_sects != bdev_zone_size(bdev)) {
+ if (sector & (bdev_zone_sectors(bdev) - 1) ||
+ nr_sects != bdev_zone_sectors(bdev)) {
f2fs_msg(sbi->sb, KERN_INFO,
"(%d) %s: Unaligned discard attempted (block %x + %x)",
devi, sbi->s_ndevs ? FDEV(devi).path: "",
@@ -800,13 +880,14 @@ static void __add_discard_entry(struct f2fs_sb_info *sbi,
struct cp_control *cpc, struct seg_entry *se,
unsigned int start, unsigned int end)
{
- struct list_head *head = &SM_I(sbi)->discard_list;
+ struct list_head *head = &SM_I(sbi)->dcc_info->discard_entry_list;
struct discard_entry *new, *last;
if (!list_empty(head)) {
last = list_last_entry(head, struct discard_entry, list);
if (START_BLOCK(sbi, cpc->trim_start) + start ==
- last->blkaddr + last->len) {
+ last->blkaddr + last->len &&
+ last->len <= MAX_DISCARD_BLOCKS(sbi)) {
last->len += end - start;
goto done;
}
@@ -818,10 +899,11 @@ static void __add_discard_entry(struct f2fs_sb_info *sbi,
new->len = end - start;
list_add_tail(&new->list, head);
done:
- SM_I(sbi)->nr_discards += end - start;
+ SM_I(sbi)->dcc_info->nr_discards += end - start;
}
-static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
+static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
+ bool check_only)
{
int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
int max_blocks = sbi->blocks_per_seg;
@@ -835,12 +917,13 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
int i;
if (se->valid_blocks == max_blocks || !f2fs_discard_en(sbi))
- return;
+ return false;
if (!force) {
if (!test_opt(sbi, DISCARD) || !se->valid_blocks ||
- SM_I(sbi)->nr_discards >= SM_I(sbi)->max_discards)
- return;
+ SM_I(sbi)->dcc_info->nr_discards >=
+ SM_I(sbi)->dcc_info->max_discards)
+ return false;
}
/* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */
@@ -848,7 +931,8 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
dmap[i] = force ? ~ckpt_map[i] & ~discard_map[i] :
(cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];
- while (force || SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) {
+ while (force || SM_I(sbi)->dcc_info->nr_discards <=
+ SM_I(sbi)->dcc_info->max_discards) {
start = __find_rev_next_bit(dmap, max_blocks, end + 1);
if (start >= max_blocks)
break;
@@ -858,13 +942,17 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
&& (end - start) < cpc->trim_minlen)
continue;
+ if (check_only)
+ return true;
+
__add_discard_entry(sbi, cpc, se, start, end);
}
+ return false;
}
void release_discard_addrs(struct f2fs_sb_info *sbi)
{
- struct list_head *head = &(SM_I(sbi)->discard_list);
+ struct list_head *head = &(SM_I(sbi)->dcc_info->discard_entry_list);
struct discard_entry *entry, *this;
/* drop caches */
@@ -890,17 +978,14 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
- struct list_head *head = &(SM_I(sbi)->discard_list);
+ struct list_head *head = &(SM_I(sbi)->dcc_info->discard_entry_list);
struct discard_entry *entry, *this;
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
- struct blk_plug plug;
unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
unsigned int start = 0, end = -1;
unsigned int secno, start_segno;
bool force = (cpc->reason == CP_DISCARD);
- blk_start_plug(&plug);
-
mutex_lock(&dirty_i->seglist_lock);
while (1) {
@@ -916,9 +1001,13 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc)
dirty_i->nr_dirty[PRE] -= end - start;
- if (force || !test_opt(sbi, DISCARD))
+ if (!test_opt(sbi, DISCARD))
continue;
+ if (force && start >= cpc->trim_start &&
+ (end - 1) <= cpc->trim_end)
+ continue;
+
if (!test_opt(sbi, LFS) || sbi->segs_per_sec == 1) {
f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
(end - start) << sbi->log_blocks_per_seg);
@@ -946,11 +1035,62 @@ next:
cpc->trimmed += entry->len;
skip:
list_del(&entry->list);
- SM_I(sbi)->nr_discards -= entry->len;
+ SM_I(sbi)->dcc_info->nr_discards -= entry->len;
kmem_cache_free(discard_entry_slab, entry);
}
+}
- blk_finish_plug(&plug);
+static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
+{
+ dev_t dev = sbi->sb->s_bdev->bd_dev;
+ struct discard_cmd_control *dcc;
+ int err = 0;
+
+ if (SM_I(sbi)->dcc_info) {
+ dcc = SM_I(sbi)->dcc_info;
+ goto init_thread;
+ }
+
+ dcc = kzalloc(sizeof(struct discard_cmd_control), GFP_KERNEL);
+ if (!dcc)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&dcc->discard_entry_list);
+ INIT_LIST_HEAD(&dcc->discard_cmd_list);
+ mutex_init(&dcc->cmd_lock);
+ atomic_set(&dcc->submit_discard, 0);
+ dcc->nr_discards = 0;
+ dcc->max_discards = 0;
+
+ init_waitqueue_head(&dcc->discard_wait_queue);
+ SM_I(sbi)->dcc_info = dcc;
+init_thread:
+ dcc->f2fs_issue_discard = kthread_run(issue_discard_thread, sbi,
+ "f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev));
+ if (IS_ERR(dcc->f2fs_issue_discard)) {
+ err = PTR_ERR(dcc->f2fs_issue_discard);
+ kfree(dcc);
+ SM_I(sbi)->dcc_info = NULL;
+ return err;
+ }
+
+ return err;
+}
+
+static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi, bool free)
+{
+ struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+
+ if (dcc && dcc->f2fs_issue_discard) {
+ struct task_struct *discard_thread = dcc->f2fs_issue_discard;
+
+ dcc->f2fs_issue_discard = NULL;
+ kthread_stop(discard_thread);
+ }
+ if (free) {
+ kfree(dcc);
+ SM_I(sbi)->dcc_info = NULL;
+ }
}
static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
@@ -995,14 +1135,32 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
/* Update valid block bitmap */
if (del > 0) {
- if (f2fs_test_and_set_bit(offset, se->cur_valid_map))
+ if (f2fs_test_and_set_bit(offset, se->cur_valid_map)) {
+#ifdef CONFIG_F2FS_CHECK_FS
+ if (f2fs_test_and_set_bit(offset,
+ se->cur_valid_map_mir))
+ f2fs_bug_on(sbi, 1);
+ else
+ WARN_ON(1);
+#else
f2fs_bug_on(sbi, 1);
+#endif
+ }
if (f2fs_discard_en(sbi) &&
!f2fs_test_and_set_bit(offset, se->discard_map))
sbi->discard_blks--;
} else {
- if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map))
+ if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map)) {
+#ifdef CONFIG_F2FS_CHECK_FS
+ if (!f2fs_test_and_clear_bit(offset,
+ se->cur_valid_map_mir))
+ f2fs_bug_on(sbi, 1);
+ else
+ WARN_ON(1);
+#else
f2fs_bug_on(sbi, 1);
+#endif
+ }
if (f2fs_discard_en(sbi) &&
f2fs_test_and_clear_bit(offset, se->discard_map))
sbi->discard_blks++;
@@ -1424,9 +1582,6 @@ void allocate_new_segments(struct f2fs_sb_info *sbi)
unsigned int old_segno;
int i;
- if (test_opt(sbi, LFS))
- return;
-
for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
curseg = CURSEG_I(sbi, i);
old_segno = curseg->segno;
@@ -1439,6 +1594,19 @@ static const struct segment_allocation default_salloc_ops = {
.allocate_segment = allocate_segment_by_default,
};
+bool exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc)
+{
+ __u64 trim_start = cpc->trim_start;
+
+ mutex_lock(&SIT_I(sbi)->sentry_lock);
+ for (; trim_start <= cpc->trim_end; trim_start++)
+ if (add_discard_addrs(sbi, cpc, true))
+ break;
+ mutex_unlock(&SIT_I(sbi)->sentry_lock);
+
+ return trim_start <= cpc->trim_end;
+}
+
int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
{
__u64 start = F2FS_BYTES_TO_BLK(range->start);
@@ -1459,36 +1627,25 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
"Found FS corruption, run fsck to fix.");
goto out;
}
+ if (sbi->discard_blks == 0)
+ goto out;
/* start/end segment number in main_area */
start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
GET_SEGNO(sbi, end);
+ /*
+ * do checkpoint to issue discard commands safely since we now can
+ * use async discard.
+ */
cpc.reason = CP_DISCARD;
cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen));
+ cpc.trim_start = start_segno;
+ cpc.trim_end = end_segno;
- /* do checkpoint to issue discard commands safely */
- for (; start_segno <= end_segno; start_segno = cpc.trim_end + 1) {
- cpc.trim_start = start_segno;
-
- if (sbi->discard_blks == 0)
- break;
- else if (sbi->discard_blks < BATCHED_TRIM_BLOCKS(sbi))
- cpc.trim_end = end_segno;
- else
- cpc.trim_end = min_t(unsigned int,
- rounddown(start_segno +
- BATCHED_TRIM_SEGMENTS(sbi),
- sbi->segs_per_sec) - 1, end_segno);
-
- mutex_lock(&sbi->gc_mutex);
- err = write_checkpoint(sbi, &cpc);
- mutex_unlock(&sbi->gc_mutex);
- if (err)
- break;
-
- schedule();
- }
+ mutex_lock(&sbi->gc_mutex);
+ err = write_checkpoint(sbi, &cpc);
+ mutex_unlock(&sbi->gc_mutex);
out:
range->len = F2FS_BLK_TO_BYTES(cpc.trimmed);
return err;
@@ -1573,6 +1730,8 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
*new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
+ f2fs_wait_discard_bio(sbi, *new_blkaddr);
+
/*
* __add_sum_entry should be resided under the curseg_mutex
* because, this function updates a summary entry in the
@@ -1603,15 +1762,20 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
{
int type = __get_segment_type(fio->page, fio->type);
+ int err;
if (fio->type == NODE || fio->type == DATA)
mutex_lock(&fio->sbi->wio_mutex[fio->type]);
-
+reallocate:
allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
&fio->new_blkaddr, sum, type);
/* writeout dirty page into bdev */
- f2fs_submit_page_mbio(fio);
+ err = f2fs_submit_page_mbio(fio);
+ if (err == -EAGAIN) {
+ fio->old_blkaddr = fio->new_blkaddr;
+ goto reallocate;
+ }
if (fio->type == NODE || fio->type == DATA)
mutex_unlock(&fio->sbi->wio_mutex[fio->type]);
@@ -1753,7 +1917,8 @@ void f2fs_wait_on_page_writeback(struct page *page,
if (PageWriteback(page)) {
struct f2fs_sb_info *sbi = F2FS_P_SB(page);
- f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, type, WRITE);
+ f2fs_submit_merged_bio_cond(sbi, page->mapping->host,
+ 0, page->index, type, WRITE);
if (ordered)
wait_on_page_writeback(page);
else
@@ -2228,7 +2393,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
/* add discard candidates */
if (cpc->reason != CP_DISCARD) {
cpc->trim_start = segno;
- add_discard_addrs(sbi, cpc);
+ add_discard_addrs(sbi, cpc, false);
}
if (to_journal) {
@@ -2263,8 +2428,12 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
f2fs_bug_on(sbi, sit_i->dirty_sentries);
out:
if (cpc->reason == CP_DISCARD) {
+ __u64 trim_start = cpc->trim_start;
+
for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++)
- add_discard_addrs(sbi, cpc);
+ add_discard_addrs(sbi, cpc, false);
+
+ cpc->trim_start = trim_start;
}
mutex_unlock(&sit_i->sentry_lock);
@@ -2276,7 +2445,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
struct sit_info *sit_i;
unsigned int sit_segs, start;
- char *src_bitmap, *dst_bitmap;
+ char *src_bitmap;
unsigned int bitmap_size;
/* allocate memory for SIT information */
@@ -2305,6 +2474,13 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
!sit_i->sentries[start].ckpt_valid_map)
return -ENOMEM;
+#ifdef CONFIG_F2FS_CHECK_FS
+ sit_i->sentries[start].cur_valid_map_mir
+ = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
+ if (!sit_i->sentries[start].cur_valid_map_mir)
+ return -ENOMEM;
+#endif
+
if (f2fs_discard_en(sbi)) {
sit_i->sentries[start].discard_map
= kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
@@ -2331,9 +2507,15 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);
- dst_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
- if (!dst_bitmap)
+ sit_i->sit_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
+ if (!sit_i->sit_bitmap)
+ return -ENOMEM;
+
+#ifdef CONFIG_F2FS_CHECK_FS
+ sit_i->sit_bitmap_mir = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
+ if (!sit_i->sit_bitmap_mir)
return -ENOMEM;
+#endif
/* init SIT information */
sit_i->s_ops = &default_salloc_ops;
@@ -2341,7 +2523,6 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
sit_i->written_valid_blocks = 0;
- sit_i->sit_bitmap = dst_bitmap;
sit_i->bitmap_size = bitmap_size;
sit_i->dirty_sentries = 0;
sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
@@ -2626,13 +2807,6 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
- INIT_LIST_HEAD(&sm_info->discard_list);
- INIT_LIST_HEAD(&sm_info->wait_list);
- sm_info->nr_discards = 0;
- sm_info->max_discards = 0;
-
- sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS;
-
INIT_LIST_HEAD(&sm_info->sit_entry_set);
if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) {
@@ -2641,6 +2815,10 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
return err;
}
+ err = create_discard_cmd_control(sbi);
+ if (err)
+ return err;
+
err = build_sit_info(sbi);
if (err)
return err;
@@ -2734,6 +2912,9 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi)
if (sit_i->sentries) {
for (start = 0; start < MAIN_SEGS(sbi); start++) {
kfree(sit_i->sentries[start].cur_valid_map);
+#ifdef CONFIG_F2FS_CHECK_FS
+ kfree(sit_i->sentries[start].cur_valid_map_mir);
+#endif
kfree(sit_i->sentries[start].ckpt_valid_map);
kfree(sit_i->sentries[start].discard_map);
}
@@ -2746,6 +2927,9 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi)
SM_I(sbi)->sit_info = NULL;
kfree(sit_i->sit_bitmap);
+#ifdef CONFIG_F2FS_CHECK_FS
+ kfree(sit_i->sit_bitmap_mir);
+#endif
kfree(sit_i);
}
@@ -2756,6 +2940,7 @@ void destroy_segment_manager(struct f2fs_sb_info *sbi)
if (!sm_info)
return;
destroy_flush_cmd_control(sbi, true);
+ destroy_discard_cmd_control(sbi, true);
destroy_dirty_segmap(sbi);
destroy_curseg(sbi);
destroy_free_segmap(sbi);
@@ -2771,15 +2956,15 @@ int __init create_segment_manager_caches(void)
if (!discard_entry_slab)
goto fail;
- bio_entry_slab = f2fs_kmem_cache_create("bio_entry",
- sizeof(struct bio_entry));
- if (!bio_entry_slab)
+ discard_cmd_slab = f2fs_kmem_cache_create("discard_cmd",
+ sizeof(struct discard_cmd));
+ if (!discard_cmd_slab)
goto destroy_discard_entry;
sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set",
sizeof(struct sit_entry_set));
if (!sit_entry_set_slab)
- goto destroy_bio_entry;
+ goto destroy_discard_cmd;
inmem_entry_slab = f2fs_kmem_cache_create("inmem_page_entry",
sizeof(struct inmem_pages));
@@ -2789,8 +2974,8 @@ int __init create_segment_manager_caches(void)
destroy_sit_entry_set:
kmem_cache_destroy(sit_entry_set_slab);
-destroy_bio_entry:
- kmem_cache_destroy(bio_entry_slab);
+destroy_discard_cmd:
+ kmem_cache_destroy(discard_cmd_slab);
destroy_discard_entry:
kmem_cache_destroy(discard_entry_slab);
fail:
@@ -2800,7 +2985,7 @@ fail:
void destroy_segment_manager_caches(void)
{
kmem_cache_destroy(sit_entry_set_slab);
- kmem_cache_destroy(bio_entry_slab);
+ kmem_cache_destroy(discard_cmd_slab);
kmem_cache_destroy(discard_entry_slab);
kmem_cache_destroy(inmem_entry_slab);
}
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 9d44ce83acb2..5cb5755c75d9 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -164,6 +164,9 @@ struct seg_entry {
unsigned int ckpt_valid_blocks:10; /* # of valid blocks last cp */
unsigned int padding:6; /* padding */
unsigned char *cur_valid_map; /* validity bitmap of blocks */
+#ifdef CONFIG_F2FS_CHECK_FS
+ unsigned char *cur_valid_map_mir; /* mirror of current valid bitmap */
+#endif
/*
* # of valid blocks and the validity bitmap stored in the the last
* checkpoint pack. This information is used by the SSR mode.
@@ -186,9 +189,12 @@ struct segment_allocation {
* the page is atomically written, and it is in inmem_pages list.
*/
#define ATOMIC_WRITTEN_PAGE ((unsigned long)-1)
+#define DUMMY_WRITTEN_PAGE ((unsigned long)-2)
#define IS_ATOMIC_WRITTEN_PAGE(page) \
(page_private(page) == (unsigned long)ATOMIC_WRITTEN_PAGE)
+#define IS_DUMMY_WRITTEN_PAGE(page) \
+ (page_private(page) == (unsigned long)DUMMY_WRITTEN_PAGE)
struct inmem_pages {
struct list_head list;
@@ -203,6 +209,9 @@ struct sit_info {
block_t sit_blocks; /* # of blocks used by SIT area */
block_t written_valid_blocks; /* # of valid blocks in main area */
char *sit_bitmap; /* SIT bitmap pointer */
+#ifdef CONFIG_F2FS_CHECK_FS
+ char *sit_bitmap_mir; /* SIT bitmap mirror */
+#endif
unsigned int bitmap_size; /* SIT bitmap size */
unsigned long *tmp_map; /* bitmap for temporal use */
@@ -317,6 +326,9 @@ static inline void seg_info_from_raw_sit(struct seg_entry *se,
se->ckpt_valid_blocks = GET_SIT_VBLOCKS(rs);
memcpy(se->cur_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE);
memcpy(se->ckpt_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE);
+#ifdef CONFIG_F2FS_CHECK_FS
+ memcpy(se->cur_valid_map_mir, rs->valid_map, SIT_VBLOCK_MAP_SIZE);
+#endif
se->type = GET_SIT_TYPE(rs);
se->mtime = le64_to_cpu(rs->mtime);
}
@@ -414,6 +426,12 @@ static inline void get_sit_bitmap(struct f2fs_sb_info *sbi,
void *dst_addr)
{
struct sit_info *sit_i = SIT_I(sbi);
+
+#ifdef CONFIG_F2FS_CHECK_FS
+ if (memcmp(sit_i->sit_bitmap, sit_i->sit_bitmap_mir,
+ sit_i->bitmap_size))
+ f2fs_bug_on(sbi, 1);
+#endif
memcpy(dst_addr, sit_i->sit_bitmap, sit_i->bitmap_size);
}
@@ -634,6 +652,12 @@ static inline pgoff_t current_sit_addr(struct f2fs_sb_info *sbi,
check_seg_range(sbi, start);
+#ifdef CONFIG_F2FS_CHECK_FS
+ if (f2fs_test_bit(offset, sit_i->sit_bitmap) !=
+ f2fs_test_bit(offset, sit_i->sit_bitmap_mir))
+ f2fs_bug_on(sbi, 1);
+#endif
+
/* calculate sit block address */
if (f2fs_test_bit(offset, sit_i->sit_bitmap))
blk_addr += sit_i->sit_blocks;
@@ -659,6 +683,9 @@ static inline void set_to_next_sit(struct sit_info *sit_i, unsigned int start)
unsigned int block_off = SIT_BLOCK_OFFSET(start);
f2fs_change_bit(block_off, sit_i->sit_bitmap);
+#ifdef CONFIG_F2FS_CHECK_FS
+ f2fs_change_bit(block_off, sit_i->sit_bitmap_mir);
+#endif
}
static inline unsigned long long get_mtime(struct f2fs_sb_info *sbi)
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 702638e21c76..7875a4823a0d 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -101,6 +101,7 @@ enum {
Opt_noinline_data,
Opt_data_flush,
Opt_mode,
+ Opt_io_size_bits,
Opt_fault_injection,
Opt_lazytime,
Opt_nolazytime,
@@ -133,6 +134,7 @@ static match_table_t f2fs_tokens = {
{Opt_noinline_data, "noinline_data"},
{Opt_data_flush, "data_flush"},
{Opt_mode, "mode=%s"},
+ {Opt_io_size_bits, "io_bits=%u"},
{Opt_fault_injection, "fault_injection=%u"},
{Opt_lazytime, "lazytime"},
{Opt_nolazytime, "nolazytime"},
@@ -143,6 +145,7 @@ static match_table_t f2fs_tokens = {
enum {
GC_THREAD, /* struct f2fs_gc_thread */
SM_INFO, /* struct f2fs_sm_info */
+ DCC_INFO, /* struct discard_cmd_control */
NM_INFO, /* struct f2fs_nm_info */
F2FS_SBI, /* struct f2fs_sb_info */
#ifdef CONFIG_F2FS_FAULT_INJECTION
@@ -166,6 +169,8 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
return (unsigned char *)sbi->gc_thread;
else if (struct_type == SM_INFO)
return (unsigned char *)SM_I(sbi);
+ else if (struct_type == DCC_INFO)
+ return (unsigned char *)SM_I(sbi)->dcc_info;
else if (struct_type == NM_INFO)
return (unsigned char *)NM_I(sbi);
else if (struct_type == F2FS_SBI)
@@ -281,8 +286,7 @@ F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_max_sleep_time, max_sleep_time);
F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time);
F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments);
-F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards);
-F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
+F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_small_discards, max_discards);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
@@ -307,7 +311,6 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(gc_idle),
ATTR_LIST(reclaim_segments),
ATTR_LIST(max_small_discards),
- ATTR_LIST(batched_trim_sections),
ATTR_LIST(ipu_policy),
ATTR_LIST(min_ipu_util),
ATTR_LIST(min_fsync_blocks),
@@ -535,11 +538,23 @@ static int parse_options(struct super_block *sb, char *options)
}
kfree(name);
break;
+ case Opt_io_size_bits:
+ if (args->from && match_int(args, &arg))
+ return -EINVAL;
+ if (arg > __ilog2_u32(BIO_MAX_PAGES)) {
+ f2fs_msg(sb, KERN_WARNING,
+ "Not support %d, larger than %d",
+ 1 << arg, BIO_MAX_PAGES);
+ return -EINVAL;
+ }
+ sbi->write_io_size_bits = arg;
+ break;
case Opt_fault_injection:
if (args->from && match_int(args, &arg))
return -EINVAL;
#ifdef CONFIG_F2FS_FAULT_INJECTION
f2fs_build_fault_attr(sbi, arg);
+ set_opt(sbi, FAULT_INJECTION);
#else
f2fs_msg(sb, KERN_INFO,
"FAULT_INJECTION was not selected");
@@ -558,6 +573,13 @@ static int parse_options(struct super_block *sb, char *options)
return -EINVAL;
}
}
+
+ if (F2FS_IO_SIZE_BITS(sbi) && !test_opt(sbi, LFS)) {
+ f2fs_msg(sb, KERN_ERR,
+ "Should set mode=lfs with %uKB-sized IO",
+ F2FS_IO_SIZE_KB(sbi));
+ return -EINVAL;
+ }
return 0;
}
@@ -750,6 +772,9 @@ static void f2fs_put_super(struct super_block *sb)
write_checkpoint(sbi, &cpc);
}
+ /* be sure to wait for any on-going discard commands */
+ f2fs_wait_discard_bio(sbi, NULL_ADDR);
+
/* write_checkpoint can update stat informaion */
f2fs_destroy_stats(sbi);
@@ -918,6 +943,12 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
else if (test_opt(sbi, LFS))
seq_puts(seq, "lfs");
seq_printf(seq, ",active_logs=%u", sbi->active_logs);
+ if (F2FS_IO_SIZE_BITS(sbi))
+ seq_printf(seq, ",io_size=%uKB", F2FS_IO_SIZE_KB(sbi));
+#ifdef CONFIG_F2FS_FAULT_INJECTION
+ if (test_opt(sbi, FAULT_INJECTION))
+ seq_puts(seq, ",fault_injection");
+#endif
return 0;
}
@@ -1156,12 +1187,6 @@ static int f2fs_get_context(struct inode *inode, void *ctx, size_t len)
ctx, len, NULL);
}
-static int f2fs_key_prefix(struct inode *inode, u8 **key)
-{
- *key = F2FS_I_SB(inode)->key_prefix;
- return F2FS_I_SB(inode)->key_prefix_size;
-}
-
static int f2fs_set_context(struct inode *inode, const void *ctx, size_t len,
void *fs_data)
{
@@ -1176,16 +1201,16 @@ static unsigned f2fs_max_namelen(struct inode *inode)
inode->i_sb->s_blocksize : F2FS_NAME_LEN;
}
-static struct fscrypt_operations f2fs_cryptops = {
+static const struct fscrypt_operations f2fs_cryptops = {
+ .key_prefix = "f2fs:",
.get_context = f2fs_get_context,
- .key_prefix = f2fs_key_prefix,
.set_context = f2fs_set_context,
.is_encrypted = f2fs_encrypted_inode,
.empty_dir = f2fs_empty_dir,
.max_namelen = f2fs_max_namelen,
};
#else
-static struct fscrypt_operations f2fs_cryptops = {
+static const struct fscrypt_operations f2fs_cryptops = {
.is_encrypted = f2fs_encrypted_inode,
};
#endif
@@ -1518,12 +1543,6 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
mutex_init(&sbi->wio_mutex[NODE]);
mutex_init(&sbi->wio_mutex[DATA]);
spin_lock_init(&sbi->cp_lock);
-
-#ifdef CONFIG_F2FS_FS_ENCRYPTION
- memcpy(sbi->key_prefix, F2FS_KEY_DESC_PREFIX,
- F2FS_KEY_DESC_PREFIX_SIZE);
- sbi->key_prefix_size = F2FS_KEY_DESC_PREFIX_SIZE;
-#endif
}
static int init_percpu_info(struct f2fs_sb_info *sbi)
@@ -1553,16 +1572,16 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
return 0;
if (sbi->blocks_per_blkz && sbi->blocks_per_blkz !=
- SECTOR_TO_BLOCK(bdev_zone_size(bdev)))
+ SECTOR_TO_BLOCK(bdev_zone_sectors(bdev)))
return -EINVAL;
- sbi->blocks_per_blkz = SECTOR_TO_BLOCK(bdev_zone_size(bdev));
+ sbi->blocks_per_blkz = SECTOR_TO_BLOCK(bdev_zone_sectors(bdev));
if (sbi->log_blocks_per_blkz && sbi->log_blocks_per_blkz !=
__ilog2_u32(sbi->blocks_per_blkz))
return -EINVAL;
sbi->log_blocks_per_blkz = __ilog2_u32(sbi->blocks_per_blkz);
FDEV(devi).nr_blkz = SECTOR_TO_BLOCK(nr_sectors) >>
sbi->log_blocks_per_blkz;
- if (nr_sectors & (bdev_zone_size(bdev) - 1))
+ if (nr_sectors & (bdev_zone_sectors(bdev) - 1))
FDEV(devi).nr_blkz++;
FDEV(devi).blkz_type = kmalloc(FDEV(devi).nr_blkz, GFP_KERNEL);
@@ -1763,6 +1782,8 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
FDEV(i).total_segments,
FDEV(i).start_blk, FDEV(i).end_blk);
}
+ f2fs_msg(sbi->sb, KERN_INFO,
+ "IO Block Size: %8d KB", F2FS_IO_SIZE_KB(sbi));
return 0;
}
@@ -1880,12 +1901,19 @@ try_onemore:
if (err)
goto free_options;
+ if (F2FS_IO_SIZE(sbi) > 1) {
+ sbi->write_io_dummy =
+ mempool_create_page_pool(F2FS_IO_SIZE(sbi) - 1, 0);
+ if (!sbi->write_io_dummy)
+ goto free_options;
+ }
+
/* get an inode for meta space */
sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi));
if (IS_ERR(sbi->meta_inode)) {
f2fs_msg(sb, KERN_ERR, "Failed to read F2FS meta data inode");
err = PTR_ERR(sbi->meta_inode);
- goto free_options;
+ goto free_io_dummy;
}
err = get_valid_checkpoint(sbi);
@@ -2060,6 +2088,8 @@ skip_recovery:
sbi->valid_super_block ? 1 : 2, err);
}
+ f2fs_msg(sbi->sb, KERN_NOTICE, "Mounted with checkpoint version = %llx",
+ cur_cp_version(F2FS_CKPT(sbi)));
f2fs_update_time(sbi, CP_TIME);
f2fs_update_time(sbi, REQ_TIME);
return 0;
@@ -2103,6 +2133,8 @@ free_devices:
free_meta_inode:
make_bad_inode(sbi->meta_inode);
iput(sbi->meta_inode);
+free_io_dummy:
+ mempool_destroy(sbi->write_io_dummy);
free_options:
destroy_percpu_info(sbi);
kfree(options);
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index c47ce2f330a1..f7482635a57d 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -217,6 +217,112 @@ static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int index,
return entry;
}
+static struct f2fs_xattr_entry *__find_inline_xattr(void *base_addr,
+ void **last_addr, int index,
+ size_t len, const char *name)
+{
+ struct f2fs_xattr_entry *entry;
+ unsigned int inline_size = F2FS_INLINE_XATTR_ADDRS << 2;
+
+ list_for_each_xattr(entry, base_addr) {
+ if ((void *)entry + sizeof(__u32) > base_addr + inline_size ||
+ (void *)XATTR_NEXT_ENTRY(entry) + sizeof(__u32) >
+ base_addr + inline_size) {
+ *last_addr = entry;
+ return NULL;
+ }
+ if (entry->e_name_index != index)
+ continue;
+ if (entry->e_name_len != len)
+ continue;
+ if (!memcmp(entry->e_name, name, len))
+ break;
+ }
+ return entry;
+}
+
+static int lookup_all_xattrs(struct inode *inode, struct page *ipage,
+ unsigned int index, unsigned int len,
+ const char *name, struct f2fs_xattr_entry **xe,
+ void **base_addr)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ void *cur_addr, *txattr_addr, *last_addr = NULL;
+ nid_t xnid = F2FS_I(inode)->i_xattr_nid;
+ unsigned int size = xnid ? VALID_XATTR_BLOCK_SIZE : 0;
+ unsigned int inline_size = 0;
+ int err = 0;
+
+ inline_size = inline_xattr_size(inode);
+
+ if (!size && !inline_size)
+ return -ENODATA;
+
+ txattr_addr = kzalloc(inline_size + size + sizeof(__u32),
+ GFP_F2FS_ZERO);
+ if (!txattr_addr)
+ return -ENOMEM;
+
+ /* read from inline xattr */
+ if (inline_size) {
+ struct page *page = NULL;
+ void *inline_addr;
+
+ if (ipage) {
+ inline_addr = inline_xattr_addr(ipage);
+ } else {
+ page = get_node_page(sbi, inode->i_ino);
+ if (IS_ERR(page)) {
+ err = PTR_ERR(page);
+ goto out;
+ }
+ inline_addr = inline_xattr_addr(page);
+ }
+ memcpy(txattr_addr, inline_addr, inline_size);
+ f2fs_put_page(page, 1);
+
+ *xe = __find_inline_xattr(txattr_addr, &last_addr,
+ index, len, name);
+ if (*xe)
+ goto check;
+ }
+
+ /* read from xattr node block */
+ if (xnid) {
+ struct page *xpage;
+ void *xattr_addr;
+
+ /* The inode already has an extended attribute block. */
+ xpage = get_node_page(sbi, xnid);
+ if (IS_ERR(xpage)) {
+ err = PTR_ERR(xpage);
+ goto out;
+ }
+
+ xattr_addr = page_address(xpage);
+ memcpy(txattr_addr + inline_size, xattr_addr, size);
+ f2fs_put_page(xpage, 1);
+ }
+
+ if (last_addr)
+ cur_addr = XATTR_HDR(last_addr) - 1;
+ else
+ cur_addr = txattr_addr;
+
+ *xe = __find_xattr(cur_addr, index, len, name);
+check:
+ if (IS_XATTR_LAST_ENTRY(*xe)) {
+ err = -ENODATA;
+ goto out;
+ }
+
+ *base_addr = txattr_addr;
+ return 0;
+out:
+ kzfree(txattr_addr);
+ return err;
+}
+
static int read_all_xattrs(struct inode *inode, struct page *ipage,
void **base_addr)
{
@@ -348,8 +454,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
}
xattr_addr = page_address(xpage);
- memcpy(xattr_addr, txattr_addr + inline_size, PAGE_SIZE -
- sizeof(struct node_footer));
+ memcpy(xattr_addr, txattr_addr + inline_size, MAX_XATTR_BLOCK_SIZE);
set_page_dirty(xpage);
f2fs_put_page(xpage, 1);
@@ -361,10 +466,11 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
int f2fs_getxattr(struct inode *inode, int index, const char *name,
void *buffer, size_t buffer_size, struct page *ipage)
{
- struct f2fs_xattr_entry *entry;
- void *base_addr;
+ struct f2fs_xattr_entry *entry = NULL;
int error = 0;
- size_t size, len;
+ unsigned int size, len;
+ char *pval;
+ void *base_addr = NULL;
if (name == NULL)
return -EINVAL;
@@ -373,30 +479,26 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name,
if (len > F2FS_NAME_LEN)
return -ERANGE;
- error = read_all_xattrs(inode, ipage, &base_addr);
+ error = lookup_all_xattrs(inode, ipage, index, len, name,
+ &entry, &base_addr);
if (error)
return error;
- entry = __find_xattr(base_addr, index, len, name);
- if (IS_XATTR_LAST_ENTRY(entry)) {
- error = -ENODATA;
- goto cleanup;
- }
-
size = le16_to_cpu(entry->e_value_size);
if (buffer && size > buffer_size) {
error = -ERANGE;
- goto cleanup;
+ goto out;
}
+ pval = entry->e_name + entry->e_name_len;
+
if (buffer) {
char *pval = entry->e_name + entry->e_name_len;
memcpy(buffer, pval, size);
}
error = size;
-
-cleanup:
+out:
kzfree(base_addr);
return error;
}
diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h
index f990de20cdcd..d5a94928c116 100644
--- a/fs/f2fs/xattr.h
+++ b/fs/f2fs/xattr.h
@@ -72,9 +72,10 @@ struct f2fs_xattr_entry {
for (entry = XATTR_FIRST_ENTRY(addr);\
!IS_XATTR_LAST_ENTRY(entry);\
entry = XATTR_NEXT_ENTRY(entry))
-
-#define MIN_OFFSET(i) XATTR_ALIGN(inline_xattr_size(i) + PAGE_SIZE - \
- sizeof(struct node_footer) - sizeof(__u32))
+#define MAX_XATTR_BLOCK_SIZE (PAGE_SIZE - sizeof(struct node_footer))
+#define VALID_XATTR_BLOCK_SIZE (MAX_XATTR_BLOCK_SIZE - sizeof(__u32))
+#define MIN_OFFSET(i) XATTR_ALIGN(inline_xattr_size(i) + \
+ VALID_XATTR_BLOCK_SIZE)
#define MAX_VALUE_LEN(i) (MIN_OFFSET(i) - \
sizeof(struct f2fs_xattr_header) - \
diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
index 4304072161aa..40d61077bead 100644
--- a/fs/fscache/cookie.c
+++ b/fs/fscache/cookie.c
@@ -542,6 +542,7 @@ void __fscache_disable_cookie(struct fscache_cookie *cookie, bool invalidate)
hlist_for_each_entry(object, &cookie->backing_objects, cookie_link) {
if (invalidate)
set_bit(FSCACHE_OBJECT_RETIRED, &object->flags);
+ clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags);
fscache_raise_event(object, FSCACHE_OBJECT_EV_KILL);
}
} else {
@@ -560,6 +561,10 @@ void __fscache_disable_cookie(struct fscache_cookie *cookie, bool invalidate)
wait_on_atomic_t(&cookie->n_active, fscache_wait_atomic_t,
TASK_UNINTERRUPTIBLE);
+ /* Make sure any pending writes are cancelled. */
+ if (cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX)
+ fscache_invalidate_writes(cookie);
+
/* Reset the cookie state if it wasn't relinquished */
if (!test_bit(FSCACHE_COOKIE_RELINQUISHED, &cookie->flags)) {
atomic_inc(&cookie->n_active);
diff --git a/fs/fscache/netfs.c b/fs/fscache/netfs.c
index 9b28649df3a1..a8aa00be4444 100644
--- a/fs/fscache/netfs.c
+++ b/fs/fscache/netfs.c
@@ -48,6 +48,7 @@ int __fscache_register_netfs(struct fscache_netfs *netfs)
cookie->flags = 1 << FSCACHE_COOKIE_ENABLED;
spin_lock_init(&cookie->lock);
+ spin_lock_init(&cookie->stores_lock);
INIT_HLIST_HEAD(&cookie->backing_objects);
/* check the netfs type is not already present */
diff --git a/fs/fscache/object.c b/fs/fscache/object.c
index 9e792e30f4db..7a182c87f378 100644
--- a/fs/fscache/object.c
+++ b/fs/fscache/object.c
@@ -30,6 +30,7 @@ static const struct fscache_state *fscache_look_up_object(struct fscache_object
static const struct fscache_state *fscache_object_available(struct fscache_object *, int);
static const struct fscache_state *fscache_parent_ready(struct fscache_object *, int);
static const struct fscache_state *fscache_update_object(struct fscache_object *, int);
+static const struct fscache_state *fscache_object_dead(struct fscache_object *, int);
#define __STATE_NAME(n) fscache_osm_##n
#define STATE(n) (&__STATE_NAME(n))
@@ -91,7 +92,7 @@ static WORK_STATE(LOOKUP_FAILURE, "LCFL", fscache_lookup_failure);
static WORK_STATE(KILL_OBJECT, "KILL", fscache_kill_object);
static WORK_STATE(KILL_DEPENDENTS, "KDEP", fscache_kill_dependents);
static WORK_STATE(DROP_OBJECT, "DROP", fscache_drop_object);
-static WORK_STATE(OBJECT_DEAD, "DEAD", (void*)2UL);
+static WORK_STATE(OBJECT_DEAD, "DEAD", fscache_object_dead);
static WAIT_STATE(WAIT_FOR_INIT, "?INI",
TRANSIT_TO(INIT_OBJECT, 1 << FSCACHE_OBJECT_EV_NEW_CHILD));
@@ -229,6 +230,10 @@ execute_work_state:
event = -1;
if (new_state == NO_TRANSIT) {
_debug("{OBJ%x} %s notrans", object->debug_id, state->name);
+ if (unlikely(state == STATE(OBJECT_DEAD))) {
+ _leave(" [dead]");
+ return;
+ }
fscache_enqueue_object(object);
event_mask = object->oob_event_mask;
goto unmask_events;
@@ -239,7 +244,7 @@ execute_work_state:
object->state = state = new_state;
if (state->work) {
- if (unlikely(state->work == ((void *)2UL))) {
+ if (unlikely(state == STATE(OBJECT_DEAD))) {
_leave(" [dead]");
return;
}
@@ -645,6 +650,12 @@ static const struct fscache_state *fscache_kill_object(struct fscache_object *ob
fscache_mark_object_dead(object);
object->oob_event_mask = 0;
+ if (test_bit(FSCACHE_OBJECT_RETIRED, &object->flags)) {
+ /* Reject any new read/write ops and abort any that are pending. */
+ clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags);
+ fscache_cancel_all_ops(object);
+ }
+
if (list_empty(&object->dependents) &&
object->n_ops == 0 &&
object->n_children == 0)
@@ -1077,3 +1088,20 @@ void fscache_object_mark_killed(struct fscache_object *object,
}
}
EXPORT_SYMBOL(fscache_object_mark_killed);
+
+/*
+ * The object is dead. We can get here if an object gets queued by an event
+ * that would lead to its death (such as EV_KILL) when the dispatcher is
+ * already running (and so can be requeued) but hasn't yet cleared the event
+ * mask.
+ */
+static const struct fscache_state *fscache_object_dead(struct fscache_object *object,
+ int event)
+{
+ if (!test_and_set_bit(FSCACHE_OBJECT_RUN_AFTER_DEAD,
+ &object->flags))
+ return NO_TRANSIT;
+
+ WARN(true, "FS-Cache object redispatched after death");
+ return NO_TRANSIT;
+}
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 70ea57c7b6bb..f11792672977 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -399,6 +399,10 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req)
static void queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req)
{
spin_lock(&fiq->waitq.lock);
+ if (test_bit(FR_FINISHED, &req->flags)) {
+ spin_unlock(&fiq->waitq.lock);
+ return;
+ }
if (list_empty(&req->intr_entry)) {
list_add_tail(&req->intr_entry, &fiq->interrupts);
wake_up_locked(&fiq->waitq);
@@ -1372,6 +1376,7 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
* code can Oops if the buffer persists after module unload.
*/
bufs[page_nr].ops = &nosteal_pipe_buf_ops;
+ bufs[page_nr].flags = 0;
ret = add_to_pipe(pipe, &bufs[page_nr++]);
if (unlikely(ret < 0))
break;
@@ -2025,7 +2030,6 @@ static void end_requests(struct fuse_conn *fc, struct list_head *head)
struct fuse_req *req;
req = list_entry(head->next, struct fuse_req, list);
req->out.h.error = -ECONNABORTED;
- clear_bit(FR_PENDING, &req->flags);
clear_bit(FR_SENT, &req->flags);
list_del_init(&req->list);
request_end(fc, req);
@@ -2103,6 +2107,8 @@ void fuse_abort_conn(struct fuse_conn *fc)
spin_lock(&fiq->waitq.lock);
fiq->connected = 0;
list_splice_init(&fiq->pending, &to_end2);
+ list_for_each_entry(req, &to_end2, list)
+ clear_bit(FR_PENDING, &req->flags);
while (forget_pending(fiq))
kfree(dequeue_forget(fiq, 1, NULL));
wake_up_all_locked(&fiq->waitq);
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 1f7c732f32b0..811fd8929a18 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -68,7 +68,7 @@ static u64 time_to_jiffies(u64 sec, u32 nsec)
if (sec || nsec) {
struct timespec64 ts = {
sec,
- max_t(u32, nsec, NSEC_PER_SEC - 1)
+ min_t(u32, nsec, NSEC_PER_SEC - 1)
};
return get_jiffies_64() + timespec64_to_jiffies(&ts);
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 6b039d7ce160..ed7a2e252ad8 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -143,8 +143,8 @@ static int gfs2_writepage(struct page *page, struct writeback_control *wbc)
/* This is the same as calling block_write_full_page, but it also
* writes pages outside of i_size
*/
-int gfs2_write_full_page(struct page *page, get_block_t *get_block,
- struct writeback_control *wbc)
+static int gfs2_write_full_page(struct page *page, get_block_t *get_block,
+ struct writeback_control *wbc)
{
struct inode * const inode = page->mapping->host;
loff_t i_size = i_size_read(inode);
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index fc5da4cbe88c..01b97c012c6e 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -720,6 +720,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_rgrp_list rlist;
+ struct gfs2_trans *tr;
u64 bn, bstart;
u32 blen, btotal;
__be64 *p;
@@ -728,6 +729,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
unsigned int revokes = 0;
int x;
int error;
+ int jblocks_rqsted;
error = gfs2_rindex_update(sdp);
if (error)
@@ -791,12 +793,17 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
if (gfs2_rs_active(&ip->i_res)) /* needs to be done with the rgrp glock held */
gfs2_rs_deltree(&ip->i_res);
- error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE +
- RES_INDIRECT + RES_STATFS + RES_QUOTA,
- revokes);
+restart:
+ jblocks_rqsted = rg_blocks + RES_DINODE +
+ RES_INDIRECT + RES_STATFS + RES_QUOTA +
+ gfs2_struct2blk(sdp, revokes, sizeof(u64));
+ if (jblocks_rqsted > atomic_read(&sdp->sd_log_thresh2))
+ jblocks_rqsted = atomic_read(&sdp->sd_log_thresh2);
+ error = gfs2_trans_begin(sdp, jblocks_rqsted, revokes);
if (error)
goto out_rg_gunlock;
+ tr = current->journal_info;
down_write(&ip->i_rw_mutex);
gfs2_trans_add_meta(ip->i_gl, dibh);
@@ -810,6 +817,16 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
if (!*p)
continue;
+ /* check for max reasonable journal transaction blocks */
+ if (tr->tr_num_buf_new + RES_STATFS +
+ RES_QUOTA >= atomic_read(&sdp->sd_log_thresh2)) {
+ if (rg_blocks >= tr->tr_num_buf_new)
+ rg_blocks -= tr->tr_num_buf_new;
+ else
+ rg_blocks = 0;
+ break;
+ }
+
bn = be64_to_cpu(*p);
if (bstart + blen == bn)
@@ -827,6 +844,9 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
*p = 0;
gfs2_add_inode_blocks(&ip->i_inode, -1);
}
+ if (p == bottom)
+ rg_blocks = 0;
+
if (bstart) {
__gfs2_free_blocks(ip, bstart, blen, metadata);
btotal += blen;
@@ -844,6 +864,9 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
gfs2_trans_end(sdp);
+ if (rg_blocks)
+ goto restart;
+
out_rg_gunlock:
gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);
out_rlist:
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 94f50cac91c6..a2d45db32cd5 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1802,16 +1802,18 @@ void gfs2_glock_exit(void)
static void gfs2_glock_iter_next(struct gfs2_glock_iter *gi)
{
- do {
- gi->gl = rhashtable_walk_next(&gi->hti);
+ while ((gi->gl = rhashtable_walk_next(&gi->hti))) {
if (IS_ERR(gi->gl)) {
if (PTR_ERR(gi->gl) == -EAGAIN)
continue;
gi->gl = NULL;
+ return;
}
- /* Skip entries for other sb and dead entries */
- } while ((gi->gl) && ((gi->sdp != gi->gl->gl_name.ln_sbd) ||
- __lockref_is_dead(&gi->gl->gl_lockref)));
+ /* Skip entries for other sb and dead entries */
+ if (gi->sdp == gi->gl->gl_name.ln_sbd &&
+ !__lockref_is_dead(&gi->gl->gl_lockref))
+ return;
+ }
}
static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index a6a3389a07fc..c45084ac642d 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -470,15 +470,19 @@ struct gfs2_quota_data {
struct rcu_head qd_rcu;
};
+enum {
+ TR_TOUCHED = 1,
+ TR_ATTACHED = 2,
+ TR_ALLOCED = 3,
+};
+
struct gfs2_trans {
unsigned long tr_ip;
unsigned int tr_blocks;
unsigned int tr_revokes;
unsigned int tr_reserved;
- unsigned int tr_touched:1;
- unsigned int tr_attached:1;
- unsigned int tr_alloced:1;
+ unsigned long tr_flags;
unsigned int tr_num_buf_new;
unsigned int tr_num_databuf_new;
@@ -794,6 +798,7 @@ struct gfs2_sbd {
atomic_t sd_log_thresh1;
atomic_t sd_log_thresh2;
atomic_t sd_log_blks_free;
+ atomic_t sd_log_blks_needed;
wait_queue_head_t sd_log_waitq;
wait_queue_head_t sd_logd_waitq;
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 27c00a16def0..f865b96374df 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -349,6 +349,7 @@ int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)
if (gfs2_assert_warn(sdp, blks) ||
gfs2_assert_warn(sdp, blks <= sdp->sd_jdesc->jd_blocks))
return -EINVAL;
+ atomic_add(blks, &sdp->sd_log_blks_needed);
retry:
free_blocks = atomic_read(&sdp->sd_log_blks_free);
if (unlikely(free_blocks <= wanted)) {
@@ -370,6 +371,7 @@ retry:
wake_up(&sdp->sd_reserving_log_wait);
goto retry;
}
+ atomic_sub(blks, &sdp->sd_log_blks_needed);
trace_gfs2_log_blocks(sdp, -blks);
/*
@@ -797,7 +799,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl,
static void gfs2_merge_trans(struct gfs2_trans *old, struct gfs2_trans *new)
{
- WARN_ON_ONCE(old->tr_attached != 1);
+ WARN_ON_ONCE(!test_bit(TR_ATTACHED, &old->tr_flags));
old->tr_num_buf_new += new->tr_num_buf_new;
old->tr_num_databuf_new += new->tr_num_databuf_new;
@@ -821,9 +823,9 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
if (sdp->sd_log_tr) {
gfs2_merge_trans(sdp->sd_log_tr, tr);
} else if (tr->tr_num_buf_new || tr->tr_num_databuf_new) {
- gfs2_assert_withdraw(sdp, tr->tr_alloced);
+ gfs2_assert_withdraw(sdp, test_bit(TR_ALLOCED, &tr->tr_flags));
sdp->sd_log_tr = tr;
- tr->tr_attached = 1;
+ set_bit(TR_ATTACHED, &tr->tr_flags);
}
sdp->sd_log_commited_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm;
@@ -891,13 +893,16 @@ void gfs2_log_shutdown(struct gfs2_sbd *sdp)
static inline int gfs2_jrnl_flush_reqd(struct gfs2_sbd *sdp)
{
- return (atomic_read(&sdp->sd_log_pinned) >= atomic_read(&sdp->sd_log_thresh1));
+ return (atomic_read(&sdp->sd_log_pinned) +
+ atomic_read(&sdp->sd_log_blks_needed) >=
+ atomic_read(&sdp->sd_log_thresh1));
}
static inline int gfs2_ail_flush_reqd(struct gfs2_sbd *sdp)
{
unsigned int used_blocks = sdp->sd_jdesc->jd_blocks - atomic_read(&sdp->sd_log_blks_free);
- return used_blocks >= atomic_read(&sdp->sd_log_thresh2);
+ return used_blocks + atomic_read(&sdp->sd_log_blks_needed) >=
+ atomic_read(&sdp->sd_log_thresh2);
}
/**
@@ -913,12 +918,15 @@ int gfs2_logd(void *data)
struct gfs2_sbd *sdp = data;
unsigned long t = 1;
DEFINE_WAIT(wait);
+ bool did_flush;
while (!kthread_should_stop()) {
+ did_flush = false;
if (gfs2_jrnl_flush_reqd(sdp) || t == 0) {
gfs2_ail1_empty(sdp);
gfs2_log_flush(sdp, NULL, NORMAL_FLUSH);
+ did_flush = true;
}
if (gfs2_ail_flush_reqd(sdp)) {
@@ -926,9 +934,10 @@ int gfs2_logd(void *data)
gfs2_ail1_wait(sdp);
gfs2_ail1_empty(sdp);
gfs2_log_flush(sdp, NULL, NORMAL_FLUSH);
+ did_flush = true;
}
- if (!gfs2_ail_flush_reqd(sdp))
+ if (!gfs2_ail_flush_reqd(sdp) || did_flush)
wake_up(&sdp->sd_log_waitq);
t = gfs2_tune_get(sdp, gt_logd_secs) * HZ;
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 49db8ef13fdf..663ffc135ef3 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -292,7 +292,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
wait_on_buffer(bh);
if (unlikely(!buffer_uptodate(bh))) {
struct gfs2_trans *tr = current->journal_info;
- if (tr && tr->tr_touched)
+ if (tr && test_bit(TR_TOUCHED, &tr->tr_flags))
gfs2_io_error_bh(sdp, bh);
brelse(bh);
*bhp = NULL;
@@ -319,7 +319,7 @@ int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh)
if (!buffer_uptodate(bh)) {
struct gfs2_trans *tr = current->journal_info;
- if (tr && tr->tr_touched)
+ if (tr && test_bit(TR_TOUCHED, &tr->tr_flags))
gfs2_io_error_bh(sdp, bh);
return -EIO;
}
@@ -345,7 +345,7 @@ void gfs2_remove_from_journal(struct buffer_head *bh, int meta)
tr->tr_num_buf_rm++;
else
tr->tr_num_databuf_rm++;
- tr->tr_touched = 1;
+ set_bit(TR_TOUCHED, &tr->tr_flags);
was_pinned = 1;
brelse(bh);
}
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index a34308df927f..58704799f0b9 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -683,6 +683,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
goto fail_jindex;
}
+ atomic_set(&sdp->sd_log_blks_needed, 0);
if (sdp->sd_args.ar_spectator) {
sdp->sd_jdesc = gfs2_jdesc_find(sdp, 0);
atomic_set(&sdp->sd_log_blks_free, sdp->sd_jdesc->jd_blocks);
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index 0c1bde395062..affef3c066e0 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -48,7 +48,7 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,
tr->tr_blocks = blocks;
tr->tr_revokes = revokes;
tr->tr_reserved = 1;
- tr->tr_alloced = 1;
+ set_bit(TR_ALLOCED, &tr->tr_flags);
if (blocks)
tr->tr_reserved += 6 + blocks;
if (revokes)
@@ -78,7 +78,8 @@ static void gfs2_print_trans(const struct gfs2_trans *tr)
{
pr_warn("Transaction created at: %pSR\n", (void *)tr->tr_ip);
pr_warn("blocks=%u revokes=%u reserved=%u touched=%u\n",
- tr->tr_blocks, tr->tr_revokes, tr->tr_reserved, tr->tr_touched);
+ tr->tr_blocks, tr->tr_revokes, tr->tr_reserved,
+ test_bit(TR_TOUCHED, &tr->tr_flags));
pr_warn("Buf %u/%u Databuf %u/%u Revoke %u/%u\n",
tr->tr_num_buf_new, tr->tr_num_buf_rm,
tr->tr_num_databuf_new, tr->tr_num_databuf_rm,
@@ -89,12 +90,12 @@ void gfs2_trans_end(struct gfs2_sbd *sdp)
{
struct gfs2_trans *tr = current->journal_info;
s64 nbuf;
- int alloced = tr->tr_alloced;
+ int alloced = test_bit(TR_ALLOCED, &tr->tr_flags);
BUG_ON(!tr);
current->journal_info = NULL;
- if (!tr->tr_touched) {
+ if (!test_bit(TR_TOUCHED, &tr->tr_flags)) {
gfs2_log_release(sdp, tr->tr_reserved);
if (alloced) {
kfree(tr);
@@ -112,8 +113,8 @@ void gfs2_trans_end(struct gfs2_sbd *sdp)
gfs2_print_trans(tr);
gfs2_log_commit(sdp, tr);
- if (alloced && !tr->tr_attached)
- kfree(tr);
+ if (alloced && !test_bit(TR_ATTACHED, &tr->tr_flags))
+ kfree(tr);
up_read(&sdp->sd_log_flush_lock);
if (sdp->sd_vfs->s_flags & MS_SYNCHRONOUS)
@@ -169,6 +170,10 @@ void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh)
}
lock_buffer(bh);
+ if (buffer_pinned(bh)) {
+ set_bit(TR_TOUCHED, &tr->tr_flags);
+ goto out;
+ }
gfs2_log_lock(sdp);
bd = bh->b_private;
if (bd == NULL) {
@@ -182,7 +187,7 @@ void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh)
gfs2_log_lock(sdp);
}
gfs2_assert(sdp, bd->bd_gl == gl);
- tr->tr_touched = 1;
+ set_bit(TR_TOUCHED, &tr->tr_flags);
if (list_empty(&bd->bd_list)) {
set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
@@ -191,45 +196,24 @@ void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh)
list_add_tail(&bd->bd_list, &tr->tr_databuf);
}
gfs2_log_unlock(sdp);
+out:
unlock_buffer(bh);
}
-static void meta_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
-{
- struct gfs2_meta_header *mh;
- struct gfs2_trans *tr;
- enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state);
-
- tr = current->journal_info;
- tr->tr_touched = 1;
- if (!list_empty(&bd->bd_list))
- return;
- set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
- set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
- mh = (struct gfs2_meta_header *)bd->bd_bh->b_data;
- if (unlikely(mh->mh_magic != cpu_to_be32(GFS2_MAGIC))) {
- pr_err("Attempting to add uninitialised block to journal (inplace block=%lld)\n",
- (unsigned long long)bd->bd_bh->b_blocknr);
- BUG();
- }
- if (unlikely(state == SFS_FROZEN)) {
- printk(KERN_INFO "GFS2:adding buf while frozen\n");
- gfs2_assert_withdraw(sdp, 0);
- }
- gfs2_pin(sdp, bd->bd_bh);
- mh->__pad0 = cpu_to_be64(0);
- mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
- list_add(&bd->bd_list, &tr->tr_buf);
- tr->tr_num_buf_new++;
-}
-
void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh)
{
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
struct gfs2_bufdata *bd;
+ struct gfs2_meta_header *mh;
+ struct gfs2_trans *tr = current->journal_info;
+ enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state);
lock_buffer(bh);
+ if (buffer_pinned(bh)) {
+ set_bit(TR_TOUCHED, &tr->tr_flags);
+ goto out;
+ }
gfs2_log_lock(sdp);
bd = bh->b_private;
if (bd == NULL) {
@@ -245,8 +229,29 @@ void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh)
gfs2_log_lock(sdp);
}
gfs2_assert(sdp, bd->bd_gl == gl);
- meta_lo_add(sdp, bd);
+ set_bit(TR_TOUCHED, &tr->tr_flags);
+ if (!list_empty(&bd->bd_list))
+ goto out_unlock;
+ set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
+ set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
+ mh = (struct gfs2_meta_header *)bd->bd_bh->b_data;
+ if (unlikely(mh->mh_magic != cpu_to_be32(GFS2_MAGIC))) {
+ pr_err("Attempting to add uninitialised block to journal (inplace block=%lld)\n",
+ (unsigned long long)bd->bd_bh->b_blocknr);
+ BUG();
+ }
+ if (unlikely(state == SFS_FROZEN)) {
+ printk(KERN_INFO "GFS2:adding buf while frozen\n");
+ gfs2_assert_withdraw(sdp, 0);
+ }
+ gfs2_pin(sdp, bd->bd_bh);
+ mh->__pad0 = cpu_to_be64(0);
+ mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
+ list_add(&bd->bd_list, &tr->tr_buf);
+ tr->tr_num_buf_new++;
+out_unlock:
gfs2_log_unlock(sdp);
+out:
unlock_buffer(bh);
}
@@ -256,7 +261,7 @@ void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
BUG_ON(!list_empty(&bd->bd_list));
gfs2_add_revoke(sdp, bd);
- tr->tr_touched = 1;
+ set_bit(TR_TOUCHED, &tr->tr_flags);
tr->tr_num_revoke++;
}
diff --git a/fs/internal.h b/fs/internal.h
index b63cf3af2dc2..11c6d89dce9c 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -182,7 +182,7 @@ typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
void *data, struct iomap *iomap);
loff_t iomap_apply(struct inode *inode, loff_t pos, loff_t length,
- unsigned flags, struct iomap_ops *ops, void *data,
+ unsigned flags, const struct iomap_ops *ops, void *data,
iomap_actor_t actor);
/* direct-io.c: */
diff --git a/fs/iomap.c b/fs/iomap.c
index 354a123f170e..d89f70bbb952 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -41,7 +41,7 @@
*/
loff_t
iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags,
- struct iomap_ops *ops, void *data, iomap_actor_t actor)
+ const struct iomap_ops *ops, void *data, iomap_actor_t actor)
{
struct iomap iomap = { 0 };
loff_t written = 0, ret;
@@ -114,6 +114,9 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
BUG_ON(pos + len > iomap->offset + iomap->length);
+ if (fatal_signal_pending(current))
+ return -EINTR;
+
page = grab_cache_page_write_begin(inode->i_mapping, index, flags);
if (!page)
return -ENOMEM;
@@ -232,7 +235,7 @@ again:
ssize_t
iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *iter,
- struct iomap_ops *ops)
+ const struct iomap_ops *ops)
{
struct inode *inode = iocb->ki_filp->f_mapping->host;
loff_t pos = iocb->ki_pos, ret = 0, written = 0;
@@ -315,7 +318,7 @@ iomap_dirty_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
int
iomap_file_dirty(struct inode *inode, loff_t pos, loff_t len,
- struct iomap_ops *ops)
+ const struct iomap_ops *ops)
{
loff_t ret;
@@ -395,7 +398,7 @@ iomap_zero_range_actor(struct inode *inode, loff_t pos, loff_t count,
int
iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
- struct iomap_ops *ops)
+ const struct iomap_ops *ops)
{
loff_t ret;
@@ -415,7 +418,7 @@ EXPORT_SYMBOL_GPL(iomap_zero_range);
int
iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
- struct iomap_ops *ops)
+ const struct iomap_ops *ops)
{
unsigned blocksize = (1 << inode->i_blkbits);
unsigned off = pos & (blocksize - 1);
@@ -443,7 +446,7 @@ iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length,
}
int iomap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
- struct iomap_ops *ops)
+ const struct iomap_ops *ops)
{
struct page *page = vmf->page;
struct inode *inode = file_inode(vma->vm_file);
@@ -542,7 +545,7 @@ iomap_fiemap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
}
int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fi,
- loff_t start, loff_t len, struct iomap_ops *ops)
+ loff_t start, loff_t len, const struct iomap_ops *ops)
{
struct fiemap_ctx ctx;
loff_t ret;
@@ -836,8 +839,8 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
}
ssize_t
-iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, struct iomap_ops *ops,
- iomap_dio_end_io_t end_io)
+iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
+ const struct iomap_ops *ops, iomap_dio_end_io_t end_io)
{
struct address_space *mapping = iocb->ki_filp->f_mapping;
struct inode *inode = file_inode(iocb->ki_filp);
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index a097048ed1a3..85d1483939b2 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -276,11 +276,11 @@ loop:
goto loop;
end_loop:
- write_unlock(&journal->j_state_lock);
del_timer_sync(&journal->j_commit_timer);
journal->j_task = NULL;
wake_up(&journal->j_wait_done_commit);
jbd_debug(1, "Journal thread exiting.\n");
+ write_unlock(&journal->j_state_lock);
return 0;
}
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index e1652665bd93..5e659ee08d6a 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1863,7 +1863,9 @@ static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
__blist_del_buffer(list, jh);
jh->b_jlist = BJ_None;
- if (test_clear_buffer_jbddirty(bh))
+ if (transaction && is_journal_aborted(transaction->t_journal))
+ clear_buffer_jbddirty(bh);
+ else if (test_clear_buffer_jbddirty(bh))
mark_buffer_dirty(bh); /* Expose it to the VM */
}
diff --git a/fs/jfs/resize.c b/fs/jfs/resize.c
index bd9b641ada2c..7ddcb445a3d9 100644
--- a/fs/jfs/resize.c
+++ b/fs/jfs/resize.c
@@ -98,7 +98,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
goto out;
}
- VolumeSize = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits;
+ VolumeSize = i_size_read(sb->s_bdev->bd_inode) >> sb->s_blocksize_bits;
if (VolumeSize) {
if (newLVSize > VolumeSize) {
@@ -211,7 +211,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
txQuiesce(sb);
/* Reset size of direct inode */
- sbi->direct_inode->i_size = sb->s_bdev->bd_inode->i_size;
+ sbi->direct_inode->i_size = i_size_read(sb->s_bdev->bd_inode);
if (sbi->mntflag & JFS_INLINELOG) {
/*
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 2be7c9ce6663..51881eb8ccff 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -283,7 +283,7 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize,
}
case Opt_resize_nosize:
{
- *newLVSize = sb->s_bdev->bd_inode->i_size >>
+ *newLVSize = i_size_read(sb->s_bdev->bd_inode) >>
sb->s_blocksize_bits;
if (*newLVSize == 0)
pr_err("JFS: Cannot determine volume size\n");
@@ -549,7 +549,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
goto out_unload;
}
inode->i_ino = 0;
- inode->i_size = sb->s_bdev->bd_inode->i_size;
+ inode->i_size = i_size_read(sb->s_bdev->bd_inode);
inode->i_mapping->a_ops = &jfs_metapage_aops;
hlist_add_fake(&inode->i_hash);
mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
diff --git a/fs/libfs.c b/fs/libfs.c
index e973cd51f126..28d6f35feed6 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -245,7 +245,8 @@ struct dentry *mount_pseudo_xattr(struct file_system_type *fs_type, char *name,
struct inode *root;
struct qstr d_name = QSTR_INIT(name, strlen(name));
- s = sget(fs_type, NULL, set_anon_super, MS_NOUSER, NULL);
+ s = sget_userns(fs_type, NULL, set_anon_super, MS_KERNMOUNT|MS_NOUSER,
+ &init_user_ns, NULL);
if (IS_ERR(s))
return ERR_CAST(s);
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 1c13dd80744f..7e4ea3b9f472 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -322,6 +322,8 @@ static int lockd_inet6addr_event(struct notifier_block *this,
dprintk("lockd_inet6addr_event: removed %pI6\n", &ifa->addr);
sin6.sin6_family = AF_INET6;
sin6.sin6_addr = ifa->addr;
+ if (ipv6_addr_type(&sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL)
+ sin6.sin6_scope_id = ifa->idev->dev->ifindex;
svc_age_temp_xprts_now(nlmsvc_rqst->rq_server,
(struct sockaddr *)&sin6);
}
diff --git a/fs/namei.c b/fs/namei.c
index ad74877e1442..7d87699c3e2e 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3353,13 +3353,50 @@ out:
return error;
}
+struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode, int open_flag)
+{
+ static const struct qstr name = QSTR_INIT("/", 1);
+ struct dentry *child = NULL;
+ struct inode *dir = dentry->d_inode;
+ struct inode *inode;
+ int error;
+
+ /* we want directory to be writable */
+ error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
+ if (error)
+ goto out_err;
+ error = -EOPNOTSUPP;
+ if (!dir->i_op->tmpfile)
+ goto out_err;
+ error = -ENOMEM;
+ child = d_alloc(dentry, &name);
+ if (unlikely(!child))
+ goto out_err;
+ error = dir->i_op->tmpfile(dir, child, mode);
+ if (error)
+ goto out_err;
+ error = -ENOENT;
+ inode = child->d_inode;
+ if (unlikely(!inode))
+ goto out_err;
+ if (!(open_flag & O_EXCL)) {
+ spin_lock(&inode->i_lock);
+ inode->i_state |= I_LINKABLE;
+ spin_unlock(&inode->i_lock);
+ }
+ return child;
+
+out_err:
+ dput(child);
+ return ERR_PTR(error);
+}
+EXPORT_SYMBOL(vfs_tmpfile);
+
static int do_tmpfile(struct nameidata *nd, unsigned flags,
const struct open_flags *op,
struct file *file, int *opened)
{
- static const struct qstr name = QSTR_INIT("/", 1);
struct dentry *child;
- struct inode *dir;
struct path path;
int error = path_lookupat(nd, flags | LOOKUP_DIRECTORY, &path);
if (unlikely(error))
@@ -3367,25 +3404,12 @@ static int do_tmpfile(struct nameidata *nd, unsigned flags,
error = mnt_want_write(path.mnt);
if (unlikely(error))
goto out;
- dir = path.dentry->d_inode;
- /* we want directory to be writable */
- error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
- if (error)
- goto out2;
- if (!dir->i_op->tmpfile) {
- error = -EOPNOTSUPP;
- goto out2;
- }
- child = d_alloc(path.dentry, &name);
- if (unlikely(!child)) {
- error = -ENOMEM;
+ child = vfs_tmpfile(path.dentry, op->mode, op->open_flag);
+ error = PTR_ERR(child);
+ if (unlikely(IS_ERR(child)))
goto out2;
- }
dput(path.dentry);
path.dentry = child;
- error = dir->i_op->tmpfile(dir, child, op->mode);
- if (error)
- goto out2;
audit_inode(nd->name, child, 0);
/* Don't check for other permissions, the inode was just created */
error = may_open(&path, 0, op->open_flag);
@@ -3396,14 +3420,8 @@ static int do_tmpfile(struct nameidata *nd, unsigned flags,
if (error)
goto out2;
error = open_check_o_direct(file);
- if (error) {
+ if (error)
fput(file);
- } else if (!(op->open_flag & O_EXCL)) {
- struct inode *inode = file_inode(file);
- spin_lock(&inode->i_lock);
- inode->i_state |= I_LINKABLE;
- spin_unlock(&inode->i_lock);
- }
out2:
mnt_drop_write(path.mnt);
out:
diff --git a/fs/namespace.c b/fs/namespace.c
index b5b1259e064f..487ba30bb5c6 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -742,26 +742,50 @@ static struct mountpoint *lookup_mountpoint(struct dentry *dentry)
return NULL;
}
-static struct mountpoint *new_mountpoint(struct dentry *dentry)
+static struct mountpoint *get_mountpoint(struct dentry *dentry)
{
- struct hlist_head *chain = mp_hash(dentry);
- struct mountpoint *mp;
+ struct mountpoint *mp, *new = NULL;
int ret;
- mp = kmalloc(sizeof(struct mountpoint), GFP_KERNEL);
- if (!mp)
+ if (d_mountpoint(dentry)) {
+mountpoint:
+ read_seqlock_excl(&mount_lock);
+ mp = lookup_mountpoint(dentry);
+ read_sequnlock_excl(&mount_lock);
+ if (mp)
+ goto done;
+ }
+
+ if (!new)
+ new = kmalloc(sizeof(struct mountpoint), GFP_KERNEL);
+ if (!new)
return ERR_PTR(-ENOMEM);
+
+ /* Exactly one processes may set d_mounted */
ret = d_set_mounted(dentry);
- if (ret) {
- kfree(mp);
- return ERR_PTR(ret);
- }
- mp->m_dentry = dentry;
- mp->m_count = 1;
- hlist_add_head(&mp->m_hash, chain);
- INIT_HLIST_HEAD(&mp->m_list);
+ /* Someone else set d_mounted? */
+ if (ret == -EBUSY)
+ goto mountpoint;
+
+ /* The dentry is not available as a mountpoint? */
+ mp = ERR_PTR(ret);
+ if (ret)
+ goto done;
+
+ /* Add the new mountpoint to the hash table */
+ read_seqlock_excl(&mount_lock);
+ new->m_dentry = dentry;
+ new->m_count = 1;
+ hlist_add_head(&new->m_hash, mp_hash(dentry));
+ INIT_HLIST_HEAD(&new->m_list);
+ read_sequnlock_excl(&mount_lock);
+
+ mp = new;
+ new = NULL;
+done:
+ kfree(new);
return mp;
}
@@ -1595,11 +1619,11 @@ void __detach_mounts(struct dentry *dentry)
struct mount *mnt;
namespace_lock();
+ lock_mount_hash();
mp = lookup_mountpoint(dentry);
if (IS_ERR_OR_NULL(mp))
goto out_unlock;
- lock_mount_hash();
event++;
while (!hlist_empty(&mp->m_list)) {
mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list);
@@ -1609,9 +1633,9 @@ void __detach_mounts(struct dentry *dentry)
}
else umount_tree(mnt, UMOUNT_CONNECTED);
}
- unlock_mount_hash();
put_mountpoint(mp);
out_unlock:
+ unlock_mount_hash();
namespace_unlock();
}
@@ -2038,9 +2062,7 @@ retry:
namespace_lock();
mnt = lookup_mnt(path);
if (likely(!mnt)) {
- struct mountpoint *mp = lookup_mountpoint(dentry);
- if (!mp)
- mp = new_mountpoint(dentry);
+ struct mountpoint *mp = get_mountpoint(dentry);
if (IS_ERR(mp)) {
namespace_unlock();
inode_unlock(dentry->d_inode);
@@ -2059,7 +2081,11 @@ retry:
static void unlock_mount(struct mountpoint *where)
{
struct dentry *dentry = where->m_dentry;
+
+ read_seqlock_excl(&mount_lock);
put_mountpoint(where);
+ read_sequnlock_excl(&mount_lock);
+
namespace_unlock();
inode_unlock(dentry->d_inode);
}
@@ -3135,9 +3161,9 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
touch_mnt_namespace(current->nsproxy->mnt_ns);
/* A moved mount should not expire automatically */
list_del_init(&new_mnt->mnt_expire);
+ put_mountpoint(root_mp);
unlock_mount_hash();
chroot_fs_refs(&root, &new);
- put_mountpoint(root_mp);
error = 0;
out4:
unlock_mount(old_mp);
diff --git a/fs/ncpfs/sock.c b/fs/ncpfs/sock.c
index f32f272ee501..f013c92bb5d8 100644
--- a/fs/ncpfs/sock.c
+++ b/fs/ncpfs/sock.c
@@ -40,19 +40,12 @@ static int _recv(struct socket *sock, void *buf, int size, unsigned flags)
return kernel_recvmsg(sock, &msg, &iov, 1, size, flags);
}
-static inline int do_send(struct socket *sock, struct kvec *vec, int count,
- int len, unsigned flags)
-{
- struct msghdr msg = { .msg_flags = flags };
- return kernel_sendmsg(sock, &msg, vec, count, len);
-}
-
static int _send(struct socket *sock, const void *buff, int len)
{
- struct kvec vec;
- vec.iov_base = (void *) buff;
- vec.iov_len = len;
- return do_send(sock, &vec, 1, len, 0);
+ struct msghdr msg = { .msg_flags = 0 };
+ struct kvec vec = {.iov_base = (void *)buff, .iov_len = len};
+ iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, &vec, 1, len);
+ return sock_sendmsg(sock, &msg);
}
struct ncp_request_reply {
@@ -63,9 +56,7 @@ struct ncp_request_reply {
size_t datalen;
int result;
enum { RQ_DONE, RQ_INPROGRESS, RQ_QUEUED, RQ_IDLE, RQ_ABANDONED } status;
- struct kvec* tx_ciov;
- size_t tx_totallen;
- size_t tx_iovlen;
+ struct iov_iter from;
struct kvec tx_iov[3];
u_int16_t tx_type;
u_int32_t sign[6];
@@ -205,28 +196,22 @@ static inline void __ncptcp_abort(struct ncp_server *server)
static int ncpdgram_send(struct socket *sock, struct ncp_request_reply *req)
{
- struct kvec vec[3];
- /* sock_sendmsg updates iov pointers for us :-( */
- memcpy(vec, req->tx_ciov, req->tx_iovlen * sizeof(vec[0]));
- return do_send(sock, vec, req->tx_iovlen,
- req->tx_totallen, MSG_DONTWAIT);
+ struct msghdr msg = { .msg_iter = req->from, .msg_flags = MSG_DONTWAIT };
+ return sock_sendmsg(sock, &msg);
}
static void __ncptcp_try_send(struct ncp_server *server)
{
struct ncp_request_reply *rq;
- struct kvec *iov;
- struct kvec iovc[3];
+ struct msghdr msg = { .msg_flags = MSG_NOSIGNAL | MSG_DONTWAIT };
int result;
rq = server->tx.creq;
if (!rq)
return;
- /* sock_sendmsg updates iov pointers for us :-( */
- memcpy(iovc, rq->tx_ciov, rq->tx_iovlen * sizeof(iov[0]));
- result = do_send(server->ncp_sock, iovc, rq->tx_iovlen,
- rq->tx_totallen, MSG_NOSIGNAL | MSG_DONTWAIT);
+ msg.msg_iter = rq->from;
+ result = sock_sendmsg(server->ncp_sock, &msg);
if (result == -EAGAIN)
return;
@@ -236,21 +221,12 @@ static void __ncptcp_try_send(struct ncp_server *server)
__ncp_abort_request(server, rq, result);
return;
}
- if (result >= rq->tx_totallen) {
+ if (!msg_data_left(&msg)) {
server->rcv.creq = rq;
server->tx.creq = NULL;
return;
}
- rq->tx_totallen -= result;
- iov = rq->tx_ciov;
- while (iov->iov_len <= result) {
- result -= iov->iov_len;
- iov++;
- rq->tx_iovlen--;
- }
- iov->iov_base += result;
- iov->iov_len -= result;
- rq->tx_ciov = iov;
+ rq->from = msg.msg_iter;
}
static inline void ncp_init_header(struct ncp_server *server, struct ncp_request_reply *req, struct ncp_request_header *h)
@@ -263,22 +239,21 @@ static inline void ncp_init_header(struct ncp_server *server, struct ncp_request
static void ncpdgram_start_request(struct ncp_server *server, struct ncp_request_reply *req)
{
- size_t signlen;
- struct ncp_request_header* h;
+ size_t signlen, len = req->tx_iov[1].iov_len;
+ struct ncp_request_header *h = req->tx_iov[1].iov_base;
- req->tx_ciov = req->tx_iov + 1;
-
- h = req->tx_iov[1].iov_base;
ncp_init_header(server, req, h);
- signlen = sign_packet(server, req->tx_iov[1].iov_base + sizeof(struct ncp_request_header) - 1,
- req->tx_iov[1].iov_len - sizeof(struct ncp_request_header) + 1,
- cpu_to_le32(req->tx_totallen), req->sign);
+ signlen = sign_packet(server,
+ req->tx_iov[1].iov_base + sizeof(struct ncp_request_header) - 1,
+ len - sizeof(struct ncp_request_header) + 1,
+ cpu_to_le32(len), req->sign);
if (signlen) {
- req->tx_ciov[1].iov_base = req->sign;
- req->tx_ciov[1].iov_len = signlen;
- req->tx_iovlen += 1;
- req->tx_totallen += signlen;
+ /* NCP over UDP appends signature */
+ req->tx_iov[2].iov_base = req->sign;
+ req->tx_iov[2].iov_len = signlen;
}
+ iov_iter_kvec(&req->from, WRITE | ITER_KVEC,
+ req->tx_iov + 1, signlen ? 2 : 1, len + signlen);
server->rcv.creq = req;
server->timeout_last = server->m.time_out;
server->timeout_retries = server->m.retry_count;
@@ -292,24 +267,23 @@ static void ncpdgram_start_request(struct ncp_server *server, struct ncp_request
static void ncptcp_start_request(struct ncp_server *server, struct ncp_request_reply *req)
{
- size_t signlen;
- struct ncp_request_header* h;
+ size_t signlen, len = req->tx_iov[1].iov_len;
+ struct ncp_request_header *h = req->tx_iov[1].iov_base;
- req->tx_ciov = req->tx_iov;
- h = req->tx_iov[1].iov_base;
ncp_init_header(server, req, h);
signlen = sign_packet(server, req->tx_iov[1].iov_base + sizeof(struct ncp_request_header) - 1,
- req->tx_iov[1].iov_len - sizeof(struct ncp_request_header) + 1,
- cpu_to_be32(req->tx_totallen + 24), req->sign + 4) + 16;
+ len - sizeof(struct ncp_request_header) + 1,
+ cpu_to_be32(len + 24), req->sign + 4) + 16;
req->sign[0] = htonl(NCP_TCP_XMIT_MAGIC);
- req->sign[1] = htonl(req->tx_totallen + signlen);
+ req->sign[1] = htonl(len + signlen);
req->sign[2] = htonl(NCP_TCP_XMIT_VERSION);
req->sign[3] = htonl(req->datalen + 8);
+ /* NCP over TCP prepends signature */
req->tx_iov[0].iov_base = req->sign;
req->tx_iov[0].iov_len = signlen;
- req->tx_iovlen += 1;
- req->tx_totallen += signlen;
+ iov_iter_kvec(&req->from, WRITE | ITER_KVEC,
+ req->tx_iov, 2, len + signlen);
server->tx.creq = req;
__ncptcp_try_send(server);
@@ -364,18 +338,17 @@ static void __ncp_next_request(struct ncp_server *server)
static void info_server(struct ncp_server *server, unsigned int id, const void * data, size_t len)
{
if (server->info_sock) {
- struct kvec iov[2];
- __be32 hdr[2];
-
- hdr[0] = cpu_to_be32(len + 8);
- hdr[1] = cpu_to_be32(id);
-
- iov[0].iov_base = hdr;
- iov[0].iov_len = 8;
- iov[1].iov_base = (void *) data;
- iov[1].iov_len = len;
+ struct msghdr msg = { .msg_flags = MSG_NOSIGNAL };
+ __be32 hdr[2] = {cpu_to_be32(len + 8), cpu_to_be32(id)};
+ struct kvec iov[2] = {
+ {.iov_base = hdr, .iov_len = 8},
+ {.iov_base = (void *)data, .iov_len = len},
+ };
+
+ iov_iter_kvec(&msg.msg_iter, ITER_KVEC | WRITE,
+ iov, 2, len + 8);
- do_send(server->info_sock, iov, 2, len + 8, MSG_NOSIGNAL);
+ sock_sendmsg(server->info_sock, &msg);
}
}
@@ -711,8 +684,6 @@ static int do_ncp_rpc_call(struct ncp_server *server, int size,
req->datalen = max_reply_size;
req->tx_iov[1].iov_base = server->packet;
req->tx_iov[1].iov_len = size;
- req->tx_iovlen = 1;
- req->tx_totallen = size;
req->tx_type = *(u_int16_t*)server->packet;
result = ncp_add_request(server, req);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 6dcbc5defb7a..0a0eaecf9676 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -38,7 +38,6 @@
#include <linux/mm.h>
#include <linux/delay.h>
#include <linux/errno.h>
-#include <linux/file.h>
#include <linux/string.h>
#include <linux/ratelimit.h>
#include <linux/printk.h>
@@ -1083,7 +1082,8 @@ int nfs4_call_sync(struct rpc_clnt *clnt,
return nfs4_call_sync_sequence(clnt, server, msg, args, res);
}
-static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo)
+static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo,
+ unsigned long timestamp)
{
struct nfs_inode *nfsi = NFS_I(dir);
@@ -1099,6 +1099,7 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo)
NFS_INO_INVALID_ACL;
}
dir->i_version = cinfo->after;
+ nfsi->read_cache_jiffies = timestamp;
nfsi->attr_gencount = nfs_inc_attr_generation_counter();
nfs_fscache_invalidate(dir);
spin_unlock(&dir->i_lock);
@@ -2391,11 +2392,13 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
nfs_fattr_map_and_free_names(server, &data->f_attr);
if (o_arg->open_flags & O_CREAT) {
- update_changeattr(dir, &o_res->cinfo);
if (o_arg->open_flags & O_EXCL)
data->file_created = 1;
else if (o_res->cinfo.before != o_res->cinfo.after)
data->file_created = 1;
+ if (data->file_created || dir->i_version != o_res->cinfo.after)
+ update_changeattr(dir, &o_res->cinfo,
+ o_res->f_attr->time_start);
}
if ((o_res->rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) == 0)
server->caps &= ~NFS_CAP_POSIX_LOCK;
@@ -2697,7 +2700,8 @@ static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata,
sattr->ia_valid |= ATTR_MTIME;
/* Except MODE, it seems harmless of setting twice. */
- if ((attrset[1] & FATTR4_WORD1_MODE))
+ if (opendata->o_arg.createmode != NFS4_CREATE_EXCLUSIVE &&
+ attrset[1] & FATTR4_WORD1_MODE)
sattr->ia_valid &= ~ATTR_MODE;
if (attrset[2] & FATTR4_WORD2_SECURITY_LABEL)
@@ -4073,11 +4077,12 @@ static int _nfs4_proc_remove(struct inode *dir, const struct qstr *name)
.rpc_argp = &args,
.rpc_resp = &res,
};
+ unsigned long timestamp = jiffies;
int status;
status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 1);
if (status == 0)
- update_changeattr(dir, &res.cinfo);
+ update_changeattr(dir, &res.cinfo, timestamp);
return status;
}
@@ -4125,7 +4130,8 @@ static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir)
if (nfs4_async_handle_error(task, res->server, NULL,
&data->timeout) == -EAGAIN)
return 0;
- update_changeattr(dir, &res->cinfo);
+ if (task->tk_status == 0)
+ update_changeattr(dir, &res->cinfo, res->dir_attr->time_start);
return 1;
}
@@ -4159,8 +4165,11 @@ static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
if (nfs4_async_handle_error(task, res->server, NULL, &data->timeout) == -EAGAIN)
return 0;
- update_changeattr(old_dir, &res->old_cinfo);
- update_changeattr(new_dir, &res->new_cinfo);
+ if (task->tk_status == 0) {
+ update_changeattr(old_dir, &res->old_cinfo, res->old_fattr->time_start);
+ if (new_dir != old_dir)
+ update_changeattr(new_dir, &res->new_cinfo, res->new_fattr->time_start);
+ }
return 1;
}
@@ -4197,7 +4206,7 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, const struct
status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
if (!status) {
- update_changeattr(dir, &res.cinfo);
+ update_changeattr(dir, &res.cinfo, res.fattr->time_start);
status = nfs_post_op_update_inode(inode, res.fattr);
if (!status)
nfs_setsecurity(inode, res.fattr, res.label);
@@ -4272,7 +4281,8 @@ static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_
int status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &data->msg,
&data->arg.seq_args, &data->res.seq_res, 1);
if (status == 0) {
- update_changeattr(dir, &data->res.dir_cinfo);
+ update_changeattr(dir, &data->res.dir_cinfo,
+ data->res.fattr->time_start);
status = nfs_instantiate(dentry, data->res.fh, data->res.fattr, data->res.label);
}
return status;
@@ -6127,7 +6137,6 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
p->server = server;
atomic_inc(&lsp->ls_count);
p->ctx = get_nfs_open_context(ctx);
- get_file(fl->fl_file);
memcpy(&p->fl, fl, sizeof(p->fl));
return p;
out_free_seqid:
@@ -6240,7 +6249,6 @@ static void nfs4_lock_release(void *calldata)
nfs_free_seqid(data->arg.lock_seqid);
nfs4_put_lock_state(data->lsp);
put_nfs_open_context(data->ctx);
- fput(data->fl.fl_file);
kfree(data);
dprintk("%s: done!\n", __func__);
}
@@ -8483,6 +8491,7 @@ nfs4_layoutget_handle_exception(struct rpc_task *task,
goto out;
}
+ nfs4_sequence_free_slot(&lgp->res.seq_res);
err = nfs4_handle_exception(server, nfs4err, exception);
if (!status) {
if (exception->retry)
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 1d152f4470cd..daeb94e3acd4 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1091,6 +1091,7 @@ static void nfs_increment_seqid(int status, struct nfs_seqid *seqid)
case -NFS4ERR_BADXDR:
case -NFS4ERR_RESOURCE:
case -NFS4ERR_NOFILEHANDLE:
+ case -NFS4ERR_MOVED:
/* Non-seqid mutating errors */
return;
};
@@ -1729,7 +1730,6 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error)
break;
case -NFS4ERR_STALE_CLIENTID:
set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
- nfs4_state_clear_reclaim_reboot(clp);
nfs4_state_start_reclaim_reboot(clp);
break;
case -NFS4ERR_EXPIRED:
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 59554f3adf29..dd042498ce7c 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1200,10 +1200,10 @@ _pnfs_return_layout(struct inode *ino)
send = pnfs_prepare_layoutreturn(lo, &stateid, NULL);
spin_unlock(&ino->i_lock);
- pnfs_free_lseg_list(&tmp_list);
if (send)
status = pnfs_send_layoutreturn(lo, &stateid, IOMODE_ANY, true);
out_put_layout_hdr:
+ pnfs_free_lseg_list(&tmp_list);
pnfs_put_layout_hdr(lo);
out:
dprintk("<-- %s status: %d\n", __func__, status);
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 43e109cc0ccc..e71f11b1a180 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -1102,6 +1102,7 @@ static struct flags {
{ NFSEXP_NOAUTHNLM, {"insecure_locks", ""}},
{ NFSEXP_V4ROOT, {"v4root", ""}},
{ NFSEXP_PNFS, {"pnfs", ""}},
+ { NFSEXP_SECURITY_LABEL, {"security_label", ""}},
{ 0, {"", ""}}
};
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index d818e4ffd79f..045c9081eabe 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -193,11 +193,9 @@ nfsd3_proc_write(struct svc_rqst *rqstp, struct nfsd3_writeargs *argp,
fh_copy(&resp->fh, &argp->fh);
resp->committed = argp->stable;
- nfserr = nfsd_write(rqstp, &resp->fh, NULL,
- argp->offset,
- rqstp->rq_vec, argp->vlen,
- &cnt,
- &resp->committed);
+ nfserr = nfsd_write(rqstp, &resp->fh, argp->offset,
+ rqstp->rq_vec, argp->vlen,
+ &cnt, resp->committed);
resp->count = cnt;
RETURN_STATUS(nfserr);
}
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index 596205d939a1..1fc07a9c70e9 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -223,10 +223,11 @@ nfsd4_alloc_layout_stateid(struct nfsd4_compound_state *cstate,
struct nfs4_layout_stateid *ls;
struct nfs4_stid *stp;
- stp = nfs4_alloc_stid(cstate->clp, nfs4_layout_stateid_cache);
+ stp = nfs4_alloc_stid(cstate->clp, nfs4_layout_stateid_cache,
+ nfsd4_free_layout_stateid);
if (!stp)
return NULL;
- stp->sc_free = nfsd4_free_layout_stateid;
+
get_nfs4_file(fp);
stp->sc_file = fp;
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 74a6e573e061..171f2d7ecfdd 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -95,11 +95,15 @@ check_attr_support(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
u32 *bmval, u32 *writable)
{
struct dentry *dentry = cstate->current_fh.fh_dentry;
+ struct svc_export *exp = cstate->current_fh.fh_export;
if (!nfsd_attrs_supported(cstate->minorversion, bmval))
return nfserr_attrnotsupp;
if ((bmval[0] & FATTR4_WORD0_ACL) && !IS_POSIXACL(d_inode(dentry)))
return nfserr_attrnotsupp;
+ if ((bmval[2] & FATTR4_WORD2_SECURITY_LABEL) &&
+ !(exp->ex_flags & NFSEXP_SECURITY_LABEL))
+ return nfserr_attrnotsupp;
if (writable && !bmval_is_subset(bmval, writable))
return nfserr_inval;
if (writable && (bmval[2] & FATTR4_WORD2_MODE_UMASK) &&
@@ -983,7 +987,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
status = nfsd_vfs_write(rqstp, &cstate->current_fh, filp,
write->wr_offset, rqstp->rq_vec, nvecs, &cnt,
- &write->wr_how_written);
+ write->wr_how_written);
fput(filp);
write->wr_bytes_written = cnt;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 4b4beaaa4eaa..a0dee8ae9f97 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -633,8 +633,8 @@ out:
return co;
}
-struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl,
- struct kmem_cache *slab)
+struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab,
+ void (*sc_free)(struct nfs4_stid *))
{
struct nfs4_stid *stid;
int new_id;
@@ -650,6 +650,8 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl,
idr_preload_end();
if (new_id < 0)
goto out_free;
+
+ stid->sc_free = sc_free;
stid->sc_client = cl;
stid->sc_stateid.si_opaque.so_id = new_id;
stid->sc_stateid.si_opaque.so_clid = cl->cl_clientid;
@@ -675,15 +677,12 @@ out_free:
static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct nfs4_client *clp)
{
struct nfs4_stid *stid;
- struct nfs4_ol_stateid *stp;
- stid = nfs4_alloc_stid(clp, stateid_slab);
+ stid = nfs4_alloc_stid(clp, stateid_slab, nfs4_free_ol_stateid);
if (!stid)
return NULL;
- stp = openlockstateid(stid);
- stp->st_stid.sc_free = nfs4_free_ol_stateid;
- return stp;
+ return openlockstateid(stid);
}
static void nfs4_free_deleg(struct nfs4_stid *stid)
@@ -781,11 +780,10 @@ alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh,
goto out_dec;
if (delegation_blocked(&current_fh->fh_handle))
goto out_dec;
- dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab));
+ dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab, nfs4_free_deleg));
if (dp == NULL)
goto out_dec;
- dp->dl_stid.sc_free = nfs4_free_deleg;
/*
* delegation seqid's are never incremented. The 4.1 special
* meaning of seqid 0 isn't meaningful, really, but let's avoid
@@ -5580,7 +5578,6 @@ init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo,
stp->st_stateowner = nfs4_get_stateowner(&lo->lo_owner);
get_nfs4_file(fp);
stp->st_stid.sc_file = fp;
- stp->st_stid.sc_free = nfs4_free_lock_stateid;
stp->st_access_bmap = 0;
stp->st_deny_bmap = open_stp->st_deny_bmap;
stp->st_openstp = open_stp;
@@ -5623,7 +5620,7 @@ find_or_create_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fi,
lst = find_lock_stateid(lo, fi);
if (lst == NULL) {
spin_unlock(&clp->cl_lock);
- ns = nfs4_alloc_stid(clp, stateid_slab);
+ ns = nfs4_alloc_stid(clp, stateid_slab, nfs4_free_lock_stateid);
if (ns == NULL)
return NULL;
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 7ecf16be4a44..7e4df80456ff 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -58,7 +58,7 @@
#define NFSDDBG_FACILITY NFSDDBG_XDR
-u32 nfsd_suppattrs[3][3] = {
+const u32 nfsd_suppattrs[3][3] = {
{NFSD4_SUPPORTED_ATTRS_WORD0,
NFSD4_SUPPORTED_ATTRS_WORD1,
NFSD4_SUPPORTED_ATTRS_WORD2},
@@ -1250,7 +1250,7 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write)
READ_BUF(16);
p = xdr_decode_hyper(p, &write->wr_offset);
write->wr_stable_how = be32_to_cpup(p++);
- if (write->wr_stable_how > 2)
+ if (write->wr_stable_how > NFS_FILE_SYNC)
goto xdr_error;
write->wr_buflen = be32_to_cpup(p++);
@@ -1966,9 +1966,13 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
DECODE_TAIL;
}
-static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode)
+static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode,
+ struct svc_export *exp)
{
- if (IS_I_VERSION(inode)) {
+ if (exp->ex_flags & NFSEXP_V4ROOT) {
+ *p++ = cpu_to_be32(convert_to_wallclock(exp->cd->flush_time));
+ *p++ = 0;
+ } else if (IS_I_VERSION(inode)) {
p = xdr_encode_hyper(p, inode->i_version);
} else {
*p++ = cpu_to_be32(stat->ctime.tv_sec);
@@ -2417,8 +2421,11 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
if ((bmval2 & FATTR4_WORD2_SECURITY_LABEL) ||
bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) {
- err = security_inode_getsecctx(d_inode(dentry),
+ if (exp->ex_flags & NFSEXP_SECURITY_LABEL)
+ err = security_inode_getsecctx(d_inode(dentry),
&context, &contextlen);
+ else
+ err = -EOPNOTSUPP;
contextsupport = (err == 0);
if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) {
if (err == -EOPNOTSUPP)
@@ -2440,7 +2447,9 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
p++; /* to be backfilled later */
if (bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) {
- u32 *supp = nfsd_suppattrs[minorversion];
+ u32 supp[3];
+
+ memcpy(supp, nfsd_suppattrs[minorversion], sizeof(supp));
if (!IS_POSIXACL(dentry->d_inode))
supp[0] &= ~FATTR4_WORD0_ACL;
@@ -2488,7 +2497,7 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
p = xdr_reserve_space(xdr, 8);
if (!p)
goto out_resource;
- p = encode_change(p, &stat, d_inode(dentry));
+ p = encode_change(p, &stat, d_inode(dentry), exp);
}
if (bmval0 & FATTR4_WORD0_SIZE) {
p = xdr_reserve_space(xdr, 8);
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index f3b2f34b10a3..d54fb0e3f30e 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -569,8 +569,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
if (*minorp == '.') {
if (num != 4)
return -EINVAL;
- minor = simple_strtoul(minorp+1, NULL, 0);
- if (minor == 0)
+ if (kstrtouint(minorp+1, 0, &minor) < 0)
return -EINVAL;
if (nfsd_minorversion(minor, sign == '-' ?
NFSD_CLEAR : NFSD_SET) < 0)
@@ -613,8 +612,13 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
tlen += len;
}
if (nfsd_vers(4, NFSD_AVAIL))
- for (minor = 1; minor <= NFSD_SUPPORTED_MINOR_VERSION;
+ for (minor = 0; minor <= NFSD_SUPPORTED_MINOR_VERSION;
minor++) {
+ if (minor == 0 && nfsd_minorversion(minor, NFSD_TEST))
+ /* for backward compatibility, don't report
+ * +4.0
+ */
+ continue;
len = snprintf(buf, remaining, " %c4.%u",
(nfsd_vers(4, NFSD_TEST) &&
nfsd_minorversion(minor, NFSD_TEST)) ?
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index d74c8c44dc35..d96606801d47 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -362,16 +362,16 @@ void nfsd_lockd_shutdown(void);
FATTR4_WORD2_MODE_UMASK | \
NFSD4_2_SECURITY_ATTRS)
-extern u32 nfsd_suppattrs[3][3];
+extern const u32 nfsd_suppattrs[3][3];
-static inline bool bmval_is_subset(u32 *bm1, u32 *bm2)
+static inline bool bmval_is_subset(const u32 *bm1, const u32 *bm2)
{
return !((bm1[0] & ~bm2[0]) ||
(bm1[1] & ~bm2[1]) ||
(bm1[2] & ~bm2[2]));
}
-static inline bool nfsd_attrs_supported(u32 minorversion, u32 *bmval)
+static inline bool nfsd_attrs_supported(u32 minorversion, const u32 *bmval)
{
return bmval_is_subset(bmval, nfsd_suppattrs[minorversion]);
}
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 010aff5c5a79..fa82b7707e85 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -204,18 +204,14 @@ nfsd_proc_write(struct svc_rqst *rqstp, struct nfsd_writeargs *argp,
struct nfsd_attrstat *resp)
{
__be32 nfserr;
- int stable = 1;
unsigned long cnt = argp->len;
dprintk("nfsd: WRITE %s %d bytes at %d\n",
SVCFH_fmt(&argp->fh),
argp->len, argp->offset);
- nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), NULL,
- argp->offset,
- rqstp->rq_vec, argp->vlen,
- &cnt,
- &stable);
+ nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), argp->offset,
+ rqstp->rq_vec, argp->vlen, &cnt, NFS_DATA_SYNC);
return nfsd_return_attrs(nfserr, resp);
}
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index e6bfd96734c0..2e378d0479ad 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -354,6 +354,8 @@ static int nfsd_inet6addr_event(struct notifier_block *this,
dprintk("nfsd_inet6addr_event: removed %pI6\n", &ifa->addr);
sin6.sin6_family = AF_INET6;
sin6.sin6_addr = ifa->addr;
+ if (ipv6_addr_type(&sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL)
+ sin6.sin6_scope_id = ifa->idev->dev->ifindex;
svc_age_temp_xprts_now(nn->nfsd_serv, (struct sockaddr *)&sin6);
}
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index c9399366f9df..4516e8b7d776 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -603,8 +603,8 @@ extern __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
__be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
stateid_t *stateid, unsigned char typemask,
struct nfs4_stid **s, struct nfsd_net *nn);
-struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl,
- struct kmem_cache *slab);
+struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab,
+ void (*sc_free)(struct nfs4_stid *));
void nfs4_unhash_stid(struct nfs4_stid *s);
void nfs4_put_stid(struct nfs4_stid *s);
void nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 26c6fdb4bf67..2c132162796e 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -940,14 +940,12 @@ static int wait_for_concurrent_writes(struct file *file)
__be32
nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
loff_t offset, struct kvec *vec, int vlen,
- unsigned long *cnt, int *stablep)
+ unsigned long *cnt, int stable)
{
struct svc_export *exp;
- struct inode *inode;
mm_segment_t oldfs;
__be32 err = 0;
int host_err;
- int stable = *stablep;
int use_wgather;
loff_t pos = offset;
unsigned int pflags = current->flags;
@@ -962,13 +960,11 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
*/
current->flags |= PF_LESS_THROTTLE;
- inode = file_inode(file);
- exp = fhp->fh_export;
-
+ exp = fhp->fh_export;
use_wgather = (rqstp->rq_vers == 2) && EX_WGATHER(exp);
if (!EX_ISSYNC(exp))
- stable = 0;
+ stable = NFS_UNSTABLE;
if (stable && !use_wgather)
flags |= RWF_SYNC;
@@ -1035,35 +1031,22 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
* N.B. After this call fhp needs an fh_put
*/
__be32
-nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
- loff_t offset, struct kvec *vec, int vlen, unsigned long *cnt,
- int *stablep)
+nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
+ struct kvec *vec, int vlen, unsigned long *cnt, int stable)
{
- __be32 err = 0;
+ struct file *file = NULL;
+ __be32 err = 0;
trace_write_start(rqstp, fhp, offset, vlen);
- if (file) {
- err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
- NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE);
- if (err)
- goto out;
- trace_write_opened(rqstp, fhp, offset, vlen);
- err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt,
- stablep);
- trace_write_io_done(rqstp, fhp, offset, vlen);
- } else {
- err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file);
- if (err)
- goto out;
+ err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file);
+ if (err)
+ goto out;
- trace_write_opened(rqstp, fhp, offset, vlen);
- if (cnt)
- err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen,
- cnt, stablep);
- trace_write_io_done(rqstp, fhp, offset, vlen);
- fput(file);
- }
+ trace_write_opened(rqstp, fhp, offset, vlen);
+ err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt, stable);
+ trace_write_io_done(rqstp, fhp, offset, vlen);
+ fput(file);
out:
trace_write_done(rqstp, fhp, offset, vlen);
return err;
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index 0bf9e7bf5800..db98c48c735a 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -83,12 +83,12 @@ __be32 nfsd_readv(struct file *, loff_t, struct kvec *, int,
unsigned long *);
__be32 nfsd_read(struct svc_rqst *, struct svc_fh *,
loff_t, struct kvec *, int, unsigned long *);
-__be32 nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *,
- loff_t, struct kvec *,int, unsigned long *, int *);
+__be32 nfsd_write(struct svc_rqst *, struct svc_fh *, loff_t,
+ struct kvec *, int, unsigned long *, int);
__be32 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct file *file, loff_t offset,
struct kvec *vec, int vlen, unsigned long *cnt,
- int *stablep);
+ int stable);
__be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *,
char *, int *);
__be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *,
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index bbc175d4213d..a4c46221755e 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -31,7 +31,6 @@ static bool should_merge(struct fsnotify_event *old_fsn,
static int fanotify_merge(struct list_head *list, struct fsnotify_event *event)
{
struct fsnotify_event *test_event;
- bool do_merge = false;
pr_debug("%s: list=%p event=%p\n", __func__, list, event);
@@ -47,16 +46,12 @@ static int fanotify_merge(struct list_head *list, struct fsnotify_event *event)
list_for_each_entry_reverse(test_event, list, list) {
if (should_merge(test_event, event)) {
- do_merge = true;
- break;
+ test_event->mask |= event->mask;
+ return 1;
}
}
- if (!do_merge)
- return 0;
-
- test_event->mask |= event->mask;
- return 1;
+ return 0;
}
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index d3fea0bd89e2..6043306e8e21 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -510,18 +510,6 @@ void fsnotify_detach_group_marks(struct fsnotify_group *group)
}
}
-void fsnotify_duplicate_mark(struct fsnotify_mark *new, struct fsnotify_mark *old)
-{
- assert_spin_locked(&old->lock);
- new->inode = old->inode;
- new->mnt = old->mnt;
- if (old->group)
- fsnotify_get_group(old->group);
- new->group = old->group;
- new->mask = old->mask;
- new->free_mark = old->free_mark;
-}
-
/*
* Nothing fancy, just initialize lists and locks and counters.
*/
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 83d576f6a287..77d1632e905d 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -3303,6 +3303,16 @@ static int ocfs2_downconvert_lock(struct ocfs2_super *osb,
mlog(ML_BASTS, "lockres %s, level %d => %d\n", lockres->l_name,
lockres->l_level, new_level);
+ /*
+ * On DLM_LKF_VALBLK, fsdlm behaves differently with o2cb. It always
+ * expects DLM_LKF_VALBLK being set if the LKB has LVB, so that
+ * we can recover correctly from node failure. Otherwise, we may get
+ * invalid LVB in LKB, but without DLM_SBF_VALNOTVALID being set.
+ */
+ if (!ocfs2_is_o2cb_active() &&
+ lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
+ lvb = 1;
+
if (lvb)
dlm_flags |= DLM_LKF_VALBLK;
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c
index 52c07346bea3..820359096c7a 100644
--- a/fs/ocfs2/stackglue.c
+++ b/fs/ocfs2/stackglue.c
@@ -48,6 +48,12 @@ static char ocfs2_hb_ctl_path[OCFS2_MAX_HB_CTL_PATH] = "/sbin/ocfs2_hb_ctl";
*/
static struct ocfs2_stack_plugin *active_stack;
+inline int ocfs2_is_o2cb_active(void)
+{
+ return !strcmp(active_stack->sp_name, OCFS2_STACK_PLUGIN_O2CB);
+}
+EXPORT_SYMBOL_GPL(ocfs2_is_o2cb_active);
+
static struct ocfs2_stack_plugin *ocfs2_stack_lookup(const char *name)
{
struct ocfs2_stack_plugin *p;
diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h
index f2dce10fae54..e3036e1790e8 100644
--- a/fs/ocfs2/stackglue.h
+++ b/fs/ocfs2/stackglue.h
@@ -298,6 +298,9 @@ void ocfs2_stack_glue_set_max_proto_version(struct ocfs2_protocol_version *max_p
int ocfs2_stack_glue_register(struct ocfs2_stack_plugin *plugin);
void ocfs2_stack_glue_unregister(struct ocfs2_stack_plugin *plugin);
+/* In ocfs2_downconvert_lock(), we need to know which stack we are using */
+int ocfs2_is_o2cb_active(void);
+
extern struct kset *ocfs2_kset;
#endif /* STACKGLUE_H */
diff --git a/fs/open.c b/fs/open.c
index 9921f70bc5ca..949cef29c3bb 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -301,12 +301,10 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
if (S_ISFIFO(inode->i_mode))
return -ESPIPE;
- /*
- * Let individual file system decide if it supports preallocation
- * for directories or not.
- */
- if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode) &&
- !S_ISBLK(inode->i_mode))
+ if (S_ISDIR(inode->i_mode))
+ return -EISDIR;
+
+ if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
return -ENODEV;
/* Check for wrap through zero too */
@@ -316,7 +314,7 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
if (!file->f_op->fallocate)
return -EOPNOTSUPP;
- sb_start_write(inode->i_sb);
+ file_start_write(file);
ret = file->f_op->fallocate(file, mode, offset, len);
/*
@@ -329,7 +327,7 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
if (ret == 0)
fsnotify_modify(file);
- sb_end_write(inode->i_sb);
+ file_end_write(file);
return ret;
}
EXPORT_SYMBOL_GPL(vfs_fallocate);
diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c
index b0ced669427e..c4ab6fdf17a0 100644
--- a/fs/orangefs/devorangefs-req.c
+++ b/fs/orangefs/devorangefs-req.c
@@ -400,8 +400,9 @@ static ssize_t orangefs_devreq_write_iter(struct kiocb *iocb,
/* remove the op from the in progress hash table */
op = orangefs_devreq_remove_op(head.tag);
if (!op) {
- gossip_err("WARNING: No one's waiting for tag %llu\n",
- llu(head.tag));
+ gossip_debug(GOSSIP_DEV_DEBUG,
+ "%s: No one's waiting for tag %llu\n",
+ __func__, llu(head.tag));
return ret;
}
diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c
index 551bc74ed2b8..5cd617980fbf 100644
--- a/fs/orangefs/inode.c
+++ b/fs/orangefs/inode.c
@@ -136,12 +136,6 @@ static ssize_t orangefs_direct_IO(struct kiocb *iocb,
return -EINVAL;
}
-struct backing_dev_info orangefs_backing_dev_info = {
- .name = "orangefs",
- .ra_pages = 0,
- .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
-};
-
/** ORANGEFS2 implementation of address space operations */
const struct address_space_operations orangefs_address_operations = {
.readpage = orangefs_readpage,
diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c
index 75375e90a63f..6333cbbdfef7 100644
--- a/fs/orangefs/orangefs-bufmap.c
+++ b/fs/orangefs/orangefs-bufmap.c
@@ -344,6 +344,11 @@ int orangefs_bufmap_initialize(struct ORANGEFS_dev_map_desc *user_desc)
user_desc->size,
user_desc->count);
+ if (user_desc->total_size < 0 ||
+ user_desc->size < 0 ||
+ user_desc->count < 0)
+ goto out;
+
/*
* sanity check alignment and size of buffer that caller wants to
* work with
diff --git a/fs/orangefs/orangefs-debugfs.c b/fs/orangefs/orangefs-debugfs.c
index 27e75cf28b3a..791912da97d7 100644
--- a/fs/orangefs/orangefs-debugfs.c
+++ b/fs/orangefs/orangefs-debugfs.c
@@ -967,13 +967,13 @@ int orangefs_debugfs_new_client_string(void __user *arg)
int ret;
ret = copy_from_user(&client_debug_array_string,
- (void __user *)arg,
- ORANGEFS_MAX_DEBUG_STRING_LEN);
+ (void __user *)arg,
+ ORANGEFS_MAX_DEBUG_STRING_LEN);
if (ret != 0) {
pr_info("%s: CLIENT_STRING: copy_from_user failed\n",
__func__);
- return -EIO;
+ return -EFAULT;
}
/*
@@ -988,17 +988,18 @@ int orangefs_debugfs_new_client_string(void __user *arg)
*/
client_debug_array_string[ORANGEFS_MAX_DEBUG_STRING_LEN - 1] =
'\0';
-
+
pr_info("%s: client debug array string has been received.\n",
__func__);
if (!help_string_initialized) {
/* Build a proper debug help string. */
- if (orangefs_prepare_debugfs_help_string(0)) {
+ ret = orangefs_prepare_debugfs_help_string(0);
+ if (ret) {
gossip_err("%s: no debug help string \n",
__func__);
- return -EIO;
+ return ret;
}
}
@@ -1011,7 +1012,7 @@ int orangefs_debugfs_new_client_string(void __user *arg)
help_string_initialized++;
- return ret;
+ return 0;
}
int orangefs_debugfs_new_debug(void __user *arg)
diff --git a/fs/orangefs/orangefs-dev-proto.h b/fs/orangefs/orangefs-dev-proto.h
index a3d84ffee905..f380f9ed1b28 100644
--- a/fs/orangefs/orangefs-dev-proto.h
+++ b/fs/orangefs/orangefs-dev-proto.h
@@ -50,8 +50,7 @@
* Misc constants. Please retain them as multiples of 8!
* Otherwise 32-64 bit interactions will be messed up :)
*/
-#define ORANGEFS_MAX_DEBUG_STRING_LEN 0x00000400
-#define ORANGEFS_MAX_DEBUG_ARRAY_LEN 0x00000800
+#define ORANGEFS_MAX_DEBUG_STRING_LEN 0x00000800
/*
* The maximum number of directory entries in a single request is 96.
diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h
index 3bf803d732c5..70355a9a2596 100644
--- a/fs/orangefs/orangefs-kernel.h
+++ b/fs/orangefs/orangefs-kernel.h
@@ -529,7 +529,6 @@ extern spinlock_t orangefs_htable_ops_in_progress_lock;
extern int hash_table_size;
extern const struct address_space_operations orangefs_address_operations;
-extern struct backing_dev_info orangefs_backing_dev_info;
extern const struct inode_operations orangefs_file_inode_operations;
extern const struct file_operations orangefs_file_operations;
extern const struct inode_operations orangefs_symlink_inode_operations;
diff --git a/fs/orangefs/orangefs-mod.c b/fs/orangefs/orangefs-mod.c
index 4113eb0495bf..c1b5174cb5a9 100644
--- a/fs/orangefs/orangefs-mod.c
+++ b/fs/orangefs/orangefs-mod.c
@@ -80,11 +80,6 @@ static int __init orangefs_init(void)
int ret = -1;
__u32 i = 0;
- ret = bdi_init(&orangefs_backing_dev_info);
-
- if (ret)
- return ret;
-
if (op_timeout_secs < 0)
op_timeout_secs = 0;
@@ -94,7 +89,7 @@ static int __init orangefs_init(void)
/* initialize global book keeping data structures */
ret = op_cache_initialize();
if (ret < 0)
- goto err;
+ goto out;
ret = orangefs_inode_cache_initialize();
if (ret < 0)
@@ -181,9 +176,6 @@ cleanup_inode:
cleanup_op:
op_cache_finalize();
-err:
- bdi_destroy(&orangefs_backing_dev_info);
-
out:
return ret;
}
@@ -207,8 +199,6 @@ static void __exit orangefs_exit(void)
kfree(orangefs_htable_ops_in_progress);
- bdi_destroy(&orangefs_backing_dev_info);
-
pr_info("orangefs: module version %s unloaded\n", ORANGEFS_VERSION);
}
diff --git a/fs/orangefs/orangefs-sysfs.c b/fs/orangefs/orangefs-sysfs.c
index 084954448f18..afd2f523b283 100644
--- a/fs/orangefs/orangefs-sysfs.c
+++ b/fs/orangefs/orangefs-sysfs.c
@@ -91,6 +91,13 @@
* Description:
* Readahead cache buffer count and size.
*
+ * What: /sys/fs/orangefs/readahead_readcnt
+ * Date: Jan 2017
+ * Contact: Martin Brandenburg <martin@omnibond.com>
+ * Description:
+ * Number of buffers (in multiples of readahead_size)
+ * which can be read ahead for a single file at once.
+ *
* What: /sys/fs/orangefs/acache/...
* Date: Jun 2015
* Contact: Martin Brandenburg <martin@omnibond.com>
@@ -329,7 +336,8 @@ static ssize_t sysfs_service_op_show(struct kobject *kobj,
if (!(orangefs_features & ORANGEFS_FEATURE_READAHEAD) &&
(!strcmp(attr->attr.name, "readahead_count") ||
!strcmp(attr->attr.name, "readahead_size") ||
- !strcmp(attr->attr.name, "readahead_count_size"))) {
+ !strcmp(attr->attr.name, "readahead_count_size") ||
+ !strcmp(attr->attr.name, "readahead_readcnt"))) {
rc = -EINVAL;
goto out;
}
@@ -360,6 +368,11 @@ static ssize_t sysfs_service_op_show(struct kobject *kobj,
"readahead_count_size"))
new_op->upcall.req.param.op =
ORANGEFS_PARAM_REQUEST_OP_READAHEAD_COUNT_SIZE;
+
+ else if (!strcmp(attr->attr.name,
+ "readahead_readcnt"))
+ new_op->upcall.req.param.op =
+ ORANGEFS_PARAM_REQUEST_OP_READAHEAD_READCNT;
} else if (!strcmp(kobj->name, ACACHE_KOBJ_ID)) {
if (!strcmp(attr->attr.name, "timeout_msecs"))
new_op->upcall.req.param.op =
@@ -542,7 +555,8 @@ static ssize_t sysfs_service_op_store(struct kobject *kobj,
if (!(orangefs_features & ORANGEFS_FEATURE_READAHEAD) &&
(!strcmp(attr->attr.name, "readahead_count") ||
!strcmp(attr->attr.name, "readahead_size") ||
- !strcmp(attr->attr.name, "readahead_count_size"))) {
+ !strcmp(attr->attr.name, "readahead_count_size") ||
+ !strcmp(attr->attr.name, "readahead_readcnt"))) {
rc = -EINVAL;
goto out;
}
@@ -609,6 +623,15 @@ static ssize_t sysfs_service_op_store(struct kobject *kobj,
new_op->upcall.req.param.u.value32[0] = val1;
new_op->upcall.req.param.u.value32[1] = val2;
goto value_set;
+ } else if (!strcmp(attr->attr.name,
+ "readahead_readcnt")) {
+ if ((val >= 0)) {
+ new_op->upcall.req.param.op =
+ ORANGEFS_PARAM_REQUEST_OP_READAHEAD_READCNT;
+ } else {
+ rc = 0;
+ goto out;
+ }
}
} else if (!strcmp(kobj->name, ACACHE_KOBJ_ID)) {
@@ -812,6 +835,10 @@ static struct orangefs_attribute readahead_count_size_attribute =
__ATTR(readahead_count_size, 0664, sysfs_service_op_show,
sysfs_service_op_store);
+static struct orangefs_attribute readahead_readcnt_attribute =
+ __ATTR(readahead_readcnt, 0664, sysfs_service_op_show,
+ sysfs_service_op_store);
+
static struct orangefs_attribute perf_counter_reset_attribute =
__ATTR(perf_counter_reset,
0664,
@@ -838,6 +865,7 @@ static struct attribute *orangefs_default_attrs[] = {
&readahead_count_attribute.attr,
&readahead_size_attribute.attr,
&readahead_count_size_attribute.attr,
+ &readahead_readcnt_attribute.attr,
&perf_counter_reset_attribute.attr,
&perf_history_size_attribute.attr,
&perf_time_interval_secs_attribute.attr,
diff --git a/fs/orangefs/upcall.h b/fs/orangefs/upcall.h
index af0b0e36d559..b8249f8fdd80 100644
--- a/fs/orangefs/upcall.h
+++ b/fs/orangefs/upcall.h
@@ -182,6 +182,7 @@ enum orangefs_param_request_op {
ORANGEFS_PARAM_REQUEST_OP_READAHEAD_SIZE = 26,
ORANGEFS_PARAM_REQUEST_OP_READAHEAD_COUNT = 27,
ORANGEFS_PARAM_REQUEST_OP_READAHEAD_COUNT_SIZE = 28,
+ ORANGEFS_PARAM_REQUEST_OP_READAHEAD_READCNT = 29,
};
struct orangefs_param_request_s {
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index f57043dace62..48eb8812ac5b 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -20,6 +20,7 @@
#include <linux/fdtable.h>
#include <linux/ratelimit.h>
#include "overlayfs.h"
+#include "ovl_entry.h"
#define OVL_COPY_UP_CHUNK_SIZE (1 << 20)
@@ -232,12 +233,14 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
struct dentry *dentry, struct path *lowerpath,
- struct kstat *stat, const char *link)
+ struct kstat *stat, const char *link,
+ struct kstat *pstat, bool tmpfile)
{
struct inode *wdir = workdir->d_inode;
struct inode *udir = upperdir->d_inode;
struct dentry *newdentry = NULL;
struct dentry *upper = NULL;
+ struct dentry *temp = NULL;
int err;
const struct cred *old_creds = NULL;
struct cred *new_creds = NULL;
@@ -248,25 +251,30 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
.link = link
};
- newdentry = ovl_lookup_temp(workdir, dentry);
- err = PTR_ERR(newdentry);
- if (IS_ERR(newdentry))
- goto out;
-
upper = lookup_one_len(dentry->d_name.name, upperdir,
dentry->d_name.len);
err = PTR_ERR(upper);
if (IS_ERR(upper))
- goto out1;
+ goto out;
err = security_inode_copy_up(dentry, &new_creds);
if (err < 0)
- goto out2;
+ goto out1;
if (new_creds)
old_creds = override_creds(new_creds);
- err = ovl_create_real(wdir, newdentry, &cattr, NULL, true);
+ if (tmpfile)
+ temp = ovl_do_tmpfile(upperdir, stat->mode);
+ else
+ temp = ovl_lookup_temp(workdir, dentry);
+ err = PTR_ERR(temp);
+ if (IS_ERR(temp))
+ goto out1;
+
+ err = 0;
+ if (!tmpfile)
+ err = ovl_create_real(wdir, temp, &cattr, NULL, true);
if (new_creds) {
revert_creds(old_creds);
@@ -281,39 +289,55 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
ovl_path_upper(dentry, &upperpath);
BUG_ON(upperpath.dentry != NULL);
- upperpath.dentry = newdentry;
+ upperpath.dentry = temp;
+
+ if (tmpfile) {
+ inode_unlock(udir);
+ err = ovl_copy_up_data(lowerpath, &upperpath,
+ stat->size);
+ inode_lock_nested(udir, I_MUTEX_PARENT);
+ } else {
+ err = ovl_copy_up_data(lowerpath, &upperpath,
+ stat->size);
+ }
- err = ovl_copy_up_data(lowerpath, &upperpath, stat->size);
if (err)
goto out_cleanup;
}
- err = ovl_copy_xattr(lowerpath->dentry, newdentry);
+ err = ovl_copy_xattr(lowerpath->dentry, temp);
if (err)
goto out_cleanup;
- inode_lock(newdentry->d_inode);
- err = ovl_set_attr(newdentry, stat);
- inode_unlock(newdentry->d_inode);
+ inode_lock(temp->d_inode);
+ err = ovl_set_attr(temp, stat);
+ inode_unlock(temp->d_inode);
if (err)
goto out_cleanup;
- err = ovl_do_rename(wdir, newdentry, udir, upper, 0);
+ if (tmpfile)
+ err = ovl_do_link(temp, udir, upper, true);
+ else
+ err = ovl_do_rename(wdir, temp, udir, upper, 0);
if (err)
goto out_cleanup;
+ newdentry = dget(tmpfile ? upper : temp);
ovl_dentry_update(dentry, newdentry);
ovl_inode_update(d_inode(dentry), d_inode(newdentry));
- newdentry = NULL;
+
+ /* Restore timestamps on parent (best effort) */
+ ovl_set_timestamps(upperdir, pstat);
out2:
- dput(upper);
+ dput(temp);
out1:
- dput(newdentry);
+ dput(upper);
out:
return err;
out_cleanup:
- ovl_cleanup(wdir, newdentry);
+ if (!tmpfile)
+ ovl_cleanup(wdir, temp);
goto out2;
}
@@ -337,6 +361,7 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
struct dentry *lowerdentry = lowerpath->dentry;
struct dentry *upperdir;
const char *link = NULL;
+ struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
if (WARN_ON(!workdir))
return -EROFS;
@@ -356,6 +381,25 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
return PTR_ERR(link);
}
+ /* Should we copyup with O_TMPFILE or with workdir? */
+ if (S_ISREG(stat->mode) && ofs->tmpfile) {
+ err = ovl_copy_up_start(dentry);
+ /* err < 0: interrupted, err > 0: raced with another copy-up */
+ if (unlikely(err)) {
+ pr_debug("ovl_copy_up_start(%pd2) = %i\n", dentry, err);
+ if (err > 0)
+ err = 0;
+ goto out_done;
+ }
+
+ inode_lock_nested(upperdir->d_inode, I_MUTEX_PARENT);
+ err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath,
+ stat, link, &pstat, true);
+ inode_unlock(upperdir->d_inode);
+ ovl_copy_up_end(dentry);
+ goto out_done;
+ }
+
err = -EIO;
if (lock_rename(workdir, upperdir) != NULL) {
pr_err("overlayfs: failed to lock workdir+upperdir\n");
@@ -368,13 +412,10 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
}
err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath,
- stat, link);
- if (!err) {
- /* Restore timestamps on parent (best effort) */
- ovl_set_timestamps(upperdir, &pstat);
- }
+ stat, link, &pstat, false);
out_unlock:
unlock_rename(workdir, upperdir);
+out_done:
do_delayed_call(&done);
return err;
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index 9ad48d9202a9..023bb0b03352 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -154,29 +154,38 @@ out_err:
static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d,
struct dentry **ret)
{
- const char *s = d->name.name;
+ /* Counting down from the end, since the prefix can change */
+ size_t rem = d->name.len - 1;
struct dentry *dentry = NULL;
int err;
- if (*s != '/')
+ if (d->name.name[0] != '/')
return ovl_lookup_single(base, d, d->name.name, d->name.len,
0, "", ret);
- while (*s++ == '/' && !IS_ERR_OR_NULL(base) && d_can_lookup(base)) {
+ while (!IS_ERR_OR_NULL(base) && d_can_lookup(base)) {
+ const char *s = d->name.name + d->name.len - rem;
const char *next = strchrnul(s, '/');
- size_t slen = strlen(s);
+ size_t thislen = next - s;
+ bool end = !next[0];
- if (WARN_ON(slen > d->name.len) ||
- WARN_ON(strcmp(d->name.name + d->name.len - slen, s)))
+ /* Verify we did not go off the rails */
+ if (WARN_ON(s[-1] != '/'))
return -EIO;
- err = ovl_lookup_single(base, d, s, next - s,
- d->name.len - slen, next, &base);
+ err = ovl_lookup_single(base, d, s, thislen,
+ d->name.len - rem, next, &base);
dput(dentry);
if (err)
return err;
dentry = base;
- s = next;
+ if (end)
+ break;
+
+ rem -= thislen + 1;
+
+ if (WARN_ON(rem >= d->name.len))
+ return -EIO;
}
*ret = dentry;
return 0;
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 8af450b0e57a..741dc0b6931f 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -127,6 +127,15 @@ static inline int ovl_do_whiteout(struct inode *dir, struct dentry *dentry)
return err;
}
+static inline struct dentry *ovl_do_tmpfile(struct dentry *dentry, umode_t mode)
+{
+ struct dentry *ret = vfs_tmpfile(dentry, mode, 0);
+ int err = IS_ERR(ret) ? PTR_ERR(ret) : 0;
+
+ pr_debug("tmpfile(%pd2, 0%o) = %i\n", dentry, mode, err);
+ return ret;
+}
+
static inline struct inode *ovl_inode_real(struct inode *inode, bool *is_upper)
{
unsigned long x = (unsigned long) READ_ONCE(inode->i_private);
@@ -169,6 +178,8 @@ void ovl_dentry_version_inc(struct dentry *dentry);
u64 ovl_dentry_version_get(struct dentry *dentry);
bool ovl_is_whiteout(struct dentry *dentry);
struct file *ovl_path_open(struct path *path, int flags);
+int ovl_copy_up_start(struct dentry *dentry);
+void ovl_copy_up_end(struct dentry *dentry);
/* namei.c */
int ovl_path_next(int idx, struct dentry *dentry, struct path *path);
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index d14bca1850d9..59614faa14c3 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -27,6 +27,8 @@ struct ovl_fs {
struct ovl_config config;
/* creds of process who forced instantiation of super block */
const struct cred *creator_cred;
+ bool tmpfile;
+ wait_queue_head_t copyup_wq;
};
/* private information held for every overlayfs dentry */
@@ -38,6 +40,7 @@ struct ovl_entry {
u64 version;
const char *redirect;
bool opaque;
+ bool copying;
};
struct rcu_head rcu;
};
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 20f48abbb82f..7b2188181367 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -160,6 +160,25 @@ static void ovl_put_super(struct super_block *sb)
kfree(ufs);
}
+static int ovl_sync_fs(struct super_block *sb, int wait)
+{
+ struct ovl_fs *ufs = sb->s_fs_info;
+ struct super_block *upper_sb;
+ int ret;
+
+ if (!ufs->upper_mnt)
+ return 0;
+ upper_sb = ufs->upper_mnt->mnt_sb;
+ if (!upper_sb->s_op->sync_fs)
+ return 0;
+
+ /* real inodes have already been synced by sync_filesystem(ovl_sb) */
+ down_read(&upper_sb->s_umount);
+ ret = upper_sb->s_op->sync_fs(upper_sb, wait);
+ up_read(&upper_sb->s_umount);
+ return ret;
+}
+
/**
* ovl_statfs
* @sb: The overlayfs super block
@@ -222,6 +241,7 @@ static int ovl_remount(struct super_block *sb, int *flags, char *data)
static const struct super_operations ovl_super_operations = {
.put_super = ovl_put_super,
+ .sync_fs = ovl_sync_fs,
.statfs = ovl_statfs,
.show_options = ovl_show_options,
.remount_fs = ovl_remount,
@@ -701,6 +721,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
unsigned int stacklen = 0;
unsigned int i;
bool remote = false;
+ struct cred *cred;
int err;
err = -ENOMEM;
@@ -708,6 +729,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
if (!ufs)
goto out;
+ init_waitqueue_head(&ufs->copyup_wq);
ufs->config.redirect_dir = ovl_redirect_dir_def;
err = ovl_parse_opt((char *) data, &ufs->config);
if (err)
@@ -825,6 +847,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
* creation of workdir in previous step.
*/
if (ufs->workdir) {
+ struct dentry *temp;
+
err = ovl_check_d_type_supported(&workpath);
if (err < 0)
goto out_put_workdir;
@@ -836,6 +860,14 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
*/
if (!err)
pr_warn("overlayfs: upper fs needs to support d_type.\n");
+
+ /* Check if upper/work fs supports O_TMPFILE */
+ temp = ovl_do_tmpfile(ufs->workdir, S_IFREG | 0);
+ ufs->tmpfile = !IS_ERR(temp);
+ if (ufs->tmpfile)
+ dput(temp);
+ else
+ pr_warn("overlayfs: upper fs does not support tmpfile.\n");
}
}
@@ -870,10 +902,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
else
sb->s_d_op = &ovl_dentry_operations;
- ufs->creator_cred = prepare_creds();
- if (!ufs->creator_cred)
+ ufs->creator_cred = cred = prepare_creds();
+ if (!cred)
goto out_put_lower_mnt;
+ /* Never override disk quota limits or use reserved space */
+ cap_lower(cred->cap_effective, CAP_SYS_RESOURCE);
+
err = -ENOMEM;
oe = ovl_alloc_entry(numlower);
if (!oe)
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 952286f4826c..01157d6e8cfe 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -263,3 +263,33 @@ struct file *ovl_path_open(struct path *path, int flags)
{
return dentry_open(path, flags | O_NOATIME, current_cred());
}
+
+int ovl_copy_up_start(struct dentry *dentry)
+{
+ struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
+ struct ovl_entry *oe = dentry->d_fsdata;
+ int err;
+
+ spin_lock(&ofs->copyup_wq.lock);
+ err = wait_event_interruptible_locked(ofs->copyup_wq, !oe->copying);
+ if (!err) {
+ if (oe->__upperdentry)
+ err = 1; /* Already copied up */
+ else
+ oe->copying = true;
+ }
+ spin_unlock(&ofs->copyup_wq.lock);
+
+ return err;
+}
+
+void ovl_copy_up_end(struct dentry *dentry)
+{
+ struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
+ struct ovl_entry *oe = dentry->d_fsdata;
+
+ spin_lock(&ofs->copyup_wq.lock);
+ oe->copying = false;
+ wake_up_locked(&ofs->copyup_wq);
+ spin_unlock(&ofs->copyup_wq.lock);
+}
diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index 595522022aca..c9d48dc78495 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -922,11 +922,10 @@ int simple_set_acl(struct inode *inode, struct posix_acl *acl, int type)
int error;
if (type == ACL_TYPE_ACCESS) {
- error = posix_acl_equiv_mode(acl, &inode->i_mode);
- if (error < 0)
- return 0;
- if (error == 0)
- acl = NULL;
+ error = posix_acl_update_mode(inode,
+ &inode->i_mode, &acl);
+ if (error)
+ return error;
}
inode->i_ctime = current_time(inode);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 8e7e61b28f31..87c9a9aacda3 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -3179,6 +3179,8 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx)
iter.tgid += 1, iter = next_tgid(ns, iter)) {
char name[PROC_NUMBUF];
int len;
+
+ cond_resched();
if (!has_pid_permissions(ns, iter.task, 2))
continue;
diff --git a/fs/proc/page.c b/fs/proc/page.c
index a2066e6dee90..2726536489b1 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -173,7 +173,8 @@ u64 stable_page_flags(struct page *page)
u |= kpf_copy_bit(k, KPF_ACTIVE, PG_active);
u |= kpf_copy_bit(k, KPF_RECLAIM, PG_reclaim);
- u |= kpf_copy_bit(k, KPF_SWAPCACHE, PG_swapcache);
+ if (PageSwapCache(page))
+ u |= 1 << KPF_SWAPCACHE;
u |= kpf_copy_bit(k, KPF_SWAPBACKED, PG_swapbacked);
u |= kpf_copy_bit(k, KPF_UNEVICTABLE, PG_unevictable);
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 55313d994895..d4e37acd4821 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -709,7 +709,7 @@ static int proc_sys_readdir(struct file *file, struct dir_context *ctx)
ctl_dir = container_of(head, struct ctl_dir, header);
if (!dir_emit_dots(file, ctx))
- return 0;
+ goto out;
pos = 2;
@@ -719,6 +719,7 @@ static int proc_sys_readdir(struct file *file, struct dir_context *ctx)
break;
}
}
+out:
sysctl_head_finish(head);
return 0;
}
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 27c059e1760a..11f918d34b1e 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -133,7 +133,8 @@ ramoops_get_next_prz(struct persistent_ram_zone *przs[], uint *c, uint max,
struct persistent_ram_zone *prz;
int i = (*c)++;
- if (i >= max)
+ /* Give up if we never existed or have hit the end. */
+ if (!przs || i >= max)
return NULL;
prz = przs[i];
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index a857338b7dab..bc927e30bdcc 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -467,8 +467,7 @@ static int persistent_ram_buffer_map(phys_addr_t start, phys_addr_t size,
}
static int persistent_ram_post_init(struct persistent_ram_zone *prz, u32 sig,
- struct persistent_ram_ecc_info *ecc_info,
- unsigned long flags)
+ struct persistent_ram_ecc_info *ecc_info)
{
int ret;
@@ -494,10 +493,9 @@ static int persistent_ram_post_init(struct persistent_ram_zone *prz, u32 sig,
prz->buffer->sig);
}
+ /* Rewind missing or invalid memory area. */
prz->buffer->sig = sig;
persistent_ram_zap(prz);
- prz->buffer_lock = __RAW_SPIN_LOCK_UNLOCKED(buffer_lock);
- prz->flags = flags;
return 0;
}
@@ -533,11 +531,15 @@ struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size,
goto err;
}
+ /* Initialize general buffer state. */
+ prz->buffer_lock = __RAW_SPIN_LOCK_UNLOCKED(buffer_lock);
+ prz->flags = flags;
+
ret = persistent_ram_buffer_map(start, size, prz, memtype);
if (ret)
goto err;
- ret = persistent_ram_post_init(prz, sig, ecc_info, flags);
+ ret = persistent_ram_post_init(prz, sig, ecc_info);
if (ret)
goto err;
diff --git a/fs/read_write.c b/fs/read_write.c
index 5816d4c4cab0..820a3f06c46a 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1518,6 +1518,11 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
if (flags != 0)
return -EINVAL;
+ if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
+ return -EISDIR;
+ if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
+ return -EINVAL;
+
ret = rw_verify_area(READ, file_in, &pos_in, len);
if (unlikely(ret))
return ret;
@@ -1538,7 +1543,7 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
if (len == 0)
return 0;
- sb_start_write(inode_out->i_sb);
+ file_start_write(file_out);
/*
* Try cloning first, this is supported by more file systems, and
@@ -1574,7 +1579,7 @@ done:
inc_syscr(current);
inc_syscw(current);
- sb_end_write(inode_out->i_sb);
+ file_end_write(file_out);
return ret;
}
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index d0f8a38dfafa..0186fe6d39f3 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -74,6 +74,7 @@
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/uaccess.h>
+#include <linux/major.h>
#include "internal.h"
static struct kmem_cache *romfs_inode_cachep;
@@ -416,7 +417,22 @@ static void romfs_destroy_inode(struct inode *inode)
static int romfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct super_block *sb = dentry->d_sb;
- u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
+ u64 id = 0;
+
+ /* When calling huge_encode_dev(),
+ * use sb->s_bdev->bd_dev when,
+ * - CONFIG_ROMFS_ON_BLOCK defined
+ * use sb->s_dev when,
+ * - CONFIG_ROMFS_ON_BLOCK undefined and
+ * - CONFIG_ROMFS_ON_MTD defined
+ * leave id as 0 when,
+ * - CONFIG_ROMFS_ON_BLOCK undefined and
+ * - CONFIG_ROMFS_ON_MTD undefined
+ */
+ if (sb->s_bdev)
+ id = huge_encode_dev(sb->s_bdev->bd_dev);
+ else if (sb->s_dev)
+ id = huge_encode_dev(sb->s_dev);
buf->f_type = ROMFS_MAGIC;
buf->f_namelen = ROMFS_MAXFN;
@@ -489,6 +505,11 @@ static int romfs_fill_super(struct super_block *sb, void *data, int silent)
sb->s_flags |= MS_RDONLY | MS_NOATIME;
sb->s_op = &romfs_super_ops;
+#ifdef CONFIG_ROMFS_ON_MTD
+ /* Use same dev ID from the underlying mtdblock device */
+ if (sb->s_mtd)
+ sb->s_dev = MKDEV(MTD_BLOCK_MAJOR, sb->s_mtd->index);
+#endif
/* read the image superblock and check it */
rsb = kmalloc(512, GFP_KERNEL);
if (!rsb)
diff --git a/fs/splice.c b/fs/splice.c
index 873d83104e79..7d0c0f1a9657 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -204,6 +204,7 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
buf->len = spd->partial[page_nr].len;
buf->private = spd->partial[page_nr].private;
buf->ops = spd->ops;
+ buf->flags = 0;
pipe->nrbufs++;
page_nr++;
@@ -244,11 +245,6 @@ ssize_t add_to_pipe(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
}
EXPORT_SYMBOL(add_to_pipe);
-void spd_release_page(struct splice_pipe_desc *spd, unsigned int i)
-{
- put_page(spd->pages[i]);
-}
-
/*
* Check if we need to grow the arrays holding pages and partial page
* descriptions.
@@ -305,6 +301,8 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
iov_iter_pipe(&to, ITER_PIPE | READ, pipe, len);
idx = to.idx;
init_sync_kiocb(&kiocb, in);
+ if (flags & SPLICE_F_NONBLOCK)
+ kiocb.ki_flags |= IOCB_NDELAY;
kiocb.ki_pos = *ppos;
ret = in->f_op->read_iter(&kiocb, &to);
if (ret > 0) {
@@ -390,7 +388,7 @@ static ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
struct iov_iter to;
struct page **pages;
unsigned int nr_pages;
- size_t offset, dummy, copied = 0;
+ size_t offset, base, copied = 0;
ssize_t res;
int i;
@@ -405,12 +403,11 @@ static ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
iov_iter_pipe(&to, ITER_PIPE | READ, pipe, len + offset);
- res = iov_iter_get_pages_alloc(&to, &pages, len + offset, &dummy);
+ res = iov_iter_get_pages_alloc(&to, &pages, len + offset, &base);
if (res <= 0)
return -ENOMEM;
- BUG_ON(dummy);
- nr_pages = DIV_ROUND_UP(res, PAGE_SIZE);
+ nr_pages = DIV_ROUND_UP(res + base, PAGE_SIZE);
vec = __vec;
if (nr_pages > PIPE_DEF_BUFFERS) {
@@ -1356,6 +1353,8 @@ SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, iov,
struct fd f;
long error;
+ if (unlikely(flags & ~SPLICE_F_ALL))
+ return -EINVAL;
if (unlikely(nr_segs > UIO_MAXIOV))
return -EINVAL;
else if (unlikely(!nr_segs))
@@ -1406,6 +1405,9 @@ SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in,
if (unlikely(!len))
return 0;
+ if (unlikely(flags & ~SPLICE_F_ALL))
+ return -EINVAL;
+
error = -EBADF;
in = fdget(fd_in);
if (in.file) {
@@ -1734,6 +1736,9 @@ SYSCALL_DEFINE4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags)
struct fd in;
int error;
+ if (unlikely(flags & ~SPLICE_F_ALL))
+ return -EINVAL;
+
if (unlikely(!len))
return 0;
diff --git a/fs/ubifs/Kconfig b/fs/ubifs/Kconfig
index 0a908ae7af13..b0d0623c83ed 100644
--- a/fs/ubifs/Kconfig
+++ b/fs/ubifs/Kconfig
@@ -53,7 +53,7 @@ config UBIFS_ATIME_SUPPORT
config UBIFS_FS_ENCRYPTION
bool "UBIFS Encryption"
- depends on UBIFS_FS
+ depends on UBIFS_FS && BLOCK
select FS_ENCRYPTION
default n
help
diff --git a/fs/ubifs/crypto.c b/fs/ubifs/crypto.c
index 3402720f2b28..6335abcf98df 100644
--- a/fs/ubifs/crypto.c
+++ b/fs/ubifs/crypto.c
@@ -26,15 +26,6 @@ static unsigned int ubifs_crypt_max_namelen(struct inode *inode)
return UBIFS_MAX_NLEN;
}
-static int ubifs_key_prefix(struct inode *inode, u8 **key)
-{
- static char prefix[] = "ubifs:";
-
- *key = prefix;
-
- return sizeof(prefix) - 1;
-}
-
int ubifs_encrypt(const struct inode *inode, struct ubifs_data_node *dn,
unsigned int in_len, unsigned int *out_len, int block)
{
@@ -88,10 +79,10 @@ int ubifs_decrypt(const struct inode *inode, struct ubifs_data_node *dn,
struct fscrypt_operations ubifs_crypt_operations = {
.flags = FS_CFLG_OWN_PAGES,
+ .key_prefix = "ubifs:",
.get_context = ubifs_crypt_get_context,
.set_context = ubifs_crypt_set_context,
.is_encrypted = __ubifs_crypt_is_encrypted,
.empty_dir = ubifs_crypt_empty_dir,
.max_namelen = ubifs_crypt_max_namelen,
- .key_prefix = ubifs_key_prefix,
};
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 1c5331ac9614..528369f3e472 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -390,16 +390,6 @@ static int do_tmpfile(struct inode *dir, struct dentry *dentry,
dbg_gen("dent '%pd', mode %#hx in dir ino %lu",
dentry, mode, dir->i_ino);
- if (ubifs_crypt_is_encrypted(dir)) {
- err = fscrypt_get_encryption_info(dir);
- if (err)
- return err;
-
- if (!fscrypt_has_encryption_key(dir)) {
- return -EPERM;
- }
- }
-
err = fscrypt_setup_filename(dir, &dentry->d_name, 0, &nm);
if (err)
return err;
@@ -741,17 +731,9 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
ubifs_assert(inode_is_locked(dir));
ubifs_assert(inode_is_locked(inode));
- if (ubifs_crypt_is_encrypted(dir)) {
- if (!fscrypt_has_permitted_context(dir, inode))
- return -EPERM;
-
- err = fscrypt_get_encryption_info(inode);
- if (err)
- return err;
-
- if (!fscrypt_has_encryption_key(inode))
- return -EPERM;
- }
+ if (ubifs_crypt_is_encrypted(dir) &&
+ !fscrypt_has_permitted_context(dir, inode))
+ return -EPERM;
err = fscrypt_setup_filename(dir, &dentry->d_name, 0, &nm);
if (err)
@@ -1000,17 +982,6 @@ static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
if (err)
return err;
- if (ubifs_crypt_is_encrypted(dir)) {
- err = fscrypt_get_encryption_info(dir);
- if (err)
- goto out_budg;
-
- if (!fscrypt_has_encryption_key(dir)) {
- err = -EPERM;
- goto out_budg;
- }
- }
-
err = fscrypt_setup_filename(dir, &dentry->d_name, 0, &nm);
if (err)
goto out_budg;
@@ -1096,17 +1067,6 @@ static int ubifs_mknod(struct inode *dir, struct dentry *dentry,
return err;
}
- if (ubifs_crypt_is_encrypted(dir)) {
- err = fscrypt_get_encryption_info(dir);
- if (err)
- goto out_budg;
-
- if (!fscrypt_has_encryption_key(dir)) {
- err = -EPERM;
- goto out_budg;
- }
- }
-
err = fscrypt_setup_filename(dir, &dentry->d_name, 0, &nm);
if (err)
goto out_budg;
@@ -1231,18 +1191,6 @@ static int ubifs_symlink(struct inode *dir, struct dentry *dentry,
goto out_inode;
}
- err = fscrypt_get_encryption_info(inode);
- if (err) {
- kfree(sd);
- goto out_inode;
- }
-
- if (!fscrypt_has_encryption_key(inode)) {
- kfree(sd);
- err = -EPERM;
- goto out_inode;
- }
-
ostr.name = sd->encrypted_path;
ostr.len = disk_link.len;
diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c
index 78d713644df3..da519ba205f6 100644
--- a/fs/ubifs/ioctl.c
+++ b/fs/ubifs/ioctl.c
@@ -217,6 +217,9 @@ long ubifs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
case FS_IOC32_SETFLAGS:
cmd = FS_IOC_SETFLAGS;
break;
+ case FS_IOC_SET_ENCRYPTION_POLICY:
+ case FS_IOC_GET_ENCRYPTION_POLICY:
+ break;
default:
return -ENOIOCTLCMD;
}
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index a459211a1c21..294519b98874 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -744,6 +744,7 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
} else {
data->compr_size = 0;
+ out_len = compr_len;
}
dlen = UBIFS_DATA_NODE_SZ + out_len;
@@ -1319,6 +1320,7 @@ static int truncate_data_node(const struct ubifs_info *c, const struct inode *in
dn->compr_type = cpu_to_le16(compr_type);
dn->size = cpu_to_le32(*new_len);
*new_len = UBIFS_DATA_NODE_SZ + out_len;
+ err = 0;
out:
kfree(buf);
return err;
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index 74ae2de949df..709aa098dd46 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -34,6 +34,11 @@
#include <linux/slab.h>
#include "ubifs.h"
+static int try_read_node(const struct ubifs_info *c, void *buf, int type,
+ int len, int lnum, int offs);
+static int fallible_read_node(struct ubifs_info *c, const union ubifs_key *key,
+ struct ubifs_zbranch *zbr, void *node);
+
/*
* Returned codes of 'matches_name()' and 'fallible_matches_name()' functions.
* @NAME_LESS: name corresponding to the first argument is less than second
@@ -402,7 +407,19 @@ static int tnc_read_hashed_node(struct ubifs_info *c, struct ubifs_zbranch *zbr,
return 0;
}
- err = ubifs_tnc_read_node(c, zbr, node);
+ if (c->replaying) {
+ err = fallible_read_node(c, &zbr->key, zbr, node);
+ /*
+ * When the node was not found, return -ENOENT, 0 otherwise.
+ * Negative return codes stay as-is.
+ */
+ if (err == 0)
+ err = -ENOENT;
+ else if (err == 1)
+ err = 0;
+ } else {
+ err = ubifs_tnc_read_node(c, zbr, node);
+ }
if (err)
return err;
@@ -2857,7 +2874,11 @@ struct ubifs_dent_node *ubifs_tnc_next_ent(struct ubifs_info *c,
if (fname_len(nm) > 0) {
if (err) {
/* Handle collisions */
- err = resolve_collision(c, key, &znode, &n, nm);
+ if (c->replaying)
+ err = fallible_resolve_collision(c, key, &znode, &n,
+ nm, 0);
+ else
+ err = resolve_collision(c, key, &znode, &n, nm);
dbg_tnc("rc returned %d, znode %p, n %d",
err, znode, n);
if (unlikely(err < 0))
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index ca72382ce6cc..d9df379bacc6 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -38,7 +38,11 @@
#include <linux/backing-dev.h>
#include <linux/security.h>
#include <linux/xattr.h>
-#include <linux/fscrypto.h>
+#ifdef CONFIG_UBIFS_FS_ENCRYPTION
+#include <linux/fscrypt_supp.h>
+#else
+#include <linux/fscrypt_notsupp.h>
+#endif
#include <linux/random.h>
#include "ubifs-media.h"
@@ -1797,28 +1801,6 @@ int ubifs_decompress(const struct ubifs_info *c, const void *buf, int len,
#include "key.h"
#ifndef CONFIG_UBIFS_FS_ENCRYPTION
-#define fscrypt_set_d_op(i)
-#define fscrypt_get_ctx fscrypt_notsupp_get_ctx
-#define fscrypt_release_ctx fscrypt_notsupp_release_ctx
-#define fscrypt_encrypt_page fscrypt_notsupp_encrypt_page
-#define fscrypt_decrypt_page fscrypt_notsupp_decrypt_page
-#define fscrypt_decrypt_bio_pages fscrypt_notsupp_decrypt_bio_pages
-#define fscrypt_pullback_bio_page fscrypt_notsupp_pullback_bio_page
-#define fscrypt_restore_control_page fscrypt_notsupp_restore_control_page
-#define fscrypt_zeroout_range fscrypt_notsupp_zeroout_range
-#define fscrypt_ioctl_set_policy fscrypt_notsupp_ioctl_set_policy
-#define fscrypt_ioctl_get_policy fscrypt_notsupp_ioctl_get_policy
-#define fscrypt_has_permitted_context fscrypt_notsupp_has_permitted_context
-#define fscrypt_inherit_context fscrypt_notsupp_inherit_context
-#define fscrypt_get_encryption_info fscrypt_notsupp_get_encryption_info
-#define fscrypt_put_encryption_info fscrypt_notsupp_put_encryption_info
-#define fscrypt_setup_filename fscrypt_notsupp_setup_filename
-#define fscrypt_free_filename fscrypt_notsupp_free_filename
-#define fscrypt_fname_encrypted_size fscrypt_notsupp_fname_encrypted_size
-#define fscrypt_fname_alloc_buffer fscrypt_notsupp_fname_alloc_buffer
-#define fscrypt_fname_free_buffer fscrypt_notsupp_fname_free_buffer
-#define fscrypt_fname_disk_to_usr fscrypt_notsupp_fname_disk_to_usr
-#define fscrypt_fname_usr_to_disk fscrypt_notsupp_fname_usr_to_disk
static inline int ubifs_encrypt(const struct inode *inode,
struct ubifs_data_node *dn,
unsigned int in_len, unsigned int *out_len,
diff --git a/fs/udf/ecma_167.h b/fs/udf/ecma_167.h
index 4792b771aa80..9f24bd1a9f44 100644
--- a/fs/udf/ecma_167.h
+++ b/fs/udf/ecma_167.h
@@ -41,7 +41,7 @@
struct charspec {
uint8_t charSetType;
uint8_t charSetInfo[63];
-} __attribute__ ((packed));
+} __packed;
/* Character Set Type (ECMA 167r3 1/7.2.1.1) */
#define CHARSPEC_TYPE_CS0 0x00 /* (1/7.2.2) */
@@ -68,7 +68,7 @@ struct timestamp {
uint8_t centiseconds;
uint8_t hundredsOfMicroseconds;
uint8_t microseconds;
-} __attribute__ ((packed));
+} __packed;
/* Type and Time Zone (ECMA 167r3 1/7.3.1) */
#define TIMESTAMP_TYPE_MASK 0xF000
@@ -82,7 +82,7 @@ struct regid {
uint8_t flags;
uint8_t ident[23];
uint8_t identSuffix[8];
-} __attribute__ ((packed));
+} __packed;
/* Flags (ECMA 167r3 1/7.4.1) */
#define ENTITYID_FLAGS_DIRTY 0x00
@@ -95,7 +95,7 @@ struct volStructDesc {
uint8_t stdIdent[VSD_STD_ID_LEN];
uint8_t structVersion;
uint8_t structData[2041];
-} __attribute__ ((packed));
+} __packed;
/* Standard Identifier (EMCA 167r2 2/9.1.2) */
#define VSD_STD_ID_NSR02 "NSR02" /* (3/9.1) */
@@ -114,7 +114,7 @@ struct beginningExtendedAreaDesc {
uint8_t stdIdent[VSD_STD_ID_LEN];
uint8_t structVersion;
uint8_t structData[2041];
-} __attribute__ ((packed));
+} __packed;
/* Terminating Extended Area Descriptor (ECMA 167r3 2/9.3) */
struct terminatingExtendedAreaDesc {
@@ -122,7 +122,7 @@ struct terminatingExtendedAreaDesc {
uint8_t stdIdent[VSD_STD_ID_LEN];
uint8_t structVersion;
uint8_t structData[2041];
-} __attribute__ ((packed));
+} __packed;
/* Boot Descriptor (ECMA 167r3 2/9.4) */
struct bootDesc {
@@ -140,7 +140,7 @@ struct bootDesc {
__le16 flags;
uint8_t reserved2[32];
uint8_t bootUse[1906];
-} __attribute__ ((packed));
+} __packed;
/* Flags (ECMA 167r3 2/9.4.12) */
#define BOOT_FLAGS_ERASE 0x01
@@ -149,7 +149,7 @@ struct bootDesc {
struct extent_ad {
__le32 extLength;
__le32 extLocation;
-} __attribute__ ((packed));
+} __packed;
struct kernel_extent_ad {
uint32_t extLength;
@@ -166,7 +166,7 @@ struct tag {
__le16 descCRC;
__le16 descCRCLength;
__le32 tagLocation;
-} __attribute__ ((packed));
+} __packed;
/* Tag Identifier (ECMA 167r3 3/7.2.1) */
#define TAG_IDENT_PVD 0x0001
@@ -186,7 +186,7 @@ struct NSRDesc {
uint8_t structVersion;
uint8_t reserved;
uint8_t structData[2040];
-} __attribute__ ((packed));
+} __packed;
/* Primary Volume Descriptor (ECMA 167r3 3/10.1) */
struct primaryVolDesc {
@@ -212,7 +212,7 @@ struct primaryVolDesc {
__le32 predecessorVolDescSeqLocation;
__le16 flags;
uint8_t reserved[22];
-} __attribute__ ((packed));
+} __packed;
/* Flags (ECMA 167r3 3/10.1.21) */
#define PVD_FLAGS_VSID_COMMON 0x0001
@@ -223,7 +223,7 @@ struct anchorVolDescPtr {
struct extent_ad mainVolDescSeqExt;
struct extent_ad reserveVolDescSeqExt;
uint8_t reserved[480];
-} __attribute__ ((packed));
+} __packed;
/* Volume Descriptor Pointer (ECMA 167r3 3/10.3) */
struct volDescPtr {
@@ -231,7 +231,7 @@ struct volDescPtr {
__le32 volDescSeqNum;
struct extent_ad nextVolDescSeqExt;
uint8_t reserved[484];
-} __attribute__ ((packed));
+} __packed;
/* Implementation Use Volume Descriptor (ECMA 167r3 3/10.4) */
struct impUseVolDesc {
@@ -239,7 +239,7 @@ struct impUseVolDesc {
__le32 volDescSeqNum;
struct regid impIdent;
uint8_t impUse[460];
-} __attribute__ ((packed));
+} __packed;
/* Partition Descriptor (ECMA 167r3 3/10.5) */
struct partitionDesc {
@@ -255,7 +255,7 @@ struct partitionDesc {
struct regid impIdent;
uint8_t impUse[128];
uint8_t reserved[156];
-} __attribute__ ((packed));
+} __packed;
/* Partition Flags (ECMA 167r3 3/10.5.3) */
#define PD_PARTITION_FLAGS_ALLOC 0x0001
@@ -291,14 +291,14 @@ struct logicalVolDesc {
uint8_t impUse[128];
struct extent_ad integritySeqExt;
uint8_t partitionMaps[0];
-} __attribute__ ((packed));
+} __packed;
/* Generic Partition Map (ECMA 167r3 3/10.7.1) */
struct genericPartitionMap {
uint8_t partitionMapType;
uint8_t partitionMapLength;
uint8_t partitionMapping[0];
-} __attribute__ ((packed));
+} __packed;
/* Partition Map Type (ECMA 167r3 3/10.7.1.1) */
#define GP_PARTITION_MAP_TYPE_UNDEF 0x00
@@ -311,14 +311,14 @@ struct genericPartitionMap1 {
uint8_t partitionMapLength;
__le16 volSeqNum;
__le16 partitionNum;
-} __attribute__ ((packed));
+} __packed;
/* Type 2 Partition Map (ECMA 167r3 3/10.7.3) */
struct genericPartitionMap2 {
uint8_t partitionMapType;
uint8_t partitionMapLength;
uint8_t partitionIdent[62];
-} __attribute__ ((packed));
+} __packed;
/* Unallocated Space Descriptor (ECMA 167r3 3/10.8) */
struct unallocSpaceDesc {
@@ -326,13 +326,13 @@ struct unallocSpaceDesc {
__le32 volDescSeqNum;
__le32 numAllocDescs;
struct extent_ad allocDescs[0];
-} __attribute__ ((packed));
+} __packed;
/* Terminating Descriptor (ECMA 167r3 3/10.9) */
struct terminatingDesc {
struct tag descTag;
uint8_t reserved[496];
-} __attribute__ ((packed));
+} __packed;
/* Logical Volume Integrity Descriptor (ECMA 167r3 3/10.10) */
struct logicalVolIntegrityDesc {
@@ -346,7 +346,7 @@ struct logicalVolIntegrityDesc {
__le32 freeSpaceTable[0];
__le32 sizeTable[0];
uint8_t impUse[0];
-} __attribute__ ((packed));
+} __packed;
/* Integrity Type (ECMA 167r3 3/10.10.3) */
#define LVID_INTEGRITY_TYPE_OPEN 0x00000000
@@ -356,7 +356,7 @@ struct logicalVolIntegrityDesc {
struct lb_addr {
__le32 logicalBlockNum;
__le16 partitionReferenceNum;
-} __attribute__ ((packed));
+} __packed;
/* ... and its in-core analog */
struct kernel_lb_addr {
@@ -368,14 +368,14 @@ struct kernel_lb_addr {
struct short_ad {
__le32 extLength;
__le32 extPosition;
-} __attribute__ ((packed));
+} __packed;
/* Long Allocation Descriptor (ECMA 167r3 4/14.14.2) */
struct long_ad {
__le32 extLength;
struct lb_addr extLocation;
uint8_t impUse[6];
-} __attribute__ ((packed));
+} __packed;
struct kernel_long_ad {
uint32_t extLength;
@@ -389,7 +389,7 @@ struct ext_ad {
__le32 recordedLength;
__le32 informationLength;
struct lb_addr extLocation;
-} __attribute__ ((packed));
+} __packed;
struct kernel_ext_ad {
uint32_t extLength;
@@ -434,7 +434,7 @@ struct fileSetDesc {
struct long_ad nextExt;
struct long_ad streamDirectoryICB;
uint8_t reserved[32];
-} __attribute__ ((packed));
+} __packed;
/* Partition Header Descriptor (ECMA 167r3 4/14.3) */
struct partitionHeaderDesc {
@@ -444,7 +444,7 @@ struct partitionHeaderDesc {
struct short_ad freedSpaceTable;
struct short_ad freedSpaceBitmap;
uint8_t reserved[88];
-} __attribute__ ((packed));
+} __packed;
/* File Identifier Descriptor (ECMA 167r3 4/14.4) */
struct fileIdentDesc {
@@ -457,7 +457,7 @@ struct fileIdentDesc {
uint8_t impUse[0];
uint8_t fileIdent[0];
uint8_t padding[0];
-} __attribute__ ((packed));
+} __packed;
/* File Characteristics (ECMA 167r3 4/14.4.3) */
#define FID_FILE_CHAR_HIDDEN 0x01
@@ -471,7 +471,7 @@ struct allocExtDesc {
struct tag descTag;
__le32 previousAllocExtLocation;
__le32 lengthAllocDescs;
-} __attribute__ ((packed));
+} __packed;
/* ICB Tag (ECMA 167r3 4/14.6) */
struct icbtag {
@@ -483,7 +483,7 @@ struct icbtag {
uint8_t fileType;
struct lb_addr parentICBLocation;
__le16 flags;
-} __attribute__ ((packed));
+} __packed;
/* Strategy Type (ECMA 167r3 4/14.6.2) */
#define ICBTAG_STRATEGY_TYPE_UNDEF 0x0000
@@ -531,13 +531,13 @@ struct indirectEntry {
struct tag descTag;
struct icbtag icbTag;
struct long_ad indirectICB;
-} __attribute__ ((packed));
+} __packed;
/* Terminal Entry (ECMA 167r3 4/14.8) */
struct terminalEntry {
struct tag descTag;
struct icbtag icbTag;
-} __attribute__ ((packed));
+} __packed;
/* File Entry (ECMA 167r3 4/14.9) */
struct fileEntry {
@@ -563,7 +563,7 @@ struct fileEntry {
__le32 lengthAllocDescs;
uint8_t extendedAttr[0];
uint8_t allocDescs[0];
-} __attribute__ ((packed));
+} __packed;
/* Permissions (ECMA 167r3 4/14.9.5) */
#define FE_PERM_O_EXEC 0x00000001U
@@ -607,7 +607,7 @@ struct extendedAttrHeaderDesc {
struct tag descTag;
__le32 impAttrLocation;
__le32 appAttrLocation;
-} __attribute__ ((packed));
+} __packed;
/* Generic Format (ECMA 167r3 4/14.10.2) */
struct genericFormat {
@@ -616,7 +616,7 @@ struct genericFormat {
uint8_t reserved[3];
__le32 attrLength;
uint8_t attrData[0];
-} __attribute__ ((packed));
+} __packed;
/* Character Set Information (ECMA 167r3 4/14.10.3) */
struct charSetInfo {
@@ -627,7 +627,7 @@ struct charSetInfo {
__le32 escapeSeqLength;
uint8_t charSetType;
uint8_t escapeSeq[0];
-} __attribute__ ((packed));
+} __packed;
/* Alternate Permissions (ECMA 167r3 4/14.10.4) */
struct altPerms {
@@ -638,7 +638,7 @@ struct altPerms {
__le16 ownerIdent;
__le16 groupIdent;
__le16 permission;
-} __attribute__ ((packed));
+} __packed;
/* File Times Extended Attribute (ECMA 167r3 4/14.10.5) */
struct fileTimesExtAttr {
@@ -649,7 +649,7 @@ struct fileTimesExtAttr {
__le32 dataLength;
__le32 fileTimeExistence;
uint8_t fileTimes;
-} __attribute__ ((packed));
+} __packed;
/* FileTimeExistence (ECMA 167r3 4/14.10.5.6) */
#define FTE_CREATION 0x00000001
@@ -666,7 +666,7 @@ struct infoTimesExtAttr {
__le32 dataLength;
__le32 infoTimeExistence;
uint8_t infoTimes[0];
-} __attribute__ ((packed));
+} __packed;
/* Device Specification (ECMA 167r3 4/14.10.7) */
struct deviceSpec {
@@ -678,7 +678,7 @@ struct deviceSpec {
__le32 majorDeviceIdent;
__le32 minorDeviceIdent;
uint8_t impUse[0];
-} __attribute__ ((packed));
+} __packed;
/* Implementation Use Extended Attr (ECMA 167r3 4/14.10.8) */
struct impUseExtAttr {
@@ -689,7 +689,7 @@ struct impUseExtAttr {
__le32 impUseLength;
struct regid impIdent;
uint8_t impUse[0];
-} __attribute__ ((packed));
+} __packed;
/* Application Use Extended Attribute (ECMA 167r3 4/14.10.9) */
struct appUseExtAttr {
@@ -700,7 +700,7 @@ struct appUseExtAttr {
__le32 appUseLength;
struct regid appIdent;
uint8_t appUse[0];
-} __attribute__ ((packed));
+} __packed;
#define EXTATTR_CHAR_SET 1
#define EXTATTR_ALT_PERMS 3
@@ -716,7 +716,7 @@ struct unallocSpaceEntry {
struct icbtag icbTag;
__le32 lengthAllocDescs;
uint8_t allocDescs[0];
-} __attribute__ ((packed));
+} __packed;
/* Space Bitmap Descriptor (ECMA 167r3 4/14.12) */
struct spaceBitmapDesc {
@@ -724,7 +724,7 @@ struct spaceBitmapDesc {
__le32 numOfBits;
__le32 numOfBytes;
uint8_t bitmap[0];
-} __attribute__ ((packed));
+} __packed;
/* Partition Integrity Entry (ECMA 167r3 4/14.13) */
struct partitionIntegrityEntry {
@@ -735,7 +735,7 @@ struct partitionIntegrityEntry {
uint8_t reserved[175];
struct regid impIdent;
uint8_t impUse[256];
-} __attribute__ ((packed));
+} __packed;
/* Short Allocation Descriptor (ECMA 167r3 4/14.14.1) */
@@ -753,7 +753,7 @@ struct partitionIntegrityEntry {
struct logicalVolHeaderDesc {
__le64 uniqueID;
uint8_t reserved[24];
-} __attribute__ ((packed));
+} __packed;
/* Path Component (ECMA 167r3 4/14.16.1) */
struct pathComponent {
@@ -761,7 +761,7 @@ struct pathComponent {
uint8_t lengthComponentIdent;
__le16 componentFileVersionNum;
dstring componentIdent[0];
-} __attribute__ ((packed));
+} __packed;
/* File Entry (ECMA 167r3 4/14.17) */
struct extendedFileEntry {
@@ -791,6 +791,6 @@ struct extendedFileEntry {
__le32 lengthAllocDescs;
uint8_t extendedAttr[0];
uint8_t allocDescs[0];
-} __attribute__ ((packed));
+} __packed;
#endif /* _ECMA_167_H */
diff --git a/fs/udf/file.c b/fs/udf/file.c
index dbcb3a4a0cb9..e04cc0cdca9d 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -176,54 +176,46 @@ long udf_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
struct inode *inode = file_inode(filp);
long old_block, new_block;
- int result = -EINVAL;
+ int result;
if (inode_permission(inode, MAY_READ) != 0) {
udf_debug("no permission to access inode %lu\n", inode->i_ino);
- result = -EPERM;
- goto out;
+ return -EPERM;
}
- if (!arg) {
+ if (!arg && ((cmd == UDF_GETVOLIDENT) || (cmd == UDF_GETEASIZE) ||
+ (cmd == UDF_RELOCATE_BLOCKS) || (cmd == UDF_GETEABLOCK))) {
udf_debug("invalid argument to udf_ioctl\n");
- result = -EINVAL;
- goto out;
+ return -EINVAL;
}
switch (cmd) {
case UDF_GETVOLIDENT:
if (copy_to_user((char __user *)arg,
UDF_SB(inode->i_sb)->s_volume_ident, 32))
- result = -EFAULT;
- else
- result = 0;
- goto out;
+ return -EFAULT;
+ return 0;
case UDF_RELOCATE_BLOCKS:
- if (!capable(CAP_SYS_ADMIN)) {
- result = -EPERM;
- goto out;
- }
- if (get_user(old_block, (long __user *)arg)) {
- result = -EFAULT;
- goto out;
- }
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ if (get_user(old_block, (long __user *)arg))
+ return -EFAULT;
result = udf_relocate_blocks(inode->i_sb,
old_block, &new_block);
if (result == 0)
result = put_user(new_block, (long __user *)arg);
- goto out;
+ return result;
case UDF_GETEASIZE:
- result = put_user(UDF_I(inode)->i_lenEAttr, (int __user *)arg);
- goto out;
+ return put_user(UDF_I(inode)->i_lenEAttr, (int __user *)arg);
case UDF_GETEABLOCK:
- result = copy_to_user((char __user *)arg,
- UDF_I(inode)->i_ext.i_data,
- UDF_I(inode)->i_lenEAttr) ? -EFAULT : 0;
- goto out;
+ return copy_to_user((char __user *)arg,
+ UDF_I(inode)->i_ext.i_data,
+ UDF_I(inode)->i_lenEAttr) ? -EFAULT : 0;
+ default:
+ return -ENOIOCTLCMD;
}
-out:
- return result;
+ return 0;
}
static int udf_release_file(struct inode *inode, struct file *filp)
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 0f3db71753aa..8ec6b3df0bc7 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -43,10 +43,6 @@
#include "udf_i.h"
#include "udf_sb.h"
-MODULE_AUTHOR("Ben Fennema");
-MODULE_DESCRIPTION("Universal Disk Format Filesystem");
-MODULE_LICENSE("GPL");
-
#define EXTENT_MERGE_SIZE 5
static umode_t udf_convert_permissions(struct fileEntry *);
@@ -57,14 +53,12 @@ static sector_t inode_getblk(struct inode *, sector_t, int *, int *);
static int8_t udf_insert_aext(struct inode *, struct extent_position,
struct kernel_lb_addr, uint32_t);
static void udf_split_extents(struct inode *, int *, int, int,
- struct kernel_long_ad[EXTENT_MERGE_SIZE], int *);
+ struct kernel_long_ad *, int *);
static void udf_prealloc_extents(struct inode *, int, int,
- struct kernel_long_ad[EXTENT_MERGE_SIZE], int *);
-static void udf_merge_extents(struct inode *,
- struct kernel_long_ad[EXTENT_MERGE_SIZE], int *);
-static void udf_update_extents(struct inode *,
- struct kernel_long_ad[EXTENT_MERGE_SIZE], int, int,
- struct extent_position *);
+ struct kernel_long_ad *, int *);
+static void udf_merge_extents(struct inode *, struct kernel_long_ad *, int *);
+static void udf_update_extents(struct inode *, struct kernel_long_ad *, int,
+ int, struct extent_position *);
static int udf_get_block(struct inode *, sector_t, struct buffer_head *, int);
static void __udf_clear_extent_cache(struct inode *inode)
@@ -111,7 +105,7 @@ static int udf_read_extent_cache(struct inode *inode, loff_t bcount,
/* Add extent to extent cache */
static void udf_update_extent_cache(struct inode *inode, loff_t estart,
- struct extent_position *pos, int next_epos)
+ struct extent_position *pos)
{
struct udf_inode_info *iinfo = UDF_I(inode);
@@ -120,19 +114,16 @@ static void udf_update_extent_cache(struct inode *inode, loff_t estart,
__udf_clear_extent_cache(inode);
if (pos->bh)
get_bh(pos->bh);
- memcpy(&iinfo->cached_extent.epos, pos,
- sizeof(struct extent_position));
+ memcpy(&iinfo->cached_extent.epos, pos, sizeof(struct extent_position));
iinfo->cached_extent.lstart = estart;
- if (next_epos)
- switch (iinfo->i_alloc_type) {
- case ICBTAG_FLAG_AD_SHORT:
- iinfo->cached_extent.epos.offset -=
- sizeof(struct short_ad);
- break;
- case ICBTAG_FLAG_AD_LONG:
- iinfo->cached_extent.epos.offset -=
- sizeof(struct long_ad);
- }
+ switch (iinfo->i_alloc_type) {
+ case ICBTAG_FLAG_AD_SHORT:
+ iinfo->cached_extent.epos.offset -= sizeof(struct short_ad);
+ break;
+ case ICBTAG_FLAG_AD_LONG:
+ iinfo->cached_extent.epos.offset -= sizeof(struct long_ad);
+ break;
+ }
spin_unlock(&iinfo->i_extent_cache_lock);
}
@@ -747,11 +738,8 @@ static sector_t inode_getblk(struct inode *inode, sector_t block,
~(inode->i_sb->s_blocksize - 1));
udf_write_aext(inode, &cur_epos, &eloc, elen, 1);
}
- brelse(prev_epos.bh);
- brelse(cur_epos.bh);
- brelse(next_epos.bh);
newblock = udf_get_lb_pblock(inode->i_sb, &eloc, offset);
- return newblock;
+ goto out_free;
}
/* Are we beyond EOF? */
@@ -774,11 +762,9 @@ static sector_t inode_getblk(struct inode *inode, sector_t block,
/* Create extents for the hole between EOF and offset */
ret = udf_do_extend_file(inode, &prev_epos, laarr, offset);
if (ret < 0) {
- brelse(prev_epos.bh);
- brelse(cur_epos.bh);
- brelse(next_epos.bh);
*err = ret;
- return 0;
+ newblock = 0;
+ goto out_free;
}
c = 0;
offset = 0;
@@ -841,11 +827,9 @@ static sector_t inode_getblk(struct inode *inode, sector_t block,
iinfo->i_location.partitionReferenceNum,
goal, err);
if (!newblocknum) {
- brelse(prev_epos.bh);
- brelse(cur_epos.bh);
- brelse(next_epos.bh);
*err = -ENOSPC;
- return 0;
+ newblock = 0;
+ goto out_free;
}
if (isBeyondEOF)
iinfo->i_lenExtents += inode->i_sb->s_blocksize;
@@ -857,14 +841,12 @@ static sector_t inode_getblk(struct inode *inode, sector_t block,
* block */
udf_split_extents(inode, &c, offset, newblocknum, laarr, &endnum);
-#ifdef UDF_PREALLOCATE
/* We preallocate blocks only for regular files. It also makes sense
* for directories but there's a problem when to drop the
* preallocation. We might use some delayed work for that but I feel
* it's overengineering for a filesystem like UDF. */
if (S_ISREG(inode->i_mode))
udf_prealloc_extents(inode, c, lastblock, laarr, &endnum);
-#endif
/* merge any continuous blocks in laarr */
udf_merge_extents(inode, laarr, &endnum);
@@ -874,15 +856,11 @@ static sector_t inode_getblk(struct inode *inode, sector_t block,
* the new number of extents is less than the old number */
udf_update_extents(inode, laarr, startnum, endnum, &prev_epos);
- brelse(prev_epos.bh);
- brelse(cur_epos.bh);
- brelse(next_epos.bh);
-
newblock = udf_get_pblock(inode->i_sb, newblocknum,
iinfo->i_location.partitionReferenceNum, 0);
if (!newblock) {
*err = -EIO;
- return 0;
+ goto out_free;
}
*new = 1;
iinfo->i_next_alloc_block = block;
@@ -893,13 +871,15 @@ static sector_t inode_getblk(struct inode *inode, sector_t block,
udf_sync_inode(inode);
else
mark_inode_dirty(inode);
-
+out_free:
+ brelse(prev_epos.bh);
+ brelse(cur_epos.bh);
+ brelse(next_epos.bh);
return newblock;
}
static void udf_split_extents(struct inode *inode, int *c, int offset,
- int newblocknum,
- struct kernel_long_ad laarr[EXTENT_MERGE_SIZE],
+ int newblocknum, struct kernel_long_ad *laarr,
int *endnum)
{
unsigned long blocksize = inode->i_sb->s_blocksize;
@@ -963,7 +943,7 @@ static void udf_split_extents(struct inode *inode, int *c, int offset,
}
static void udf_prealloc_extents(struct inode *inode, int c, int lastblock,
- struct kernel_long_ad laarr[EXTENT_MERGE_SIZE],
+ struct kernel_long_ad *laarr,
int *endnum)
{
int start, length = 0, currlength = 0, i;
@@ -1058,8 +1038,7 @@ static void udf_prealloc_extents(struct inode *inode, int c, int lastblock,
}
}
-static void udf_merge_extents(struct inode *inode,
- struct kernel_long_ad laarr[EXTENT_MERGE_SIZE],
+static void udf_merge_extents(struct inode *inode, struct kernel_long_ad *laarr,
int *endnum)
{
int i;
@@ -1158,8 +1137,7 @@ static void udf_merge_extents(struct inode *inode,
}
}
-static void udf_update_extents(struct inode *inode,
- struct kernel_long_ad laarr[EXTENT_MERGE_SIZE],
+static void udf_update_extents(struct inode *inode, struct kernel_long_ad *laarr,
int startnum, int endnum,
struct extent_position *epos)
{
@@ -1299,6 +1277,12 @@ static int udf_read_inode(struct inode *inode, bool hidden_inode)
int ret = -EIO;
reread:
+ if (iloc->partitionReferenceNum >= sbi->s_partitions) {
+ udf_debug("partition reference: %d > logical volume partitions: %d\n",
+ iloc->partitionReferenceNum, sbi->s_partitions);
+ return -EIO;
+ }
+
if (iloc->logicalBlockNum >=
sbi->s_partmaps[iloc->partitionReferenceNum].s_partition_len) {
udf_debug("block=%d, partition=%d out of range\n",
@@ -1549,7 +1533,7 @@ reread:
break;
case ICBTAG_FILE_TYPE_SYMLINK:
inode->i_data.a_ops = &udf_symlink_aops;
- inode->i_op = &page_symlink_inode_operations;
+ inode->i_op = &udf_symlink_inode_operations;
inode_nohighmem(inode);
inode->i_mode = S_IFLNK | S_IRWXUGO;
break;
@@ -1627,6 +1611,14 @@ static int udf_sync_inode(struct inode *inode)
return udf_update_inode(inode, 1);
}
+static void udf_adjust_time(struct udf_inode_info *iinfo, struct timespec time)
+{
+ if (iinfo->i_crtime.tv_sec > time.tv_sec ||
+ (iinfo->i_crtime.tv_sec == time.tv_sec &&
+ iinfo->i_crtime.tv_nsec > time.tv_nsec))
+ iinfo->i_crtime = time;
+}
+
static int udf_update_inode(struct inode *inode, int do_sync)
{
struct buffer_head *bh = NULL;
@@ -1753,20 +1745,9 @@ static int udf_update_inode(struct inode *inode, int do_sync)
efe->objectSize = cpu_to_le64(inode->i_size);
efe->logicalBlocksRecorded = cpu_to_le64(lb_recorded);
- if (iinfo->i_crtime.tv_sec > inode->i_atime.tv_sec ||
- (iinfo->i_crtime.tv_sec == inode->i_atime.tv_sec &&
- iinfo->i_crtime.tv_nsec > inode->i_atime.tv_nsec))
- iinfo->i_crtime = inode->i_atime;
-
- if (iinfo->i_crtime.tv_sec > inode->i_mtime.tv_sec ||
- (iinfo->i_crtime.tv_sec == inode->i_mtime.tv_sec &&
- iinfo->i_crtime.tv_nsec > inode->i_mtime.tv_nsec))
- iinfo->i_crtime = inode->i_mtime;
-
- if (iinfo->i_crtime.tv_sec > inode->i_ctime.tv_sec ||
- (iinfo->i_crtime.tv_sec == inode->i_ctime.tv_sec &&
- iinfo->i_crtime.tv_nsec > inode->i_ctime.tv_nsec))
- iinfo->i_crtime = inode->i_ctime;
+ udf_adjust_time(iinfo, inode->i_atime);
+ udf_adjust_time(iinfo, inode->i_mtime);
+ udf_adjust_time(iinfo, inode->i_ctime);
udf_time_to_disk_stamp(&efe->accessTime, inode->i_atime);
udf_time_to_disk_stamp(&efe->modificationTime, inode->i_mtime);
@@ -2286,8 +2267,7 @@ int8_t inode_bmap(struct inode *inode, sector_t block,
uint32_t *elen, sector_t *offset)
{
unsigned char blocksize_bits = inode->i_sb->s_blocksize_bits;
- loff_t lbcount = 0, bcount =
- (loff_t) block << blocksize_bits;
+ loff_t lbcount = 0, bcount = (loff_t) block << blocksize_bits;
int8_t etype;
struct udf_inode_info *iinfo;
@@ -2308,7 +2288,7 @@ int8_t inode_bmap(struct inode *inode, sector_t block,
lbcount += *elen;
} while (lbcount <= bcount);
/* update extent cache */
- udf_update_extent_cache(inode, lbcount - *elen, pos, 1);
+ udf_update_extent_cache(inode, lbcount - *elen, pos);
*offset = (bcount + *elen - lbcount) >> blocksize_bits;
return etype;
diff --git a/fs/udf/lowlevel.c b/fs/udf/lowlevel.c
index 6ad5a453af97..5c7ec121990d 100644
--- a/fs/udf/lowlevel.c
+++ b/fs/udf/lowlevel.c
@@ -58,7 +58,7 @@ unsigned long udf_get_last_block(struct super_block *sb)
*/
if (ioctl_by_bdev(bdev, CDROM_LAST_WRITTEN, (unsigned long) &lblock) ||
lblock == 0)
- lblock = bdev->bd_inode->i_size >> sb->s_blocksize_bits;
+ lblock = i_size_read(bdev->bd_inode) >> sb->s_blocksize_bits;
if (lblock)
return lblock - 1;
diff --git a/fs/udf/misc.c b/fs/udf/misc.c
index 71d1c25f360d..3949c4bec3a3 100644
--- a/fs/udf/misc.c
+++ b/fs/udf/misc.c
@@ -141,8 +141,6 @@ struct genericFormat *udf_add_extendedattr(struct inode *inode, uint32_t size,
iinfo->i_lenEAttr += size;
return (struct genericFormat *)&ea[offset];
}
- if (loc & 0x02)
- ;
return NULL;
}
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 2d65e280748b..babf48d0e553 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -931,7 +931,7 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
}
inode->i_data.a_ops = &udf_symlink_aops;
- inode->i_op = &page_symlink_inode_operations;
+ inode->i_op = &udf_symlink_inode_operations;
inode_nohighmem(inode);
if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) {
diff --git a/fs/udf/osta_udf.h b/fs/udf/osta_udf.h
index fbff74654df2..a4da59e38b7f 100644
--- a/fs/udf/osta_udf.h
+++ b/fs/udf/osta_udf.h
@@ -70,17 +70,17 @@ struct UDFIdentSuffix {
uint8_t OSClass;
uint8_t OSIdentifier;
uint8_t reserved[4];
-} __attribute__ ((packed));
+} __packed;
struct impIdentSuffix {
uint8_t OSClass;
uint8_t OSIdentifier;
uint8_t reserved[6];
-} __attribute__ ((packed));
+} __packed;
struct appIdentSuffix {
uint8_t impUse[8];
-} __attribute__ ((packed));
+} __packed;
/* Logical Volume Integrity Descriptor (UDF 2.50 2.2.6) */
/* Implementation Use (UDF 2.50 2.2.6.4) */
@@ -92,7 +92,7 @@ struct logicalVolIntegrityDescImpUse {
__le16 minUDFWriteRev;
__le16 maxUDFWriteRev;
uint8_t impUse[0];
-} __attribute__ ((packed));
+} __packed;
/* Implementation Use Volume Descriptor (UDF 2.50 2.2.7) */
/* Implementation Use (UDF 2.50 2.2.7.2) */
@@ -104,7 +104,7 @@ struct impUseVolDescImpUse {
dstring LVInfo3[36];
struct regid impIdent;
uint8_t impUse[128];
-} __attribute__ ((packed));
+} __packed;
struct udfPartitionMap2 {
uint8_t partitionMapType;
@@ -113,7 +113,7 @@ struct udfPartitionMap2 {
struct regid partIdent;
__le16 volSeqNum;
__le16 partitionNum;
-} __attribute__ ((packed));
+} __packed;
/* Virtual Partition Map (UDF 2.50 2.2.8) */
struct virtualPartitionMap {
@@ -124,7 +124,7 @@ struct virtualPartitionMap {
__le16 volSeqNum;
__le16 partitionNum;
uint8_t reserved2[24];
-} __attribute__ ((packed));
+} __packed;
/* Sparable Partition Map (UDF 2.50 2.2.9) */
struct sparablePartitionMap {
@@ -139,7 +139,7 @@ struct sparablePartitionMap {
uint8_t reserved2[1];
__le32 sizeSparingTable;
__le32 locSparingTable[4];
-} __attribute__ ((packed));
+} __packed;
/* Metadata Partition Map (UDF 2.4.0 2.2.10) */
struct metadataPartitionMap {
@@ -156,14 +156,14 @@ struct metadataPartitionMap {
__le16 alignUnitSize;
uint8_t flags;
uint8_t reserved2[5];
-} __attribute__ ((packed));
+} __packed;
/* Virtual Allocation Table (UDF 1.5 2.2.10) */
struct virtualAllocationTable15 {
__le32 VirtualSector[0];
struct regid vatIdent;
__le32 previousVATICBLoc;
-} __attribute__ ((packed));
+} __packed;
#define ICBTAG_FILE_TYPE_VAT15 0x00U
@@ -181,7 +181,7 @@ struct virtualAllocationTable20 {
__le16 reserved;
uint8_t impUse[0];
__le32 vatEntry[0];
-} __attribute__ ((packed));
+} __packed;
#define ICBTAG_FILE_TYPE_VAT20 0xF8U
@@ -189,7 +189,7 @@ struct virtualAllocationTable20 {
struct sparingEntry {
__le32 origLocation;
__le32 mappedLocation;
-} __attribute__ ((packed));
+} __packed;
struct sparingTable {
struct tag descTag;
@@ -199,7 +199,7 @@ struct sparingTable {
__le32 sequenceNum;
struct sparingEntry
mapEntry[0];
-} __attribute__ ((packed));
+} __packed;
/* Metadata File (and Metadata Mirror File) (UDF 2.50 2.2.13.1) */
#define ICBTAG_FILE_TYPE_MAIN 0xFA
@@ -210,7 +210,7 @@ struct sparingTable {
struct allocDescImpUse {
__le16 flags;
uint8_t impUse[4];
-} __attribute__ ((packed));
+} __packed;
#define AD_IU_EXT_ERASED 0x0001
@@ -222,7 +222,7 @@ struct allocDescImpUse {
struct freeEaSpace {
__le16 headerChecksum;
uint8_t freeEASpace[0];
-} __attribute__ ((packed));
+} __packed;
/* DVD Copyright Management Information (UDF 2.50 3.3.4.5.1.2) */
struct DVDCopyrightImpUse {
@@ -230,14 +230,14 @@ struct DVDCopyrightImpUse {
uint8_t CGMSInfo;
uint8_t dataType;
uint8_t protectionSystemInfo[4];
-} __attribute__ ((packed));
+} __packed;
/* Application Use Extended Attribute (UDF 2.50 3.3.4.6) */
/* FreeAppEASpace (UDF 2.50 3.3.4.6.1) */
struct freeAppEASpace {
__le16 headerChecksum;
uint8_t freeEASpace[0];
-} __attribute__ ((packed));
+} __packed;
/* UDF Defined System Stream (UDF 2.50 3.3.7) */
#define UDF_ID_UNIQUE_ID "*UDF Unique ID Mapping Data"
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 4942549e7dc8..14b4bc1f6801 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -264,9 +264,6 @@ static void __exit exit_udf_fs(void)
destroy_inodecache();
}
-module_init(init_udf_fs)
-module_exit(exit_udf_fs)
-
static int udf_sb_alloc_partition_maps(struct super_block *sb, u32 count)
{
struct udf_sb_info *sbi = UDF_SB(sb);
@@ -1216,7 +1213,8 @@ static int udf_load_vat(struct super_block *sb, int p_index, int type1_index)
struct udf_inode_info *vati;
uint32_t pos;
struct virtualAllocationTable20 *vat20;
- sector_t blocks = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits;
+ sector_t blocks = i_size_read(sb->s_bdev->bd_inode) >>
+ sb->s_blocksize_bits;
udf_find_vat_block(sb, p_index, type1_index, sbi->s_last_block);
if (!sbi->s_vat_inode &&
@@ -1806,7 +1804,7 @@ static int udf_check_anchor_block(struct super_block *sb, sector_t block,
if (UDF_QUERY_FLAG(sb, UDF_FLAG_VARCONV) &&
udf_fixed_to_variable(block) >=
- sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits)
+ i_size_read(sb->s_bdev->bd_inode) >> sb->s_blocksize_bits)
return -EAGAIN;
bh = udf_read_tagged(sb, block, block, &ident);
@@ -1868,7 +1866,7 @@ static int udf_scan_anchors(struct super_block *sb, sector_t *lastblock,
last[last_count++] = *lastblock - 152;
for (i = 0; i < last_count; i++) {
- if (last[i] >= sb->s_bdev->bd_inode->i_size >>
+ if (last[i] >= i_size_read(sb->s_bdev->bd_inode) >>
sb->s_blocksize_bits)
continue;
ret = udf_check_anchor_block(sb, last[i], fileset);
@@ -1957,7 +1955,7 @@ static int udf_load_vrs(struct super_block *sb, struct udf_options *uopt,
if (!nsr_off) {
if (!silent)
udf_warn(sb, "No VRS found\n");
- return 0;
+ return -EINVAL;
}
if (nsr_off == -1)
udf_debug("Failed to read sector at offset %d. "
@@ -1986,6 +1984,7 @@ static void udf_open_lvid(struct super_block *sb)
struct buffer_head *bh = sbi->s_lvid_bh;
struct logicalVolIntegrityDesc *lvid;
struct logicalVolIntegrityDescImpUse *lvidiu;
+ struct timespec ts;
if (!bh)
return;
@@ -1997,8 +1996,8 @@ static void udf_open_lvid(struct super_block *sb)
mutex_lock(&sbi->s_alloc_mutex);
lvidiu->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX;
lvidiu->impIdent.identSuffix[1] = UDF_OS_ID_LINUX;
- udf_time_to_disk_stamp(&lvid->recordingDateAndTime,
- CURRENT_TIME);
+ ktime_get_real_ts(&ts);
+ udf_time_to_disk_stamp(&lvid->recordingDateAndTime, ts);
lvid->integrityType = cpu_to_le32(LVID_INTEGRITY_TYPE_OPEN);
lvid->descTag.descCRC = cpu_to_le16(
@@ -2019,6 +2018,7 @@ static void udf_close_lvid(struct super_block *sb)
struct buffer_head *bh = sbi->s_lvid_bh;
struct logicalVolIntegrityDesc *lvid;
struct logicalVolIntegrityDescImpUse *lvidiu;
+ struct timespec ts;
if (!bh)
return;
@@ -2030,7 +2030,8 @@ static void udf_close_lvid(struct super_block *sb)
mutex_lock(&sbi->s_alloc_mutex);
lvidiu->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX;
lvidiu->impIdent.identSuffix[1] = UDF_OS_ID_LINUX;
- udf_time_to_disk_stamp(&lvid->recordingDateAndTime, CURRENT_TIME);
+ ktime_get_real_ts(&ts);
+ udf_time_to_disk_stamp(&lvid->recordingDateAndTime, ts);
if (UDF_MAX_WRITE_VERSION > le16_to_cpu(lvidiu->maxUDFWriteRev))
lvidiu->maxUDFWriteRev = cpu_to_le16(UDF_MAX_WRITE_VERSION);
if (sbi->s_udfrev > le16_to_cpu(lvidiu->minUDFReadRev))
@@ -2158,15 +2159,25 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
ret = udf_load_vrs(sb, &uopt, silent, &fileset);
} else {
uopt.blocksize = bdev_logical_block_size(sb->s_bdev);
- ret = udf_load_vrs(sb, &uopt, silent, &fileset);
- if (ret == -EAGAIN && uopt.blocksize != UDF_DEFAULT_BLOCKSIZE) {
- if (!silent)
- pr_notice("Rescanning with blocksize %d\n",
- UDF_DEFAULT_BLOCKSIZE);
- brelse(sbi->s_lvid_bh);
- sbi->s_lvid_bh = NULL;
- uopt.blocksize = UDF_DEFAULT_BLOCKSIZE;
+ while (uopt.blocksize <= 4096) {
ret = udf_load_vrs(sb, &uopt, silent, &fileset);
+ if (ret < 0) {
+ if (!silent && ret != -EACCES) {
+ pr_notice("Scanning with blocksize %d failed\n",
+ uopt.blocksize);
+ }
+ brelse(sbi->s_lvid_bh);
+ sbi->s_lvid_bh = NULL;
+ /*
+ * EACCES is special - we want to propagate to
+ * upper layers that we cannot handle RW mount.
+ */
+ if (ret == -EACCES)
+ break;
+ } else
+ break;
+
+ uopt.blocksize <<= 1;
}
}
if (ret < 0) {
@@ -2497,3 +2508,9 @@ static unsigned int udf_count_free(struct super_block *sb)
return accum;
}
+
+MODULE_AUTHOR("Ben Fennema");
+MODULE_DESCRIPTION("Universal Disk Format Filesystem");
+MODULE_LICENSE("GPL");
+module_init(init_udf_fs)
+module_exit(exit_udf_fs)
diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c
index 8d619773056b..f7dfef53f739 100644
--- a/fs/udf/symlink.c
+++ b/fs/udf/symlink.c
@@ -152,9 +152,39 @@ out_unmap:
return err;
}
+static int udf_symlink_getattr(struct vfsmount *mnt, struct dentry *dentry,
+ struct kstat *stat)
+{
+ struct inode *inode = d_backing_inode(dentry);
+ struct page *page;
+
+ generic_fillattr(inode, stat);
+ page = read_mapping_page(inode->i_mapping, 0, NULL);
+ if (IS_ERR(page))
+ return PTR_ERR(page);
+ /*
+ * UDF uses non-trivial encoding of symlinks so i_size does not match
+ * number of characters reported by readlink(2) which apparently some
+ * applications expect. Also POSIX says that "The value returned in the
+ * st_size field shall be the length of the contents of the symbolic
+ * link, and shall not count a trailing null if one is present." So
+ * let's report the length of string returned by readlink(2) for
+ * st_size.
+ */
+ stat->size = strlen(page_address(page));
+ put_page(page);
+
+ return 0;
+}
+
/*
* symlinks can't do much...
*/
const struct address_space_operations udf_symlink_aops = {
.readpage = udf_symlink_filler,
};
+
+const struct inode_operations udf_symlink_inode_operations = {
+ .get_link = page_get_link,
+ .getattr = udf_symlink_getattr,
+};
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index 263829ef1873..63b034984378 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -15,7 +15,6 @@
#include "udfend.h"
#include "udf_i.h"
-#define UDF_PREALLOCATE
#define UDF_DEFAULT_PREALLOC_BLOCKS 8
extern __printf(3, 4) void _udf_err(struct super_block *sb,
@@ -85,6 +84,7 @@ extern const struct inode_operations udf_dir_inode_operations;
extern const struct file_operations udf_dir_operations;
extern const struct inode_operations udf_file_inode_operations;
extern const struct file_operations udf_file_operations;
+extern const struct inode_operations udf_symlink_inode_operations;
extern const struct address_space_operations udf_aops;
extern const struct address_space_operations udf_adinicb_aops;
extern const struct address_space_operations udf_symlink_aops;
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index d96e2f30084b..43953e03c356 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -63,6 +63,7 @@ struct userfaultfd_wait_queue {
struct uffd_msg msg;
wait_queue_t wq;
struct userfaultfd_ctx *ctx;
+ bool waken;
};
struct userfaultfd_wake_range {
@@ -86,6 +87,12 @@ static int userfaultfd_wake_function(wait_queue_t *wq, unsigned mode,
if (len && (start > uwq->msg.arg.pagefault.address ||
start + len <= uwq->msg.arg.pagefault.address))
goto out;
+ WRITE_ONCE(uwq->waken, true);
+ /*
+ * The implicit smp_mb__before_spinlock in try_to_wake_up()
+ * renders uwq->waken visible to other CPUs before the task is
+ * waken.
+ */
ret = wake_up_state(wq->private, mode);
if (ret)
/*
@@ -264,6 +271,7 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
struct userfaultfd_wait_queue uwq;
int ret;
bool must_wait, return_to_userland;
+ long blocking_state;
BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
@@ -334,10 +342,13 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
uwq.wq.private = current;
uwq.msg = userfault_msg(vmf->address, vmf->flags, reason);
uwq.ctx = ctx;
+ uwq.waken = false;
return_to_userland =
(vmf->flags & (FAULT_FLAG_USER|FAULT_FLAG_KILLABLE)) ==
(FAULT_FLAG_USER|FAULT_FLAG_KILLABLE);
+ blocking_state = return_to_userland ? TASK_INTERRUPTIBLE :
+ TASK_KILLABLE;
spin_lock(&ctx->fault_pending_wqh.lock);
/*
@@ -350,8 +361,7 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
* following the spin_unlock to happen before the list_add in
* __add_wait_queue.
*/
- set_current_state(return_to_userland ? TASK_INTERRUPTIBLE :
- TASK_KILLABLE);
+ set_current_state(blocking_state);
spin_unlock(&ctx->fault_pending_wqh.lock);
must_wait = userfaultfd_must_wait(ctx, vmf->address, vmf->flags,
@@ -364,6 +374,29 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
wake_up_poll(&ctx->fd_wqh, POLLIN);
schedule();
ret |= VM_FAULT_MAJOR;
+
+ /*
+ * False wakeups can orginate even from rwsem before
+ * up_read() however userfaults will wait either for a
+ * targeted wakeup on the specific uwq waitqueue from
+ * wake_userfault() or for signals or for uffd
+ * release.
+ */
+ while (!READ_ONCE(uwq.waken)) {
+ /*
+ * This needs the full smp_store_mb()
+ * guarantee as the state write must be
+ * visible to other CPUs before reading
+ * uwq.waken from other CPUs.
+ */
+ set_current_state(blocking_state);
+ if (READ_ONCE(uwq.waken) ||
+ READ_ONCE(ctx->released) ||
+ (return_to_userland ? signal_pending(current) :
+ fatal_signal_pending(current)))
+ break;
+ schedule();
+ }
}
__set_current_state(TASK_RUNNING);
diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c
index e5ebc3770460..33db69be4832 100644
--- a/fs/xfs/libxfs/xfs_ag_resv.c
+++ b/fs/xfs/libxfs/xfs_ag_resv.c
@@ -39,6 +39,7 @@
#include "xfs_rmap_btree.h"
#include "xfs_btree.h"
#include "xfs_refcount_btree.h"
+#include "xfs_ialloc_btree.h"
/*
* Per-AG Block Reservations
@@ -200,22 +201,30 @@ __xfs_ag_resv_init(
struct xfs_mount *mp = pag->pag_mount;
struct xfs_ag_resv *resv;
int error;
+ xfs_extlen_t reserved;
- resv = xfs_perag_resv(pag, type);
if (used > ask)
ask = used;
- resv->ar_asked = ask;
- resv->ar_reserved = resv->ar_orig_reserved = ask - used;
- mp->m_ag_max_usable -= ask;
+ reserved = ask - used;
- trace_xfs_ag_resv_init(pag, type, ask);
-
- error = xfs_mod_fdblocks(mp, -(int64_t)resv->ar_reserved, true);
- if (error)
+ error = xfs_mod_fdblocks(mp, -(int64_t)reserved, true);
+ if (error) {
trace_xfs_ag_resv_init_error(pag->pag_mount, pag->pag_agno,
error, _RET_IP_);
+ xfs_warn(mp,
+"Per-AG reservation for AG %u failed. Filesystem may run out of space.",
+ pag->pag_agno);
+ return error;
+ }
- return error;
+ mp->m_ag_max_usable -= ask;
+
+ resv = xfs_perag_resv(pag, type);
+ resv->ar_asked = ask;
+ resv->ar_reserved = resv->ar_orig_reserved = reserved;
+
+ trace_xfs_ag_resv_init(pag, type, ask);
+ return 0;
}
/* Create a per-AG block reservation. */
@@ -223,6 +232,8 @@ int
xfs_ag_resv_init(
struct xfs_perag *pag)
{
+ struct xfs_mount *mp = pag->pag_mount;
+ xfs_agnumber_t agno = pag->pag_agno;
xfs_extlen_t ask;
xfs_extlen_t used;
int error = 0;
@@ -231,23 +242,45 @@ xfs_ag_resv_init(
if (pag->pag_meta_resv.ar_asked == 0) {
ask = used = 0;
- error = xfs_refcountbt_calc_reserves(pag->pag_mount,
- pag->pag_agno, &ask, &used);
+ error = xfs_refcountbt_calc_reserves(mp, agno, &ask, &used);
if (error)
goto out;
- error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA,
- ask, used);
+ error = xfs_finobt_calc_reserves(mp, agno, &ask, &used);
if (error)
goto out;
+
+ error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA,
+ ask, used);
+ if (error) {
+ /*
+ * Because we didn't have per-AG reservations when the
+ * finobt feature was added we might not be able to
+ * reserve all needed blocks. Warn and fall back to the
+ * old and potentially buggy code in that case, but
+ * ensure we do have the reservation for the refcountbt.
+ */
+ ask = used = 0;
+
+ mp->m_inotbt_nores = true;
+
+ error = xfs_refcountbt_calc_reserves(mp, agno, &ask,
+ &used);
+ if (error)
+ goto out;
+
+ error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA,
+ ask, used);
+ if (error)
+ goto out;
+ }
}
/* Create the AGFL metadata reservation */
if (pag->pag_agfl_resv.ar_asked == 0) {
ask = used = 0;
- error = xfs_rmapbt_calc_reserves(pag->pag_mount, pag->pag_agno,
- &ask, &used);
+ error = xfs_rmapbt_calc_reserves(mp, agno, &ask, &used);
if (error)
goto out;
@@ -256,6 +289,16 @@ xfs_ag_resv_init(
goto out;
}
+#ifdef DEBUG
+ /* need to read in the AGF for the ASSERT below to work */
+ error = xfs_alloc_pagf_init(pag->pag_mount, NULL, pag->pag_agno, 0);
+ if (error)
+ return error;
+
+ ASSERT(xfs_perag_resv(pag, XFS_AG_RESV_METADATA)->ar_reserved +
+ xfs_perag_resv(pag, XFS_AG_RESV_AGFL)->ar_reserved <=
+ pag->pagf_freeblks + pag->pagf_flcount);
+#endif
out:
return error;
}
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 5050056a0b06..fe98fbc4adf1 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -95,10 +95,7 @@ unsigned int
xfs_alloc_set_aside(
struct xfs_mount *mp)
{
- unsigned int blocks;
-
- blocks = 4 + (mp->m_sb.sb_agcount * XFS_ALLOC_AGFL_RESERVE);
- return blocks;
+ return mp->m_sb.sb_agcount * (XFS_ALLOC_AGFL_RESERVE + 4);
}
/*
@@ -224,20 +221,22 @@ xfs_alloc_get_rec(
* Compute aligned version of the found extent.
* Takes alignment and min length into account.
*/
-STATIC void
+STATIC bool
xfs_alloc_compute_aligned(
xfs_alloc_arg_t *args, /* allocation argument structure */
xfs_agblock_t foundbno, /* starting block in found extent */
xfs_extlen_t foundlen, /* length in found extent */
xfs_agblock_t *resbno, /* result block number */
- xfs_extlen_t *reslen) /* result length */
+ xfs_extlen_t *reslen, /* result length */
+ unsigned *busy_gen)
{
- xfs_agblock_t bno;
- xfs_extlen_t len;
+ xfs_agblock_t bno = foundbno;
+ xfs_extlen_t len = foundlen;
xfs_extlen_t diff;
+ bool busy;
/* Trim busy sections out of found extent */
- xfs_extent_busy_trim(args, foundbno, foundlen, &bno, &len);
+ busy = xfs_extent_busy_trim(args, &bno, &len, busy_gen);
/*
* If we have a largish extent that happens to start before min_agbno,
@@ -262,6 +261,8 @@ xfs_alloc_compute_aligned(
*resbno = bno;
*reslen = len;
}
+
+ return busy;
}
/*
@@ -365,36 +366,12 @@ xfs_alloc_fix_len(
return;
ASSERT(rlen >= args->minlen && rlen <= args->maxlen);
ASSERT(rlen % args->prod == args->mod);
+ ASSERT(args->pag->pagf_freeblks + args->pag->pagf_flcount >=
+ rlen + args->minleft);
args->len = rlen;
}
/*
- * Fix up length if there is too little space left in the a.g.
- * Return 1 if ok, 0 if too little, should give up.
- */
-STATIC int
-xfs_alloc_fix_minleft(
- xfs_alloc_arg_t *args) /* allocation argument structure */
-{
- xfs_agf_t *agf; /* a.g. freelist header */
- int diff; /* free space difference */
-
- if (args->minleft == 0)
- return 1;
- agf = XFS_BUF_TO_AGF(args->agbp);
- diff = be32_to_cpu(agf->agf_freeblks)
- - args->len - args->minleft;
- if (diff >= 0)
- return 1;
- args->len += diff; /* shrink the allocated space */
- /* casts to (int) catch length underflows */
- if ((int)args->len >= (int)args->minlen)
- return 1;
- args->agbno = NULLAGBLOCK;
- return 0;
-}
-
-/*
* Update the two btrees, logically removing from freespace the extent
* starting at rbno, rlen blocks. The extent is contained within the
* actual (current) free extent fbno for flen blocks.
@@ -689,8 +666,6 @@ xfs_alloc_ag_vextent(
xfs_alloc_arg_t *args) /* argument structure for allocation */
{
int error=0;
- xfs_extlen_t reservation;
- xfs_extlen_t oldmax;
ASSERT(args->minlen > 0);
ASSERT(args->maxlen > 0);
@@ -699,20 +674,6 @@ xfs_alloc_ag_vextent(
ASSERT(args->alignment > 0);
/*
- * Clamp maxlen to the amount of free space minus any reservations
- * that have been made.
- */
- oldmax = args->maxlen;
- reservation = xfs_ag_resv_needed(args->pag, args->resv);
- if (args->maxlen > args->pag->pagf_freeblks - reservation)
- args->maxlen = args->pag->pagf_freeblks - reservation;
- if (args->maxlen == 0) {
- args->agbno = NULLAGBLOCK;
- args->maxlen = oldmax;
- return 0;
- }
-
- /*
* Branch to correct routine based on the type.
*/
args->wasfromfl = 0;
@@ -731,8 +692,6 @@ xfs_alloc_ag_vextent(
/* NOTREACHED */
}
- args->maxlen = oldmax;
-
if (error || args->agbno == NULLAGBLOCK)
return error;
@@ -782,10 +741,11 @@ xfs_alloc_ag_vextent_exact(
int error;
xfs_agblock_t fbno; /* start block of found extent */
xfs_extlen_t flen; /* length of found extent */
- xfs_agblock_t tbno; /* start block of trimmed extent */
- xfs_extlen_t tlen; /* length of trimmed extent */
- xfs_agblock_t tend; /* end block of trimmed extent */
+ xfs_agblock_t tbno; /* start block of busy extent */
+ xfs_extlen_t tlen; /* length of busy extent */
+ xfs_agblock_t tend; /* end block of busy extent */
int i; /* success/failure of operation */
+ unsigned busy_gen;
ASSERT(args->alignment == 1);
@@ -818,7 +778,9 @@ xfs_alloc_ag_vextent_exact(
/*
* Check for overlapping busy extents.
*/
- xfs_extent_busy_trim(args, fbno, flen, &tbno, &tlen);
+ tbno = fbno;
+ tlen = flen;
+ xfs_extent_busy_trim(args, &tbno, &tlen, &busy_gen);
/*
* Give up if the start of the extent is busy, or the freespace isn't
@@ -841,9 +803,6 @@ xfs_alloc_ag_vextent_exact(
args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen)
- args->agbno;
xfs_alloc_fix_len(args);
- if (!xfs_alloc_fix_minleft(args))
- goto not_found;
-
ASSERT(args->agbno + args->len <= tend);
/*
@@ -901,6 +860,7 @@ xfs_alloc_find_best_extent(
xfs_agblock_t sdiff;
int error;
int i;
+ unsigned busy_gen;
/* The good extent is perfect, no need to search. */
if (!gdiff)
@@ -914,7 +874,8 @@ xfs_alloc_find_best_extent(
if (error)
goto error0;
XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
- xfs_alloc_compute_aligned(args, *sbno, *slen, sbnoa, slena);
+ xfs_alloc_compute_aligned(args, *sbno, *slen,
+ sbnoa, slena, &busy_gen);
/*
* The good extent is closer than this one.
@@ -1003,7 +964,8 @@ xfs_alloc_ag_vextent_near(
xfs_extlen_t ltlena; /* aligned ... */
xfs_agblock_t ltnew; /* useful start bno of left side */
xfs_extlen_t rlen; /* length of returned extent */
- int forced = 0;
+ bool busy;
+ unsigned busy_gen;
#ifdef DEBUG
/*
* Randomly don't execute the first algorithm.
@@ -1030,6 +992,7 @@ restart:
ltlen = 0;
gtlena = 0;
ltlena = 0;
+ busy = false;
/*
* Get a cursor for the by-size btree.
@@ -1112,8 +1075,8 @@ restart:
if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
- xfs_alloc_compute_aligned(args, ltbno, ltlen,
- &ltbnoa, &ltlena);
+ busy = xfs_alloc_compute_aligned(args, ltbno, ltlen,
+ &ltbnoa, &ltlena, &busy_gen);
if (ltlena < args->minlen)
continue;
if (ltbnoa < args->min_agbno || ltbnoa > args->max_agbno)
@@ -1149,12 +1112,7 @@ restart:
XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
args->len = blen;
- if (!xfs_alloc_fix_minleft(args)) {
- xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
- trace_xfs_alloc_near_nominleft(args);
- return 0;
- }
- blen = args->len;
+
/*
* We are allocating starting at bnew for blen blocks.
*/
@@ -1236,8 +1194,8 @@ restart:
if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
- xfs_alloc_compute_aligned(args, ltbno, ltlen,
- &ltbnoa, &ltlena);
+ busy |= xfs_alloc_compute_aligned(args, ltbno, ltlen,
+ &ltbnoa, &ltlena, &busy_gen);
if (ltlena >= args->minlen && ltbnoa >= args->min_agbno)
break;
if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i)))
@@ -1252,8 +1210,8 @@ restart:
if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
- xfs_alloc_compute_aligned(args, gtbno, gtlen,
- &gtbnoa, &gtlena);
+ busy |= xfs_alloc_compute_aligned(args, gtbno, gtlen,
+ &gtbnoa, &gtlena, &busy_gen);
if (gtlena >= args->minlen && gtbnoa <= args->max_agbno)
break;
if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
@@ -1314,9 +1272,9 @@ restart:
if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
- if (!forced++) {
+ if (busy) {
trace_xfs_alloc_near_busy(args);
- xfs_log_force(args->mp, XFS_LOG_SYNC);
+ xfs_extent_busy_flush(args->mp, args->pag, busy_gen);
goto restart;
}
trace_xfs_alloc_size_neither(args);
@@ -1346,12 +1304,6 @@ restart:
*/
args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
xfs_alloc_fix_len(args);
- if (!xfs_alloc_fix_minleft(args)) {
- trace_xfs_alloc_near_nominleft(args);
- xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
- xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
- return 0;
- }
rlen = args->len;
(void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment,
args->datatype, ltbnoa, ltlena, &ltnew);
@@ -1403,7 +1355,8 @@ xfs_alloc_ag_vextent_size(
int i; /* temp status variable */
xfs_agblock_t rbno; /* returned block number */
xfs_extlen_t rlen; /* length of returned extent */
- int forced = 0;
+ bool busy;
+ unsigned busy_gen;
restart:
/*
@@ -1412,6 +1365,7 @@ restart:
cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
args->agno, XFS_BTNUM_CNT);
bno_cur = NULL;
+ busy = false;
/*
* Look for an entry >= maxlen+alignment-1 blocks.
@@ -1421,14 +1375,13 @@ restart:
goto error0;
/*
- * If none or we have busy extents that we cannot allocate from, then
- * we have to settle for a smaller extent. In the case that there are
- * no large extents, this will return the last entry in the tree unless
- * the tree is empty. In the case that there are only busy large
- * extents, this will return the largest small extent unless there
+ * If none then we have to settle for a smaller extent. In the case that
+ * there are no large extents, this will return the last entry in the
+ * tree unless the tree is empty. In the case that there are only busy
+ * large extents, this will return the largest small extent unless there
* are no smaller extents available.
*/
- if (!i || forced > 1) {
+ if (!i) {
error = xfs_alloc_ag_vextent_small(args, cnt_cur,
&fbno, &flen, &i);
if (error)
@@ -1439,13 +1392,11 @@ restart:
return 0;
}
ASSERT(i == 1);
- xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen);
+ busy = xfs_alloc_compute_aligned(args, fbno, flen, &rbno,
+ &rlen, &busy_gen);
} else {
/*
* Search for a non-busy extent that is large enough.
- * If we are at low space, don't check, or if we fall of
- * the end of the btree, turn off the busy check and
- * restart.
*/
for (;;) {
error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
@@ -1453,8 +1404,8 @@ restart:
goto error0;
XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
- xfs_alloc_compute_aligned(args, fbno, flen,
- &rbno, &rlen);
+ busy = xfs_alloc_compute_aligned(args, fbno, flen,
+ &rbno, &rlen, &busy_gen);
if (rlen >= args->maxlen)
break;
@@ -1466,18 +1417,13 @@ restart:
/*
* Our only valid extents must have been busy.
* Make it unbusy by forcing the log out and
- * retrying. If we've been here before, forcing
- * the log isn't making the extents available,
- * which means they have probably been freed in
- * this transaction. In that case, we have to
- * give up on them and we'll attempt a minlen
- * allocation the next time around.
+ * retrying.
*/
xfs_btree_del_cursor(cnt_cur,
XFS_BTREE_NOERROR);
trace_xfs_alloc_size_busy(args);
- if (!forced++)
- xfs_log_force(args->mp, XFS_LOG_SYNC);
+ xfs_extent_busy_flush(args->mp,
+ args->pag, busy_gen);
goto restart;
}
}
@@ -1513,8 +1459,8 @@ restart:
XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
if (flen < bestrlen)
break;
- xfs_alloc_compute_aligned(args, fbno, flen,
- &rbno, &rlen);
+ busy = xfs_alloc_compute_aligned(args, fbno, flen,
+ &rbno, &rlen, &busy_gen);
rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 ||
(rlen <= flen && rbno + rlen <= fbno + flen),
@@ -1543,18 +1489,16 @@ restart:
*/
args->len = rlen;
if (rlen < args->minlen) {
- if (!forced++) {
+ if (busy) {
xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
trace_xfs_alloc_size_busy(args);
- xfs_log_force(args->mp, XFS_LOG_SYNC);
+ xfs_extent_busy_flush(args->mp, args->pag, busy_gen);
goto restart;
}
goto out_nominleft;
}
xfs_alloc_fix_len(args);
- if (!xfs_alloc_fix_minleft(args))
- goto out_nominleft;
rlen = args->len;
XFS_WANT_CORRUPTED_GOTO(args->mp, rlen <= flen, error0);
/*
@@ -2056,7 +2000,7 @@ xfs_alloc_space_available(
int flags)
{
struct xfs_perag *pag = args->pag;
- xfs_extlen_t longest;
+ xfs_extlen_t alloc_len, longest;
xfs_extlen_t reservation; /* blocks that are still reserved */
int available;
@@ -2066,17 +2010,28 @@ xfs_alloc_space_available(
reservation = xfs_ag_resv_needed(pag, args->resv);
/* do we have enough contiguous free space for the allocation? */
+ alloc_len = args->minlen + (args->alignment - 1) + args->minalignslop;
longest = xfs_alloc_longest_free_extent(args->mp, pag, min_free,
reservation);
- if ((args->minlen + args->alignment + args->minalignslop - 1) > longest)
+ if (longest < alloc_len)
return false;
/* do we have enough free space remaining for the allocation? */
available = (int)(pag->pagf_freeblks + pag->pagf_flcount -
- reservation - min_free - args->total);
- if (available < (int)args->minleft || available <= 0)
+ reservation - min_free - args->minleft);
+ if (available < (int)max(args->total, alloc_len))
return false;
+ /*
+ * Clamp maxlen to the amount of free space available for the actual
+ * extent allocation.
+ */
+ if (available < (int)args->maxlen && !(flags & XFS_ALLOC_FLAG_CHECK)) {
+ args->maxlen = available;
+ ASSERT(args->maxlen > 0);
+ ASSERT(args->maxlen >= args->minlen);
+ }
+
return true;
}
@@ -2122,7 +2077,8 @@ xfs_alloc_fix_freelist(
}
need = xfs_alloc_min_freelist(mp, pag);
- if (!xfs_alloc_space_available(args, need, flags))
+ if (!xfs_alloc_space_available(args, need, flags |
+ XFS_ALLOC_FLAG_CHECK))
goto out_agbp_relse;
/*
@@ -2638,12 +2594,10 @@ xfs_alloc_vextent(
xfs_agblock_t agsize; /* allocation group size */
int error;
int flags; /* XFS_ALLOC_FLAG_... locking flags */
- xfs_extlen_t minleft;/* minimum left value, temp copy */
xfs_mount_t *mp; /* mount structure pointer */
xfs_agnumber_t sagno; /* starting allocation group number */
xfs_alloctype_t type; /* input allocation type */
int bump_rotor = 0;
- int no_min = 0;
xfs_agnumber_t rotorstep = xfs_rotorstep; /* inode32 agf stepper */
mp = args->mp;
@@ -2672,7 +2626,6 @@ xfs_alloc_vextent(
trace_xfs_alloc_vextent_badargs(args);
return 0;
}
- minleft = args->minleft;
switch (type) {
case XFS_ALLOCTYPE_THIS_AG:
@@ -2683,9 +2636,7 @@ xfs_alloc_vextent(
*/
args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
args->pag = xfs_perag_get(mp, args->agno);
- args->minleft = 0;
error = xfs_alloc_fix_freelist(args, 0);
- args->minleft = minleft;
if (error) {
trace_xfs_alloc_vextent_nofix(args);
goto error0;
@@ -2750,9 +2701,7 @@ xfs_alloc_vextent(
*/
for (;;) {
args->pag = xfs_perag_get(mp, args->agno);
- if (no_min) args->minleft = 0;
error = xfs_alloc_fix_freelist(args, flags);
- args->minleft = minleft;
if (error) {
trace_xfs_alloc_vextent_nofix(args);
goto error0;
@@ -2792,20 +2741,17 @@ xfs_alloc_vextent(
* or switch to non-trylock mode.
*/
if (args->agno == sagno) {
- if (no_min == 1) {
+ if (flags == 0) {
args->agbno = NULLAGBLOCK;
trace_xfs_alloc_vextent_allfailed(args);
break;
}
- if (flags == 0) {
- no_min = 1;
- } else {
- flags = 0;
- if (type == XFS_ALLOCTYPE_START_BNO) {
- args->agbno = XFS_FSB_TO_AGBNO(mp,
- args->fsbno);
- args->type = XFS_ALLOCTYPE_NEAR_BNO;
- }
+
+ flags = 0;
+ if (type == XFS_ALLOCTYPE_START_BNO) {
+ args->agbno = XFS_FSB_TO_AGBNO(mp,
+ args->fsbno);
+ args->type = XFS_ALLOCTYPE_NEAR_BNO;
}
}
xfs_perag_put(args->pag);
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index 7c404a6b0ae3..1d0f48a501a3 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -56,7 +56,7 @@ typedef unsigned int xfs_alloctype_t;
#define XFS_ALLOC_FLAG_FREEING 0x00000002 /* indicate caller is freeing extents*/
#define XFS_ALLOC_FLAG_NORMAP 0x00000004 /* don't modify the rmapbt */
#define XFS_ALLOC_FLAG_NOSHRINK 0x00000008 /* don't shrink the freelist */
-
+#define XFS_ALLOC_FLAG_CHECK 0x00000010 /* test only, don't modify args */
/*
* Argument structure for xfs_alloc routines.
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index af1ecb19121e..6622d46ddec3 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -131,9 +131,6 @@ xfs_attr_get(
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
return -EIO;
- if (!xfs_inode_hasattr(ip))
- return -ENOATTR;
-
error = xfs_attr_args_init(&args, ip, name, flags);
if (error)
return error;
@@ -392,9 +389,6 @@ xfs_attr_remove(
if (XFS_FORCED_SHUTDOWN(dp->i_mount))
return -EIO;
- if (!xfs_inode_hasattr(dp))
- return -ENOATTR;
-
error = xfs_attr_args_init(&args, dp, name, flags);
if (error)
return error;
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 2760bc3b2536..2ae55db8c977 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -740,15 +740,9 @@ xfs_bmap_extents_to_btree(
* Fill in the root.
*/
block = ifp->if_broot;
- if (xfs_sb_version_hascrc(&mp->m_sb))
- xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL,
- XFS_BMAP_CRC_MAGIC, 1, 1, ip->i_ino,
- XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS);
- else
- xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL,
- XFS_BMAP_MAGIC, 1, 1, ip->i_ino,
+ xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL,
+ XFS_BTNUM_BMAP, 1, 1, ip->i_ino,
XFS_BTREE_LONG_PTRS);
-
/*
* Need a cursor. Can't allocate until bb_level is filled in.
*/
@@ -817,13 +811,8 @@ try_another_ag:
*/
abp->b_ops = &xfs_bmbt_buf_ops;
ablock = XFS_BUF_TO_BLOCK(abp);
- if (xfs_sb_version_hascrc(&mp->m_sb))
- xfs_btree_init_block_int(mp, ablock, abp->b_bn,
- XFS_BMAP_CRC_MAGIC, 0, 0, ip->i_ino,
- XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS);
- else
- xfs_btree_init_block_int(mp, ablock, abp->b_bn,
- XFS_BMAP_MAGIC, 0, 0, ip->i_ino,
+ xfs_btree_init_block_int(mp, ablock, abp->b_bn,
+ XFS_BTNUM_BMAP, 0, 0, ip->i_ino,
XFS_BTREE_LONG_PTRS);
arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
@@ -1278,7 +1267,6 @@ xfs_bmap_read_extents(
/* REFERENCED */
xfs_extnum_t room; /* number of entries there's room for */
- bno = NULLFSBLOCK;
mp = ip->i_mount;
ifp = XFS_IFORK_PTR(ip, whichfork);
exntf = (whichfork != XFS_DATA_FORK) ? XFS_EXTFMT_NOSTATE :
@@ -1291,9 +1279,7 @@ xfs_bmap_read_extents(
ASSERT(level > 0);
pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
bno = be64_to_cpu(*pp);
- ASSERT(bno != NULLFSBLOCK);
- ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
- ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
+
/*
* Go down the tree until leaf level is reached, following the first
* pointer (leftmost) at each level.
@@ -1864,6 +1850,7 @@ xfs_bmap_add_extent_delay_real(
*/
trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
xfs_bmbt_set_startblock(ep, new->br_startblock);
+ xfs_bmbt_set_state(ep, new->br_state);
trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
(*nextents)++;
@@ -2202,6 +2189,7 @@ STATIC int /* error */
xfs_bmap_add_extent_unwritten_real(
struct xfs_trans *tp,
xfs_inode_t *ip, /* incore inode pointer */
+ int whichfork,
xfs_extnum_t *idx, /* extent number to update/insert */
xfs_btree_cur_t **curp, /* if *curp is null, not a btree */
xfs_bmbt_irec_t *new, /* new data to add to file extents */
@@ -2221,12 +2209,14 @@ xfs_bmap_add_extent_unwritten_real(
/* left is 0, right is 1, prev is 2 */
int rval=0; /* return value (logging flags) */
int state = 0;/* state bits, accessed thru macros */
- struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_mount *mp = ip->i_mount;
*logflagsp = 0;
cur = *curp;
- ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+ ifp = XFS_IFORK_PTR(ip, whichfork);
+ if (whichfork == XFS_COW_FORK)
+ state |= BMAP_COWFORK;
ASSERT(*idx >= 0);
ASSERT(*idx <= xfs_iext_count(ifp));
@@ -2285,7 +2275,7 @@ xfs_bmap_add_extent_unwritten_real(
* Don't set contiguous if the combined extent would be too large.
* Also check for all-three-contiguous being too large.
*/
- if (*idx < xfs_iext_count(&ip->i_df) - 1) {
+ if (*idx < xfs_iext_count(ifp) - 1) {
state |= BMAP_RIGHT_VALID;
xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT);
if (isnullstartblock(RIGHT.br_startblock))
@@ -2325,7 +2315,8 @@ xfs_bmap_add_extent_unwritten_real(
trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
xfs_iext_remove(ip, *idx + 1, 2, state);
- ip->i_d.di_nextents -= 2;
+ XFS_IFORK_NEXT_SET(ip, whichfork,
+ XFS_IFORK_NEXTENTS(ip, whichfork) - 2);
if (cur == NULL)
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
else {
@@ -2368,7 +2359,8 @@ xfs_bmap_add_extent_unwritten_real(
trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
xfs_iext_remove(ip, *idx + 1, 1, state);
- ip->i_d.di_nextents--;
+ XFS_IFORK_NEXT_SET(ip, whichfork,
+ XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
if (cur == NULL)
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
else {
@@ -2403,7 +2395,8 @@ xfs_bmap_add_extent_unwritten_real(
xfs_bmbt_set_state(ep, newext);
trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
xfs_iext_remove(ip, *idx + 1, 1, state);
- ip->i_d.di_nextents--;
+ XFS_IFORK_NEXT_SET(ip, whichfork,
+ XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
if (cur == NULL)
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
else {
@@ -2515,7 +2508,8 @@ xfs_bmap_add_extent_unwritten_real(
trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
xfs_iext_insert(ip, *idx, 1, new, state);
- ip->i_d.di_nextents++;
+ XFS_IFORK_NEXT_SET(ip, whichfork,
+ XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
if (cur == NULL)
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
else {
@@ -2593,7 +2587,8 @@ xfs_bmap_add_extent_unwritten_real(
++*idx;
xfs_iext_insert(ip, *idx, 1, new, state);
- ip->i_d.di_nextents++;
+ XFS_IFORK_NEXT_SET(ip, whichfork,
+ XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
if (cur == NULL)
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
else {
@@ -2641,7 +2636,8 @@ xfs_bmap_add_extent_unwritten_real(
++*idx;
xfs_iext_insert(ip, *idx, 2, &r[0], state);
- ip->i_d.di_nextents += 2;
+ XFS_IFORK_NEXT_SET(ip, whichfork,
+ XFS_IFORK_NEXTENTS(ip, whichfork) + 2);
if (cur == NULL)
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
else {
@@ -2695,17 +2691,17 @@ xfs_bmap_add_extent_unwritten_real(
}
/* update reverse mappings */
- error = xfs_rmap_convert_extent(mp, dfops, ip, XFS_DATA_FORK, new);
+ error = xfs_rmap_convert_extent(mp, dfops, ip, whichfork, new);
if (error)
goto done;
/* convert to a btree if necessary */
- if (xfs_bmap_needs_btree(ip, XFS_DATA_FORK)) {
+ if (xfs_bmap_needs_btree(ip, whichfork)) {
int tmp_logflags; /* partial log flag return val */
ASSERT(cur == NULL);
error = xfs_bmap_extents_to_btree(tp, ip, first, dfops, &cur,
- 0, &tmp_logflags, XFS_DATA_FORK);
+ 0, &tmp_logflags, whichfork);
*logflagsp |= tmp_logflags;
if (error)
goto done;
@@ -2717,7 +2713,7 @@ xfs_bmap_add_extent_unwritten_real(
*curp = cur;
}
- xfs_bmap_check_leaf_extents(*curp, ip, XFS_DATA_FORK);
+ xfs_bmap_check_leaf_extents(*curp, ip, whichfork);
done:
*logflagsp |= rval;
return error;
@@ -2809,7 +2805,8 @@ xfs_bmap_add_extent_hole_delay(
oldlen = startblockval(left.br_startblock) +
startblockval(new->br_startblock) +
startblockval(right.br_startblock);
- newlen = xfs_bmap_worst_indlen(ip, temp);
+ newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
+ oldlen);
xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx),
nullstartblock((int)newlen));
trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
@@ -2830,7 +2827,8 @@ xfs_bmap_add_extent_hole_delay(
xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp);
oldlen = startblockval(left.br_startblock) +
startblockval(new->br_startblock);
- newlen = xfs_bmap_worst_indlen(ip, temp);
+ newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
+ oldlen);
xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx),
nullstartblock((int)newlen));
trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
@@ -2846,7 +2844,8 @@ xfs_bmap_add_extent_hole_delay(
temp = new->br_blockcount + right.br_blockcount;
oldlen = startblockval(new->br_startblock) +
startblockval(right.br_startblock);
- newlen = xfs_bmap_worst_indlen(ip, temp);
+ newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
+ oldlen);
xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx),
new->br_startoff,
nullstartblock((int)newlen), temp, right.br_state);
@@ -2899,13 +2898,14 @@ xfs_bmap_add_extent_hole_real(
ASSERT(!isnullstartblock(new->br_startblock));
ASSERT(!bma->cur ||
!(bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));
- ASSERT(whichfork != XFS_COW_FORK);
XFS_STATS_INC(mp, xs_add_exlist);
state = 0;
if (whichfork == XFS_ATTR_FORK)
state |= BMAP_ATTRFORK;
+ if (whichfork == XFS_COW_FORK)
+ state |= BMAP_COWFORK;
/*
* Check and set flags if this segment has a left neighbor.
@@ -3629,7 +3629,7 @@ xfs_bmap_btalloc(
align = xfs_get_cowextsz_hint(ap->ip);
else if (xfs_alloc_is_userdata(ap->datatype))
align = xfs_get_extsz_hint(ap->ip);
- if (unlikely(align)) {
+ if (align) {
error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev,
align, 0, ap->eof, 0, ap->conv,
&ap->offset, &ap->length);
@@ -3701,7 +3701,7 @@ xfs_bmap_btalloc(
args.minlen = ap->minlen;
}
/* apply extent size hints if obtained earlier */
- if (unlikely(align)) {
+ if (align) {
args.prod = align;
if ((args.mod = (xfs_extlen_t)do_mod(ap->offset, args.prod)))
args.mod = (xfs_extlen_t)(args.prod - args.mod);
@@ -3812,7 +3812,6 @@ xfs_bmap_btalloc(
args.fsbno = 0;
args.type = XFS_ALLOCTYPE_FIRST_AG;
args.total = ap->minlen;
- args.minleft = 0;
if ((error = xfs_alloc_vextent(&args)))
return error;
ap->dfops->dop_low = true;
@@ -4344,8 +4343,6 @@ xfs_bmapi_allocate(
if (error)
return error;
- if (bma->dfops->dop_low)
- bma->minleft = 0;
if (bma->cur)
bma->cur->bc_private.b.firstblock = *bma->firstblock;
if (bma->blkno == NULLFSBLOCK)
@@ -4371,10 +4368,16 @@ xfs_bmapi_allocate(
bma->got.br_state = XFS_EXT_NORM;
/*
- * A wasdelay extent has been initialized, so shouldn't be flagged
- * as unwritten.
+ * In the data fork, a wasdelay extent has been initialized, so
+ * shouldn't be flagged as unwritten.
+ *
+ * For the cow fork, however, we convert delalloc reservations
+ * (extents allocated for speculative preallocation) to
+ * allocated unwritten extents, and only convert the unwritten
+ * extents to real extents when we're about to write the data.
*/
- if (!bma->wasdel && (bma->flags & XFS_BMAPI_PREALLOC) &&
+ if ((!bma->wasdel || (bma->flags & XFS_BMAPI_COWFORK)) &&
+ (bma->flags & XFS_BMAPI_PREALLOC) &&
xfs_sb_version_hasextflgbit(&mp->m_sb))
bma->got.br_state = XFS_EXT_UNWRITTEN;
@@ -4425,8 +4428,6 @@ xfs_bmapi_convert_unwritten(
(XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT))
return 0;
- ASSERT(whichfork != XFS_COW_FORK);
-
/*
* Modify (by adding) the state flag, if writing.
*/
@@ -4451,8 +4452,8 @@ xfs_bmapi_convert_unwritten(
return error;
}
- error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, &bma->idx,
- &bma->cur, mval, bma->firstblock, bma->dfops,
+ error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, whichfork,
+ &bma->idx, &bma->cur, mval, bma->firstblock, bma->dfops,
&tmp_logflags);
/*
* Log the inode core unconditionally in the unwritten extent conversion
@@ -4461,8 +4462,12 @@ xfs_bmapi_convert_unwritten(
* in the transaction for the sake of fsync(), even if nothing has
* changed, because fsync() will not force the log for this transaction
* unless it sees the inode pinned.
+ *
+ * Note: If we're only converting cow fork extents, there aren't
+ * any on-disk updates to make, so we don't need to log anything.
*/
- bma->logflags |= tmp_logflags | XFS_ILOG_CORE;
+ if (whichfork != XFS_COW_FORK)
+ bma->logflags |= tmp_logflags | XFS_ILOG_CORE;
if (error)
return error;
@@ -4517,8 +4522,6 @@ xfs_bmapi_write(
int n; /* current extent index */
xfs_fileoff_t obno; /* old block number (offset) */
int whichfork; /* data or attr fork */
- char inhole; /* current location is hole in file */
- char wasdelay; /* old extent was delayed */
#ifdef DEBUG
xfs_fileoff_t orig_bno; /* original block number value */
@@ -4538,15 +4541,15 @@ xfs_bmapi_write(
ASSERT(*nmap >= 1);
ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
ASSERT(!(flags & XFS_BMAPI_IGSTATE));
- ASSERT(tp != NULL);
+ ASSERT(tp != NULL ||
+ (flags & (XFS_BMAPI_CONVERT | XFS_BMAPI_COWFORK)) ==
+ (XFS_BMAPI_CONVERT | XFS_BMAPI_COWFORK));
ASSERT(len > 0);
ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL);
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
ASSERT(!(flags & XFS_BMAPI_REMAP) || whichfork == XFS_DATA_FORK);
ASSERT(!(flags & XFS_BMAPI_PREALLOC) || !(flags & XFS_BMAPI_REMAP));
ASSERT(!(flags & XFS_BMAPI_CONVERT) || !(flags & XFS_BMAPI_REMAP));
- ASSERT(!(flags & XFS_BMAPI_PREALLOC) || whichfork != XFS_COW_FORK);
- ASSERT(!(flags & XFS_BMAPI_CONVERT) || whichfork != XFS_COW_FORK);
/* zeroing is for currently only for data extents, not metadata */
ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) !=
@@ -4606,22 +4609,44 @@ xfs_bmapi_write(
bma.firstblock = firstblock;
while (bno < end && n < *nmap) {
- inhole = eof || bma.got.br_startoff > bno;
- wasdelay = !inhole && isnullstartblock(bma.got.br_startblock);
+ bool need_alloc = false, wasdelay = false;
- /*
- * Make sure we only reflink into a hole.
- */
- if (flags & XFS_BMAPI_REMAP)
- ASSERT(inhole);
- if (flags & XFS_BMAPI_COWFORK)
- ASSERT(!inhole);
+ /* in hole or beyoned EOF? */
+ if (eof || bma.got.br_startoff > bno) {
+ if (flags & XFS_BMAPI_DELALLOC) {
+ /*
+ * For the COW fork we can reasonably get a
+ * request for converting an extent that races
+ * with other threads already having converted
+ * part of it, as there converting COW to
+ * regular blocks is not protected using the
+ * IOLOCK.
+ */
+ ASSERT(flags & XFS_BMAPI_COWFORK);
+ if (!(flags & XFS_BMAPI_COWFORK)) {
+ error = -EIO;
+ goto error0;
+ }
+
+ if (eof || bno >= end)
+ break;
+ } else {
+ need_alloc = true;
+ }
+ } else {
+ /*
+ * Make sure we only reflink into a hole.
+ */
+ ASSERT(!(flags & XFS_BMAPI_REMAP));
+ if (isnullstartblock(bma.got.br_startblock))
+ wasdelay = true;
+ }
/*
* First, deal with the hole before the allocated space
* that we found, if any.
*/
- if (inhole || wasdelay) {
+ if (need_alloc || wasdelay) {
bma.eof = eof;
bma.conv = !!(flags & XFS_BMAPI_CONVERT);
bma.wasdel = wasdelay;
@@ -4770,34 +4795,59 @@ xfs_bmap_split_indlen(
xfs_filblks_t len2 = *indlen2;
xfs_filblks_t nres = len1 + len2; /* new total res. */
xfs_filblks_t stolen = 0;
+ xfs_filblks_t resfactor;
/*
* Steal as many blocks as we can to try and satisfy the worst case
* indlen for both new extents.
*/
- while (nres > ores && avail) {
- nres--;
- avail--;
- stolen++;
- }
+ if (ores < nres && avail)
+ stolen = XFS_FILBLKS_MIN(nres - ores, avail);
+ ores += stolen;
+
+ /* nothing else to do if we've satisfied the new reservation */
+ if (ores >= nres)
+ return stolen;
+
+ /*
+ * We can't meet the total required reservation for the two extents.
+ * Calculate the percent of the overall shortage between both extents
+ * and apply this percentage to each of the requested indlen values.
+ * This distributes the shortage fairly and reduces the chances that one
+ * of the two extents is left with nothing when extents are repeatedly
+ * split.
+ */
+ resfactor = (ores * 100);
+ do_div(resfactor, nres);
+ len1 *= resfactor;
+ do_div(len1, 100);
+ len2 *= resfactor;
+ do_div(len2, 100);
+ ASSERT(len1 + len2 <= ores);
+ ASSERT(len1 < *indlen1 && len2 < *indlen2);
/*
- * The only blocks available are those reserved for the original
- * extent and what we can steal from the extent being removed.
- * If this still isn't enough to satisfy the combined
- * requirements for the two new extents, skim blocks off of each
- * of the new reservations until they match what is available.
+ * Hand out the remainder to each extent. If one of the two reservations
+ * is zero, we want to make sure that one gets a block first. The loop
+ * below starts with len1, so hand len2 a block right off the bat if it
+ * is zero.
*/
- while (nres > ores) {
- if (len1) {
- len1--;
- nres--;
+ ores -= (len1 + len2);
+ ASSERT((*indlen1 - len1) + (*indlen2 - len2) >= ores);
+ if (ores && !len2 && *indlen2) {
+ len2++;
+ ores--;
+ }
+ while (ores) {
+ if (len1 < *indlen1) {
+ len1++;
+ ores--;
}
- if (nres == ores)
+ if (!ores)
break;
- if (len2) {
- len2--;
- nres--;
+ if (len2 < *indlen2) {
+ len2++;
+ ores--;
}
}
@@ -5539,8 +5589,8 @@ __xfs_bunmapi(
}
del.br_state = XFS_EXT_UNWRITTEN;
error = xfs_bmap_add_extent_unwritten_real(tp, ip,
- &lastx, &cur, &del, firstblock, dfops,
- &logflags);
+ whichfork, &lastx, &cur, &del,
+ firstblock, dfops, &logflags);
if (error)
goto error0;
goto nodelete;
@@ -5593,8 +5643,9 @@ __xfs_bunmapi(
prev.br_state = XFS_EXT_UNWRITTEN;
lastx--;
error = xfs_bmap_add_extent_unwritten_real(tp,
- ip, &lastx, &cur, &prev,
- firstblock, dfops, &logflags);
+ ip, whichfork, &lastx, &cur,
+ &prev, firstblock, dfops,
+ &logflags);
if (error)
goto error0;
goto nodelete;
@@ -5602,8 +5653,9 @@ __xfs_bunmapi(
ASSERT(del.br_state == XFS_EXT_NORM);
del.br_state = XFS_EXT_UNWRITTEN;
error = xfs_bmap_add_extent_unwritten_real(tp,
- ip, &lastx, &cur, &del,
- firstblock, dfops, &logflags);
+ ip, whichfork, &lastx, &cur,
+ &del, firstblock, dfops,
+ &logflags);
if (error)
goto error0;
goto nodelete;
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index cecd094404cc..cdef87db5262 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -110,6 +110,9 @@ struct xfs_extent_free_item
/* Map something in the CoW fork. */
#define XFS_BMAPI_COWFORK 0x200
+/* Only convert delalloc space, don't allocate entirely new extents */
+#define XFS_BMAPI_DELALLOC 0x400
+
#define XFS_BMAPI_FLAGS \
{ XFS_BMAPI_ENTIRE, "ENTIRE" }, \
{ XFS_BMAPI_METADATA, "METADATA" }, \
@@ -120,7 +123,8 @@ struct xfs_extent_free_item
{ XFS_BMAPI_CONVERT, "CONVERT" }, \
{ XFS_BMAPI_ZERO, "ZERO" }, \
{ XFS_BMAPI_REMAP, "REMAP" }, \
- { XFS_BMAPI_COWFORK, "COWFORK" }
+ { XFS_BMAPI_COWFORK, "COWFORK" }, \
+ { XFS_BMAPI_DELALLOC, "DELALLOC" }
static inline int xfs_bmapi_aflag(int w)
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index d6330c297ca0..f93072b58a58 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -71,15 +71,9 @@ xfs_bmdr_to_bmbt(
xfs_bmbt_key_t *tkp;
__be64 *tpp;
- if (xfs_sb_version_hascrc(&mp->m_sb))
- xfs_btree_init_block_int(mp, rblock, XFS_BUF_DADDR_NULL,
- XFS_BMAP_CRC_MAGIC, 0, 0, ip->i_ino,
- XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS);
- else
- xfs_btree_init_block_int(mp, rblock, XFS_BUF_DADDR_NULL,
- XFS_BMAP_MAGIC, 0, 0, ip->i_ino,
+ xfs_btree_init_block_int(mp, rblock, XFS_BUF_DADDR_NULL,
+ XFS_BTNUM_BMAP, 0, 0, ip->i_ino,
XFS_BTREE_LONG_PTRS);
-
rblock->bb_level = dblock->bb_level;
ASSERT(be16_to_cpu(rblock->bb_level) > 0);
rblock->bb_numrecs = dblock->bb_numrecs;
@@ -502,12 +496,11 @@ try_another_ag:
if (args.fsbno == NULLFSBLOCK && args.minleft) {
/*
* Could not find an AG with enough free space to satisfy
- * a full btree split. Try again without minleft and if
+ * a full btree split. Try again and if
* successful activate the lowspace algorithm.
*/
args.fsbno = 0;
args.type = XFS_ALLOCTYPE_FIRST_AG;
- args.minleft = 0;
error = xfs_alloc_vextent(&args);
if (error)
goto error0;
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index 21e6a6ab6b9a..c3decedc9455 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -50,8 +50,18 @@ static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = {
XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC,
XFS_REFC_CRC_MAGIC }
};
-#define xfs_btree_magic(cur) \
- xfs_magics[!!((cur)->bc_flags & XFS_BTREE_CRC_BLOCKS)][cur->bc_btnum]
+
+__uint32_t
+xfs_btree_magic(
+ int crc,
+ xfs_btnum_t btnum)
+{
+ __uint32_t magic = xfs_magics[crc][btnum];
+
+ /* Ensure we asked for crc for crc-only magics. */
+ ASSERT(magic != 0);
+ return magic;
+}
STATIC int /* error (0 or EFSCORRUPTED) */
xfs_btree_check_lblock(
@@ -62,10 +72,13 @@ xfs_btree_check_lblock(
{
int lblock_ok = 1; /* block passes checks */
struct xfs_mount *mp; /* file system mount point */
+ xfs_btnum_t btnum = cur->bc_btnum;
+ int crc;
mp = cur->bc_mp;
+ crc = xfs_sb_version_hascrc(&mp->m_sb);
- if (xfs_sb_version_hascrc(&mp->m_sb)) {
+ if (crc) {
lblock_ok = lblock_ok &&
uuid_equal(&block->bb_u.l.bb_uuid,
&mp->m_sb.sb_meta_uuid) &&
@@ -74,7 +87,7 @@ xfs_btree_check_lblock(
}
lblock_ok = lblock_ok &&
- be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) &&
+ be32_to_cpu(block->bb_magic) == xfs_btree_magic(crc, btnum) &&
be16_to_cpu(block->bb_level) == level &&
be16_to_cpu(block->bb_numrecs) <=
cur->bc_ops->get_maxrecs(cur, level) &&
@@ -110,13 +123,16 @@ xfs_btree_check_sblock(
struct xfs_agf *agf; /* ag. freespace structure */
xfs_agblock_t agflen; /* native ag. freespace length */
int sblock_ok = 1; /* block passes checks */
+ xfs_btnum_t btnum = cur->bc_btnum;
+ int crc;
mp = cur->bc_mp;
+ crc = xfs_sb_version_hascrc(&mp->m_sb);
agbp = cur->bc_private.a.agbp;
agf = XFS_BUF_TO_AGF(agbp);
agflen = be32_to_cpu(agf->agf_length);
- if (xfs_sb_version_hascrc(&mp->m_sb)) {
+ if (crc) {
sblock_ok = sblock_ok &&
uuid_equal(&block->bb_u.s.bb_uuid,
&mp->m_sb.sb_meta_uuid) &&
@@ -125,7 +141,7 @@ xfs_btree_check_sblock(
}
sblock_ok = sblock_ok &&
- be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) &&
+ be32_to_cpu(block->bb_magic) == xfs_btree_magic(crc, btnum) &&
be16_to_cpu(block->bb_level) == level &&
be16_to_cpu(block->bb_numrecs) <=
cur->bc_ops->get_maxrecs(cur, level) &&
@@ -810,7 +826,8 @@ xfs_btree_read_bufl(
xfs_daddr_t d; /* real disk block address */
int error;
- ASSERT(fsbno != NULLFSBLOCK);
+ if (!XFS_FSB_SANITY_CHECK(mp, fsbno))
+ return -EFSCORRUPTED;
d = XFS_FSB_TO_DADDR(mp, fsbno);
error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
mp->m_bsize, lock, &bp, ops);
@@ -1084,12 +1101,15 @@ xfs_btree_init_block_int(
struct xfs_mount *mp,
struct xfs_btree_block *buf,
xfs_daddr_t blkno,
- __u32 magic,
+ xfs_btnum_t btnum,
__u16 level,
__u16 numrecs,
__u64 owner,
unsigned int flags)
{
+ int crc = xfs_sb_version_hascrc(&mp->m_sb);
+ __u32 magic = xfs_btree_magic(crc, btnum);
+
buf->bb_magic = cpu_to_be32(magic);
buf->bb_level = cpu_to_be16(level);
buf->bb_numrecs = cpu_to_be16(numrecs);
@@ -1097,7 +1117,7 @@ xfs_btree_init_block_int(
if (flags & XFS_BTREE_LONG_PTRS) {
buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLFSBLOCK);
buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLFSBLOCK);
- if (flags & XFS_BTREE_CRC_BLOCKS) {
+ if (crc) {
buf->bb_u.l.bb_blkno = cpu_to_be64(blkno);
buf->bb_u.l.bb_owner = cpu_to_be64(owner);
uuid_copy(&buf->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid);
@@ -1110,7 +1130,7 @@ xfs_btree_init_block_int(
buf->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
buf->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
- if (flags & XFS_BTREE_CRC_BLOCKS) {
+ if (crc) {
buf->bb_u.s.bb_blkno = cpu_to_be64(blkno);
buf->bb_u.s.bb_owner = cpu_to_be32(__owner);
uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid);
@@ -1123,14 +1143,14 @@ void
xfs_btree_init_block(
struct xfs_mount *mp,
struct xfs_buf *bp,
- __u32 magic,
+ xfs_btnum_t btnum,
__u16 level,
__u16 numrecs,
__u64 owner,
unsigned int flags)
{
xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
- magic, level, numrecs, owner, flags);
+ btnum, level, numrecs, owner, flags);
}
STATIC void
@@ -1140,7 +1160,7 @@ xfs_btree_init_block_cur(
int level,
int numrecs)
{
- __u64 owner;
+ __u64 owner;
/*
* we can pull the owner from the cursor right now as the different
@@ -1154,7 +1174,7 @@ xfs_btree_init_block_cur(
owner = cur->bc_private.a.agno;
xfs_btree_init_block_int(cur->bc_mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
- xfs_btree_magic(cur), level, numrecs,
+ cur->bc_btnum, level, numrecs,
owner, cur->bc_flags);
}
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index b69b947c4c1b..4bb62580a7fd 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -76,6 +76,8 @@ union xfs_btree_rec {
#define XFS_BTNUM_RMAP ((xfs_btnum_t)XFS_BTNUM_RMAPi)
#define XFS_BTNUM_REFC ((xfs_btnum_t)XFS_BTNUM_REFCi)
+__uint32_t xfs_btree_magic(int crc, xfs_btnum_t btnum);
+
/*
* For logging record fields.
*/
@@ -378,7 +380,7 @@ void
xfs_btree_init_block(
struct xfs_mount *mp,
struct xfs_buf *bp,
- __u32 magic,
+ xfs_btnum_t btnum,
__u16 level,
__u16 numrecs,
__u64 owner,
@@ -389,7 +391,7 @@ xfs_btree_init_block_int(
struct xfs_mount *mp,
struct xfs_btree_block *buf,
xfs_daddr_t blkno,
- __u32 magic,
+ xfs_btnum_t btnum,
__u16 level,
__u16 numrecs,
__u64 owner,
@@ -456,7 +458,7 @@ static inline int xfs_btree_get_level(struct xfs_btree_block *block)
#define XFS_FILBLKS_MAX(a,b) max_t(xfs_filblks_t, (a), (b))
#define XFS_FSB_SANITY_CHECK(mp,fsb) \
- (XFS_FSB_TO_AGNO(mp, fsb) < mp->m_sb.sb_agcount && \
+ (fsb && XFS_FSB_TO_AGNO(mp, fsb) < mp->m_sb.sb_agcount && \
XFS_FSB_TO_AGBNO(mp, fsb) < mp->m_sb.sb_agblocks)
/*
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index f2dc1a950c85..1bdf2888295b 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -2633,7 +2633,7 @@ out_free:
/*
* Readahead the dir/attr block.
*/
-xfs_daddr_t
+int
xfs_da_reada_buf(
struct xfs_inode *dp,
xfs_dablk_t bno,
@@ -2664,7 +2664,5 @@ out_free:
if (mapp != &map)
kmem_free(mapp);
- if (error)
- return -1;
- return mappedbno;
+ return error;
}
diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h
index 98c75cbe6ac2..4e29cb6a3627 100644
--- a/fs/xfs/libxfs/xfs_da_btree.h
+++ b/fs/xfs/libxfs/xfs_da_btree.h
@@ -201,7 +201,7 @@ int xfs_da_read_buf(struct xfs_trans *trans, struct xfs_inode *dp,
xfs_dablk_t bno, xfs_daddr_t mappedbno,
struct xfs_buf **bpp, int whichfork,
const struct xfs_buf_ops *ops);
-xfs_daddr_t xfs_da_reada_buf(struct xfs_inode *dp, xfs_dablk_t bno,
+int xfs_da_reada_buf(struct xfs_inode *dp, xfs_dablk_t bno,
xfs_daddr_t mapped_bno, int whichfork,
const struct xfs_buf_ops *ops);
int xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c
index c58d72c220f5..2f389d366e93 100644
--- a/fs/xfs/libxfs/xfs_dir2.c
+++ b/fs/xfs/libxfs/xfs_dir2.c
@@ -36,21 +36,29 @@
struct xfs_name xfs_name_dotdot = { (unsigned char *)"..", 2, XFS_DIR3_FT_DIR };
/*
- * @mode, if set, indicates that the type field needs to be set up.
- * This uses the transformation from file mode to DT_* as defined in linux/fs.h
- * for file type specification. This will be propagated into the directory
- * structure if appropriate for the given operation and filesystem config.
+ * Convert inode mode to directory entry filetype
*/
-const unsigned char xfs_mode_to_ftype[S_IFMT >> S_SHIFT] = {
- [0] = XFS_DIR3_FT_UNKNOWN,
- [S_IFREG >> S_SHIFT] = XFS_DIR3_FT_REG_FILE,
- [S_IFDIR >> S_SHIFT] = XFS_DIR3_FT_DIR,
- [S_IFCHR >> S_SHIFT] = XFS_DIR3_FT_CHRDEV,
- [S_IFBLK >> S_SHIFT] = XFS_DIR3_FT_BLKDEV,
- [S_IFIFO >> S_SHIFT] = XFS_DIR3_FT_FIFO,
- [S_IFSOCK >> S_SHIFT] = XFS_DIR3_FT_SOCK,
- [S_IFLNK >> S_SHIFT] = XFS_DIR3_FT_SYMLINK,
-};
+unsigned char xfs_mode_to_ftype(int mode)
+{
+ switch (mode & S_IFMT) {
+ case S_IFREG:
+ return XFS_DIR3_FT_REG_FILE;
+ case S_IFDIR:
+ return XFS_DIR3_FT_DIR;
+ case S_IFCHR:
+ return XFS_DIR3_FT_CHRDEV;
+ case S_IFBLK:
+ return XFS_DIR3_FT_BLKDEV;
+ case S_IFIFO:
+ return XFS_DIR3_FT_FIFO;
+ case S_IFSOCK:
+ return XFS_DIR3_FT_SOCK;
+ case S_IFLNK:
+ return XFS_DIR3_FT_SYMLINK;
+ default:
+ return XFS_DIR3_FT_UNKNOWN;
+ }
+}
/*
* ASCII case-insensitive (ie. A-Z) support for directories that was
@@ -631,7 +639,8 @@ xfs_dir2_isblock(
if ((rval = xfs_bmap_last_offset(args->dp, &last, XFS_DATA_FORK)))
return rval;
rval = XFS_FSB_TO_B(args->dp->i_mount, last) == args->geo->blksize;
- ASSERT(rval == 0 || args->dp->i_d.di_size == args->geo->blksize);
+ if (rval != 0 && args->dp->i_d.di_size != args->geo->blksize)
+ return -EFSCORRUPTED;
*vp = rval;
return 0;
}
diff --git a/fs/xfs/libxfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h
index 0197590fa7d7..d6e6d9d16f6c 100644
--- a/fs/xfs/libxfs/xfs_dir2.h
+++ b/fs/xfs/libxfs/xfs_dir2.h
@@ -18,6 +18,9 @@
#ifndef __XFS_DIR2_H__
#define __XFS_DIR2_H__
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+
struct xfs_defer_ops;
struct xfs_da_args;
struct xfs_inode;
@@ -32,10 +35,9 @@ struct xfs_dir2_data_unused;
extern struct xfs_name xfs_name_dotdot;
/*
- * directory filetype conversion tables.
+ * Convert inode mode to directory entry filetype
*/
-#define S_SHIFT 12
-extern const unsigned char xfs_mode_to_ftype[];
+extern unsigned char xfs_mode_to_ftype(int mode);
/*
* directory operations vector for encode/decode routines
diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
index 75a557432d0f..bbd1238852b3 100644
--- a/fs/xfs/libxfs/xfs_dir2_node.c
+++ b/fs/xfs/libxfs/xfs_dir2_node.c
@@ -155,6 +155,42 @@ const struct xfs_buf_ops xfs_dir3_free_buf_ops = {
.verify_write = xfs_dir3_free_write_verify,
};
+/* Everything ok in the free block header? */
+static bool
+xfs_dir3_free_header_check(
+ struct xfs_inode *dp,
+ xfs_dablk_t fbno,
+ struct xfs_buf *bp)
+{
+ struct xfs_mount *mp = dp->i_mount;
+ unsigned int firstdb;
+ int maxbests;
+
+ maxbests = dp->d_ops->free_max_bests(mp->m_dir_geo);
+ firstdb = (xfs_dir2_da_to_db(mp->m_dir_geo, fbno) -
+ xfs_dir2_byte_to_db(mp->m_dir_geo, XFS_DIR2_FREE_OFFSET)) *
+ maxbests;
+ if (xfs_sb_version_hascrc(&mp->m_sb)) {
+ struct xfs_dir3_free_hdr *hdr3 = bp->b_addr;
+
+ if (be32_to_cpu(hdr3->firstdb) != firstdb)
+ return false;
+ if (be32_to_cpu(hdr3->nvalid) > maxbests)
+ return false;
+ if (be32_to_cpu(hdr3->nvalid) < be32_to_cpu(hdr3->nused))
+ return false;
+ } else {
+ struct xfs_dir2_free_hdr *hdr = bp->b_addr;
+
+ if (be32_to_cpu(hdr->firstdb) != firstdb)
+ return false;
+ if (be32_to_cpu(hdr->nvalid) > maxbests)
+ return false;
+ if (be32_to_cpu(hdr->nvalid) < be32_to_cpu(hdr->nused))
+ return false;
+ }
+ return true;
+}
static int
__xfs_dir3_free_read(
@@ -168,11 +204,22 @@ __xfs_dir3_free_read(
err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp,
XFS_DATA_FORK, &xfs_dir3_free_buf_ops);
+ if (err || !*bpp)
+ return err;
+
+ /* Check things that we can't do in the verifier. */
+ if (!xfs_dir3_free_header_check(dp, fbno, *bpp)) {
+ xfs_buf_ioerror(*bpp, -EFSCORRUPTED);
+ xfs_verifier_error(*bpp);
+ xfs_trans_brelse(tp, *bpp);
+ return -EFSCORRUPTED;
+ }
/* try read returns without an error or *bpp if it lands in a hole */
- if (!err && tp && *bpp)
+ if (tp)
xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_FREE_BUF);
- return err;
+
+ return 0;
}
int
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index 0fd086d03d41..7c471881c9a6 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -82,11 +82,12 @@ xfs_finobt_set_root(
}
STATIC int
-xfs_inobt_alloc_block(
+__xfs_inobt_alloc_block(
struct xfs_btree_cur *cur,
union xfs_btree_ptr *start,
union xfs_btree_ptr *new,
- int *stat)
+ int *stat,
+ enum xfs_ag_resv_type resv)
{
xfs_alloc_arg_t args; /* block allocation args */
int error; /* error return value */
@@ -103,6 +104,7 @@ xfs_inobt_alloc_block(
args.maxlen = 1;
args.prod = 1;
args.type = XFS_ALLOCTYPE_NEAR_BNO;
+ args.resv = resv;
error = xfs_alloc_vextent(&args);
if (error) {
@@ -123,6 +125,27 @@ xfs_inobt_alloc_block(
}
STATIC int
+xfs_inobt_alloc_block(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *start,
+ union xfs_btree_ptr *new,
+ int *stat)
+{
+ return __xfs_inobt_alloc_block(cur, start, new, stat, XFS_AG_RESV_NONE);
+}
+
+STATIC int
+xfs_finobt_alloc_block(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *start,
+ union xfs_btree_ptr *new,
+ int *stat)
+{
+ return __xfs_inobt_alloc_block(cur, start, new, stat,
+ XFS_AG_RESV_METADATA);
+}
+
+STATIC int
xfs_inobt_free_block(
struct xfs_btree_cur *cur,
struct xfs_buf *bp)
@@ -328,7 +351,7 @@ static const struct xfs_btree_ops xfs_finobt_ops = {
.dup_cursor = xfs_inobt_dup_cursor,
.set_root = xfs_finobt_set_root,
- .alloc_block = xfs_inobt_alloc_block,
+ .alloc_block = xfs_finobt_alloc_block,
.free_block = xfs_inobt_free_block,
.get_minrecs = xfs_inobt_get_minrecs,
.get_maxrecs = xfs_inobt_get_maxrecs,
@@ -480,3 +503,64 @@ xfs_inobt_rec_check_count(
return 0;
}
#endif /* DEBUG */
+
+static xfs_extlen_t
+xfs_inobt_max_size(
+ struct xfs_mount *mp)
+{
+ /* Bail out if we're uninitialized, which can happen in mkfs. */
+ if (mp->m_inobt_mxr[0] == 0)
+ return 0;
+
+ return xfs_btree_calc_size(mp, mp->m_inobt_mnr,
+ (uint64_t)mp->m_sb.sb_agblocks * mp->m_sb.sb_inopblock /
+ XFS_INODES_PER_CHUNK);
+}
+
+static int
+xfs_inobt_count_blocks(
+ struct xfs_mount *mp,
+ xfs_agnumber_t agno,
+ xfs_btnum_t btnum,
+ xfs_extlen_t *tree_blocks)
+{
+ struct xfs_buf *agbp;
+ struct xfs_btree_cur *cur;
+ int error;
+
+ error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);
+ if (error)
+ return error;
+
+ cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno, btnum);
+ error = xfs_btree_count_blocks(cur, tree_blocks);
+ xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+ xfs_buf_relse(agbp);
+
+ return error;
+}
+
+/*
+ * Figure out how many blocks to reserve and how many are used by this btree.
+ */
+int
+xfs_finobt_calc_reserves(
+ struct xfs_mount *mp,
+ xfs_agnumber_t agno,
+ xfs_extlen_t *ask,
+ xfs_extlen_t *used)
+{
+ xfs_extlen_t tree_len = 0;
+ int error;
+
+ if (!xfs_sb_version_hasfinobt(&mp->m_sb))
+ return 0;
+
+ error = xfs_inobt_count_blocks(mp, agno, XFS_BTNUM_FINO, &tree_len);
+ if (error)
+ return error;
+
+ *ask += xfs_inobt_max_size(mp);
+ *used += tree_len;
+ return 0;
+}
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.h b/fs/xfs/libxfs/xfs_ialloc_btree.h
index bd88453217ce..aa81e2e63f3f 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.h
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.h
@@ -72,4 +72,7 @@ int xfs_inobt_rec_check_count(struct xfs_mount *,
#define xfs_inobt_rec_check_count(mp, rec) 0
#endif /* DEBUG */
+int xfs_finobt_calc_reserves(struct xfs_mount *mp, xfs_agnumber_t agno,
+ xfs_extlen_t *ask, xfs_extlen_t *used);
+
#endif /* __XFS_IALLOC_BTREE_H__ */
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index dd483e2767f7..d93f9d918cfc 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -29,6 +29,7 @@
#include "xfs_icache.h"
#include "xfs_trans.h"
#include "xfs_ialloc.h"
+#include "xfs_dir2.h"
/*
* Check that none of the inode's in the buffer have a next
@@ -386,6 +387,7 @@ xfs_dinode_verify(
xfs_ino_t ino,
struct xfs_dinode *dip)
{
+ uint16_t mode;
uint16_t flags;
uint64_t flags2;
@@ -396,8 +398,12 @@ xfs_dinode_verify(
if (be64_to_cpu(dip->di_size) & (1ULL << 63))
return false;
- /* No zero-length symlinks. */
- if (S_ISLNK(be16_to_cpu(dip->di_mode)) && dip->di_size == 0)
+ mode = be16_to_cpu(dip->di_mode);
+ if (mode && xfs_mode_to_ftype(mode) == XFS_DIR3_FT_UNKNOWN)
+ return false;
+
+ /* No zero-length symlinks/dirs. */
+ if ((S_ISLNK(mode) || S_ISDIR(mode)) && dip->di_size == 0)
return false;
/* only version 3 or greater inodes are extensively verified here */
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index 222e103356c6..25c1e078aef6 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -26,6 +26,7 @@
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_inode_item.h"
+#include "xfs_btree.h"
#include "xfs_bmap_btree.h"
#include "xfs_bmap.h"
#include "xfs_error.h"
@@ -429,11 +430,13 @@ xfs_iformat_btree(
/* REFERENCED */
int nrecs;
int size;
+ int level;
ifp = XFS_IFORK_PTR(ip, whichfork);
dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork);
size = XFS_BMAP_BROOT_SPACE(mp, dfp);
nrecs = be16_to_cpu(dfp->bb_numrecs);
+ level = be16_to_cpu(dfp->bb_level);
/*
* blow out if -- fork has less extents than can fit in
@@ -446,7 +449,8 @@ xfs_iformat_btree(
XFS_IFORK_MAXEXT(ip, whichfork) ||
XFS_BMDR_SPACE_CALC(nrecs) >
XFS_DFORK_SIZE(dip, mp, whichfork) ||
- XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) {
+ XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks) ||
+ level == 0 || level > XFS_BTREE_MAXLEVELS) {
xfs_warn(mp, "corrupt inode %Lu (btree).",
(unsigned long long) ip->i_ino);
XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW,
@@ -497,15 +501,14 @@ xfs_iread_extents(
* We know that the size is valid (it's checked in iformat_btree)
*/
ifp->if_bytes = ifp->if_real_bytes = 0;
- ifp->if_flags |= XFS_IFEXTENTS;
xfs_iext_add(ifp, 0, nextents);
error = xfs_bmap_read_extents(tp, ip, whichfork);
if (error) {
xfs_iext_destroy(ifp);
- ifp->if_flags &= ~XFS_IFEXTENTS;
return error;
}
xfs_validate_extents(ifp, nextents, XFS_EXTFMT_INODE(ip));
+ ifp->if_flags |= XFS_IFEXTENTS;
return 0;
}
/*
diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h
index d9f65e2d5cc8..29a01ec89dd0 100644
--- a/fs/xfs/libxfs/xfs_log_recover.h
+++ b/fs/xfs/libxfs/xfs_log_recover.h
@@ -42,7 +42,6 @@ typedef struct xlog_recover_item {
xfs_log_iovec_t *ri_buf; /* ptr to regions buffer */
} xlog_recover_item_t;
-struct xlog_tid;
typedef struct xlog_recover {
struct hlist_node r_list;
xlog_tid_t r_log_tid; /* log's transaction id */
diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c
index 6fb2215f8ff7..50add5272807 100644
--- a/fs/xfs/libxfs/xfs_refcount_btree.c
+++ b/fs/xfs/libxfs/xfs_refcount_btree.c
@@ -409,13 +409,14 @@ xfs_refcountbt_calc_size(
*/
xfs_extlen_t
xfs_refcountbt_max_size(
- struct xfs_mount *mp)
+ struct xfs_mount *mp,
+ xfs_agblock_t agblocks)
{
/* Bail out if we're uninitialized, which can happen in mkfs. */
if (mp->m_refc_mxr[0] == 0)
return 0;
- return xfs_refcountbt_calc_size(mp, mp->m_sb.sb_agblocks);
+ return xfs_refcountbt_calc_size(mp, agblocks);
}
/*
@@ -430,22 +431,24 @@ xfs_refcountbt_calc_reserves(
{
struct xfs_buf *agbp;
struct xfs_agf *agf;
+ xfs_agblock_t agblocks;
xfs_extlen_t tree_len;
int error;
if (!xfs_sb_version_hasreflink(&mp->m_sb))
return 0;
- *ask += xfs_refcountbt_max_size(mp);
error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
if (error)
return error;
agf = XFS_BUF_TO_AGF(agbp);
+ agblocks = be32_to_cpu(agf->agf_length);
tree_len = be32_to_cpu(agf->agf_refcount_blocks);
xfs_buf_relse(agbp);
+ *ask += xfs_refcountbt_max_size(mp, agblocks);
*used += tree_len;
return error;
diff --git a/fs/xfs/libxfs/xfs_refcount_btree.h b/fs/xfs/libxfs/xfs_refcount_btree.h
index 3be7768bd51a..9db008b955b7 100644
--- a/fs/xfs/libxfs/xfs_refcount_btree.h
+++ b/fs/xfs/libxfs/xfs_refcount_btree.h
@@ -66,7 +66,8 @@ extern void xfs_refcountbt_compute_maxlevels(struct xfs_mount *mp);
extern xfs_extlen_t xfs_refcountbt_calc_size(struct xfs_mount *mp,
unsigned long long len);
-extern xfs_extlen_t xfs_refcountbt_max_size(struct xfs_mount *mp);
+extern xfs_extlen_t xfs_refcountbt_max_size(struct xfs_mount *mp,
+ xfs_agblock_t agblocks);
extern int xfs_refcountbt_calc_reserves(struct xfs_mount *mp,
xfs_agnumber_t agno, xfs_extlen_t *ask, xfs_extlen_t *used);
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c
index de25771764ba..74e5a54bc428 100644
--- a/fs/xfs/libxfs/xfs_rmap_btree.c
+++ b/fs/xfs/libxfs/xfs_rmap_btree.c
@@ -550,13 +550,14 @@ xfs_rmapbt_calc_size(
*/
xfs_extlen_t
xfs_rmapbt_max_size(
- struct xfs_mount *mp)
+ struct xfs_mount *mp,
+ xfs_agblock_t agblocks)
{
/* Bail out if we're uninitialized, which can happen in mkfs. */
if (mp->m_rmap_mxr[0] == 0)
return 0;
- return xfs_rmapbt_calc_size(mp, mp->m_sb.sb_agblocks);
+ return xfs_rmapbt_calc_size(mp, agblocks);
}
/*
@@ -571,25 +572,24 @@ xfs_rmapbt_calc_reserves(
{
struct xfs_buf *agbp;
struct xfs_agf *agf;
- xfs_extlen_t pool_len;
+ xfs_agblock_t agblocks;
xfs_extlen_t tree_len;
int error;
if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
return 0;
- /* Reserve 1% of the AG or enough for 1 block per record. */
- pool_len = max(mp->m_sb.sb_agblocks / 100, xfs_rmapbt_max_size(mp));
- *ask += pool_len;
-
error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
if (error)
return error;
agf = XFS_BUF_TO_AGF(agbp);
+ agblocks = be32_to_cpu(agf->agf_length);
tree_len = be32_to_cpu(agf->agf_rmap_blocks);
xfs_buf_relse(agbp);
+ /* Reserve 1% of the AG or enough for 1 block per record. */
+ *ask += max(agblocks / 100, xfs_rmapbt_max_size(mp, agblocks));
*used += tree_len;
return error;
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.h b/fs/xfs/libxfs/xfs_rmap_btree.h
index 2a9ac472fb15..19c08e933049 100644
--- a/fs/xfs/libxfs/xfs_rmap_btree.h
+++ b/fs/xfs/libxfs/xfs_rmap_btree.h
@@ -60,7 +60,8 @@ extern void xfs_rmapbt_compute_maxlevels(struct xfs_mount *mp);
extern xfs_extlen_t xfs_rmapbt_calc_size(struct xfs_mount *mp,
unsigned long long len);
-extern xfs_extlen_t xfs_rmapbt_max_size(struct xfs_mount *mp);
+extern xfs_extlen_t xfs_rmapbt_max_size(struct xfs_mount *mp,
+ xfs_agblock_t agblocks);
extern int xfs_rmapbt_calc_reserves(struct xfs_mount *mp,
xfs_agnumber_t agno, xfs_extlen_t *ask, xfs_extlen_t *used);
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index 2580262e4ea0..584ec896a533 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -242,7 +242,7 @@ xfs_mount_validate_sb(
sbp->sb_blocklog < XFS_MIN_BLOCKSIZE_LOG ||
sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG ||
sbp->sb_blocksize != (1 << sbp->sb_blocklog) ||
- sbp->sb_dirblklog > XFS_MAX_BLOCKSIZE_LOG ||
+ sbp->sb_dirblklog + sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG ||
sbp->sb_inodesize < XFS_DINODE_MIN_SIZE ||
sbp->sb_inodesize > XFS_DINODE_MAX_SIZE ||
sbp->sb_inodelog < XFS_DINODE_MIN_LOG ||
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 0f56fcd3a5d5..1ff9df7a3ce8 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -481,6 +481,12 @@ xfs_submit_ioend(
struct xfs_ioend *ioend,
int status)
{
+ /* Convert CoW extents to regular */
+ if (!status && ioend->io_type == XFS_IO_COW) {
+ status = xfs_reflink_convert_cow(XFS_I(ioend->io_inode),
+ ioend->io_offset, ioend->io_size);
+ }
+
/* Reserve log space if we might write beyond the on-disk inode size. */
if (!status &&
ioend->io_type != XFS_IO_UNWRITTEN &&
@@ -1152,19 +1158,22 @@ xfs_vm_releasepage(
* block_invalidatepage() can send pages that are still marked dirty
* but otherwise have invalidated buffers.
*
- * We've historically freed buffers on the latter. Instead, quietly
- * filter out all dirty pages to avoid spurious buffer state warnings.
- * This can likely be removed once shrink_active_list() is fixed.
+ * We want to release the latter to avoid unnecessary buildup of the
+ * LRU, skip the former and warn if we've left any lingering
+ * delalloc/unwritten buffers on clean pages. Skip pages with delalloc
+ * or unwritten buffers and warn if the page is not dirty. Otherwise
+ * try to release the buffers.
*/
- if (PageDirty(page))
- return 0;
-
xfs_count_page_state(page, &delalloc, &unwritten);
- if (WARN_ON_ONCE(delalloc))
+ if (delalloc) {
+ WARN_ON_ONCE(!PageDirty(page));
return 0;
- if (WARN_ON_ONCE(unwritten))
+ }
+ if (unwritten) {
+ WARN_ON_ONCE(!PageDirty(page));
return 0;
+ }
return try_to_free_buffers(page);
}
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index b9abce524c33..6be5f26783a1 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -528,7 +528,6 @@ xfs_getbmap(
xfs_bmbt_irec_t *map; /* buffer for user's data */
xfs_mount_t *mp; /* file system mount point */
int nex; /* # of user extents can do */
- int nexleft; /* # of user extents left */
int subnex; /* # of bmapi's can do */
int nmap; /* number of map entries */
struct getbmapx *out; /* output structure */
@@ -686,10 +685,8 @@ xfs_getbmap(
goto out_free_map;
}
- nexleft = nex;
-
do {
- nmap = (nexleft > subnex) ? subnex : nexleft;
+ nmap = (nex> subnex) ? subnex : nex;
error = xfs_bmapi_read(ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset),
XFS_BB_TO_FSB(mp, bmv->bmv_length),
map, &nmap, bmapi_flags);
@@ -697,8 +694,8 @@ xfs_getbmap(
goto out_free_map;
ASSERT(nmap <= subnex);
- for (i = 0; i < nmap && nexleft && bmv->bmv_length &&
- cur_ext < bmv->bmv_count; i++) {
+ for (i = 0; i < nmap && bmv->bmv_length &&
+ cur_ext < bmv->bmv_count - 1; i++) {
out[cur_ext].bmv_oflags = 0;
if (map[i].br_state == XFS_EXT_UNWRITTEN)
out[cur_ext].bmv_oflags |= BMV_OF_PREALLOC;
@@ -760,16 +757,27 @@ xfs_getbmap(
continue;
}
+ /*
+ * In order to report shared extents accurately,
+ * we report each distinct shared/unshared part
+ * of a single bmbt record using multiple bmap
+ * extents. To make that happen, we iterate the
+ * same map array item multiple times, each
+ * time trimming out the subextent that we just
+ * reported.
+ *
+ * Because of this, we must check the out array
+ * index (cur_ext) directly against bmv_count-1
+ * to avoid overflows.
+ */
if (inject_map.br_startblock != NULLFSBLOCK) {
map[i] = inject_map;
i--;
- } else
- nexleft--;
+ }
bmv->bmv_entries++;
cur_ext++;
}
- } while (nmap && nexleft && bmv->bmv_length &&
- cur_ext < bmv->bmv_count);
+ } while (nmap && bmv->bmv_length && cur_ext < bmv->bmv_count - 1);
out_free_map:
kmem_free(map);
@@ -779,11 +787,9 @@ xfs_getbmap(
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
for (i = 0; i < cur_ext; i++) {
- int full = 0; /* user array is full */
-
/* format results & advance arg */
- error = formatter(&arg, &out[i], &full);
- if (error || full)
+ error = formatter(&arg, &out[i]);
+ if (error)
break;
}
@@ -909,17 +915,18 @@ xfs_can_free_eofblocks(struct xfs_inode *ip, bool force)
*/
int
xfs_free_eofblocks(
- xfs_mount_t *mp,
- xfs_inode_t *ip,
- bool need_iolock)
+ struct xfs_inode *ip)
{
- xfs_trans_t *tp;
- int error;
- xfs_fileoff_t end_fsb;
- xfs_fileoff_t last_fsb;
- xfs_filblks_t map_len;
- int nimaps;
- xfs_bmbt_irec_t imap;
+ struct xfs_trans *tp;
+ int error;
+ xfs_fileoff_t end_fsb;
+ xfs_fileoff_t last_fsb;
+ xfs_filblks_t map_len;
+ int nimaps;
+ struct xfs_bmbt_irec imap;
+ struct xfs_mount *mp = ip->i_mount;
+
+ ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
/*
* Figure out if there are any blocks beyond the end
@@ -936,6 +943,10 @@ xfs_free_eofblocks(
error = xfs_bmapi_read(ip, end_fsb, map_len, &imap, &nimaps, 0);
xfs_iunlock(ip, XFS_ILOCK_SHARED);
+ /*
+ * If there are blocks after the end of file, truncate the file to its
+ * current size to free them up.
+ */
if (!error && (nimaps != 0) &&
(imap.br_startblock != HOLESTARTBLOCK ||
ip->i_delayed_blks)) {
@@ -946,22 +957,13 @@ xfs_free_eofblocks(
if (error)
return error;
- /*
- * There are blocks after the end of file.
- * Free them up now by truncating the file to
- * its current size.
- */
- if (need_iolock) {
- if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL))
- return -EAGAIN;
- }
+ /* wait on dio to ensure i_size has settled */
+ inode_dio_wait(VFS_I(ip));
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0,
&tp);
if (error) {
ASSERT(XFS_FORCED_SHUTDOWN(mp));
- if (need_iolock)
- xfs_iunlock(ip, XFS_IOLOCK_EXCL);
return error;
}
@@ -989,8 +991,6 @@ xfs_free_eofblocks(
}
xfs_iunlock(ip, XFS_ILOCK_EXCL);
- if (need_iolock)
- xfs_iunlock(ip, XFS_IOLOCK_EXCL);
}
return error;
}
@@ -1385,10 +1385,16 @@ xfs_shift_file_space(
xfs_fileoff_t stop_fsb;
xfs_fileoff_t next_fsb;
xfs_fileoff_t shift_fsb;
+ uint resblks;
ASSERT(direction == SHIFT_LEFT || direction == SHIFT_RIGHT);
if (direction == SHIFT_LEFT) {
+ /*
+ * Reserve blocks to cover potential extent merges after left
+ * shift operations.
+ */
+ resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
next_fsb = XFS_B_TO_FSB(mp, offset + len);
stop_fsb = XFS_B_TO_FSB(mp, VFS_I(ip)->i_size);
} else {
@@ -1396,6 +1402,7 @@ xfs_shift_file_space(
* If right shift, delegate the work of initialization of
* next_fsb to xfs_bmap_shift_extent as it has ilock held.
*/
+ resblks = 0;
next_fsb = NULLFSBLOCK;
stop_fsb = XFS_B_TO_FSB(mp, offset);
}
@@ -1407,7 +1414,7 @@ xfs_shift_file_space(
* into the accessible region of the file.
*/
if (xfs_can_free_eofblocks(ip, true)) {
- error = xfs_free_eofblocks(mp, ip, false);
+ error = xfs_free_eofblocks(ip);
if (error)
return error;
}
@@ -1437,21 +1444,14 @@ xfs_shift_file_space(
}
while (!error && !done) {
- /*
- * We would need to reserve permanent block for transaction.
- * This will come into picture when after shifting extent into
- * hole we found that adjacent extents can be merged which
- * may lead to freeing of a block during record update.
- */
- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write,
- XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, 0, &tp);
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0,
+ &tp);
if (error)
break;
xfs_ilock(ip, XFS_ILOCK_EXCL);
error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot,
- ip->i_gdquot, ip->i_pdquot,
- XFS_DIOSTRAT_SPACE_RES(mp, 0), 0,
+ ip->i_gdquot, ip->i_pdquot, resblks, 0,
XFS_QMOPT_RES_REGBLKS);
if (error)
goto out_trans_cancel;
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
index 68a621a8e0c0..135d8267e284 100644
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -35,7 +35,7 @@ int xfs_bmap_punch_delalloc_range(struct xfs_inode *ip,
xfs_fileoff_t start_fsb, xfs_fileoff_t length);
/* bmap to userspace formatter - copy to user & advance pointer */
-typedef int (*xfs_bmap_format_t)(void **, struct getbmapx *, int *);
+typedef int (*xfs_bmap_format_t)(void **, struct getbmapx *);
int xfs_getbmap(struct xfs_inode *ip, struct getbmapx *bmv,
xfs_bmap_format_t formatter, void *arg);
@@ -63,8 +63,7 @@ int xfs_insert_file_space(struct xfs_inode *, xfs_off_t offset,
/* EOF block manipulation functions */
bool xfs_can_free_eofblocks(struct xfs_inode *ip, bool force);
-int xfs_free_eofblocks(struct xfs_mount *mp, struct xfs_inode *ip,
- bool need_iolock);
+int xfs_free_eofblocks(struct xfs_inode *ip);
int xfs_swap_extents(struct xfs_inode *ip, struct xfs_inode *tip,
struct xfs_swapext *sx);
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 7f0a01f7b592..ac3b4db519df 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -422,6 +422,7 @@ retry:
out_free_pages:
for (i = 0; i < bp->b_page_count; i++)
__free_page(bp->b_pages[i]);
+ bp->b_flags &= ~_XBF_PAGES;
return error;
}
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 2975cb2319f4..0306168af332 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -1162,6 +1162,7 @@ xfs_buf_iodone_callbacks(
*/
bp->b_last_error = 0;
bp->b_retries = 0;
+ bp->b_first_retry_time = 0;
xfs_buf_do_callbacks(bp);
bp->b_fspriv = NULL;
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index 4ff499aa7338..d796ffac7296 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -208,32 +208,3 @@ xfs_ioc_trim(
return -EFAULT;
return 0;
}
-
-int
-xfs_discard_extents(
- struct xfs_mount *mp,
- struct list_head *list)
-{
- struct xfs_extent_busy *busyp;
- int error = 0;
-
- list_for_each_entry(busyp, list, list) {
- trace_xfs_discard_extent(mp, busyp->agno, busyp->bno,
- busyp->length);
-
- error = blkdev_issue_discard(mp->m_ddev_targp->bt_bdev,
- XFS_AGB_TO_DADDR(mp, busyp->agno, busyp->bno),
- XFS_FSB_TO_BB(mp, busyp->length),
- GFP_NOFS, 0);
- if (error && error != -EOPNOTSUPP) {
- xfs_info(mp,
- "discard failed for extent [0x%llx,%u], error %d",
- (unsigned long long)busyp->bno,
- busyp->length,
- error);
- return error;
- }
- }
-
- return 0;
-}
diff --git a/fs/xfs/xfs_discard.h b/fs/xfs/xfs_discard.h
index 344879aea646..0f070f9e44e1 100644
--- a/fs/xfs/xfs_discard.h
+++ b/fs/xfs/xfs_discard.h
@@ -5,6 +5,5 @@ struct fstrim_range;
struct list_head;
extern int xfs_ioc_trim(struct xfs_mount *, struct fstrim_range __user *);
-extern int xfs_discard_extents(struct xfs_mount *, struct list_head *);
#endif /* XFS_DISCARD_H */
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 7a30b8f11db7..9d06cc30e875 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -710,6 +710,10 @@ xfs_dq_get_next_id(
/* Simple advance */
next_id = *id + 1;
+ /* If we'd wrap past the max ID, stop */
+ if (next_id < *id)
+ return -ENOENT;
+
/* If new ID is within the current chunk, advancing it sufficed */
if (next_id % mp->m_quotainfo->qi_dqperchunk) {
*id = next_id;
diff --git a/fs/xfs/xfs_extent_busy.c b/fs/xfs/xfs_extent_busy.c
index 162dc186cf04..77760dbf0242 100644
--- a/fs/xfs/xfs_extent_busy.c
+++ b/fs/xfs/xfs_extent_busy.c
@@ -45,18 +45,7 @@ xfs_extent_busy_insert(
struct rb_node **rbp;
struct rb_node *parent = NULL;
- new = kmem_zalloc(sizeof(struct xfs_extent_busy), KM_MAYFAIL);
- if (!new) {
- /*
- * No Memory! Since it is now not possible to track the free
- * block, make this a synchronous transaction to insure that
- * the block is not reused before this transaction commits.
- */
- trace_xfs_extent_busy_enomem(tp->t_mountp, agno, bno, len);
- xfs_trans_set_sync(tp);
- return;
- }
-
+ new = kmem_zalloc(sizeof(struct xfs_extent_busy), KM_SLEEP);
new->agno = agno;
new->bno = bno;
new->length = len;
@@ -345,25 +334,31 @@ restart:
* subset of the extent that is not busy. If *rlen is smaller than
* args->minlen no suitable extent could be found, and the higher level
* code needs to force out the log and retry the allocation.
+ *
+ * Return the current busy generation for the AG if the extent is busy. This
+ * value can be used to wait for at least one of the currently busy extents
+ * to be cleared. Note that the busy list is not guaranteed to be empty after
+ * the gen is woken. The state of a specific extent must always be confirmed
+ * with another call to xfs_extent_busy_trim() before it can be used.
*/
-void
+bool
xfs_extent_busy_trim(
struct xfs_alloc_arg *args,
- xfs_agblock_t bno,
- xfs_extlen_t len,
- xfs_agblock_t *rbno,
- xfs_extlen_t *rlen)
+ xfs_agblock_t *bno,
+ xfs_extlen_t *len,
+ unsigned *busy_gen)
{
xfs_agblock_t fbno;
xfs_extlen_t flen;
struct rb_node *rbp;
+ bool ret = false;
- ASSERT(len > 0);
+ ASSERT(*len > 0);
spin_lock(&args->pag->pagb_lock);
restart:
- fbno = bno;
- flen = len;
+ fbno = *bno;
+ flen = *len;
rbp = args->pag->pagb_tree.rb_node;
while (rbp && flen >= args->minlen) {
struct xfs_extent_busy *busyp =
@@ -515,24 +510,25 @@ restart:
flen = fend - fbno;
}
- spin_unlock(&args->pag->pagb_lock);
+out:
- if (fbno != bno || flen != len) {
- trace_xfs_extent_busy_trim(args->mp, args->agno, bno, len,
+ if (fbno != *bno || flen != *len) {
+ trace_xfs_extent_busy_trim(args->mp, args->agno, *bno, *len,
fbno, flen);
+ *bno = fbno;
+ *len = flen;
+ *busy_gen = args->pag->pagb_gen;
+ ret = true;
}
- *rbno = fbno;
- *rlen = flen;
- return;
+ spin_unlock(&args->pag->pagb_lock);
+ return ret;
fail:
/*
* Return a zero extent length as failure indications. All callers
* re-check if the trimmed extent satisfies the minlen requirement.
*/
- spin_unlock(&args->pag->pagb_lock);
- trace_xfs_extent_busy_trim(args->mp, args->agno, bno, len, fbno, 0);
- *rbno = fbno;
- *rlen = 0;
+ flen = 0;
+ goto out;
}
STATIC void
@@ -551,6 +547,21 @@ xfs_extent_busy_clear_one(
kmem_free(busyp);
}
+static void
+xfs_extent_busy_put_pag(
+ struct xfs_perag *pag,
+ bool wakeup)
+ __releases(pag->pagb_lock)
+{
+ if (wakeup) {
+ pag->pagb_gen++;
+ wake_up_all(&pag->pagb_wait);
+ }
+
+ spin_unlock(&pag->pagb_lock);
+ xfs_perag_put(pag);
+}
+
/*
* Remove all extents on the passed in list from the busy extents tree.
* If do_discard is set skip extents that need to be discarded, and mark
@@ -565,27 +576,76 @@ xfs_extent_busy_clear(
struct xfs_extent_busy *busyp, *n;
struct xfs_perag *pag = NULL;
xfs_agnumber_t agno = NULLAGNUMBER;
+ bool wakeup = false;
list_for_each_entry_safe(busyp, n, list, list) {
if (busyp->agno != agno) {
- if (pag) {
- spin_unlock(&pag->pagb_lock);
- xfs_perag_put(pag);
- }
- pag = xfs_perag_get(mp, busyp->agno);
- spin_lock(&pag->pagb_lock);
+ if (pag)
+ xfs_extent_busy_put_pag(pag, wakeup);
agno = busyp->agno;
+ pag = xfs_perag_get(mp, agno);
+ spin_lock(&pag->pagb_lock);
+ wakeup = false;
}
if (do_discard && busyp->length &&
- !(busyp->flags & XFS_EXTENT_BUSY_SKIP_DISCARD))
+ !(busyp->flags & XFS_EXTENT_BUSY_SKIP_DISCARD)) {
busyp->flags = XFS_EXTENT_BUSY_DISCARDED;
- else
+ } else {
xfs_extent_busy_clear_one(mp, pag, busyp);
+ wakeup = true;
+ }
}
- if (pag) {
- spin_unlock(&pag->pagb_lock);
+ if (pag)
+ xfs_extent_busy_put_pag(pag, wakeup);
+}
+
+/*
+ * Flush out all busy extents for this AG.
+ */
+void
+xfs_extent_busy_flush(
+ struct xfs_mount *mp,
+ struct xfs_perag *pag,
+ unsigned busy_gen)
+{
+ DEFINE_WAIT (wait);
+ int log_flushed = 0, error;
+
+ trace_xfs_log_force(mp, 0, _THIS_IP_);
+ error = _xfs_log_force(mp, XFS_LOG_SYNC, &log_flushed);
+ if (error)
+ return;
+
+ do {
+ prepare_to_wait(&pag->pagb_wait, &wait, TASK_KILLABLE);
+ if (busy_gen != READ_ONCE(pag->pagb_gen))
+ break;
+ schedule();
+ } while (1);
+
+ finish_wait(&pag->pagb_wait, &wait);
+}
+
+void
+xfs_extent_busy_wait_all(
+ struct xfs_mount *mp)
+{
+ DEFINE_WAIT (wait);
+ xfs_agnumber_t agno;
+
+ for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
+ struct xfs_perag *pag = xfs_perag_get(mp, agno);
+
+ do {
+ prepare_to_wait(&pag->pagb_wait, &wait, TASK_KILLABLE);
+ if (RB_EMPTY_ROOT(&pag->pagb_tree))
+ break;
+ schedule();
+ } while (1);
+ finish_wait(&pag->pagb_wait, &wait);
+
xfs_perag_put(pag);
}
}
@@ -596,9 +656,17 @@ xfs_extent_busy_clear(
int
xfs_extent_busy_ag_cmp(
void *priv,
- struct list_head *a,
- struct list_head *b)
+ struct list_head *l1,
+ struct list_head *l2)
{
- return container_of(a, struct xfs_extent_busy, list)->agno -
- container_of(b, struct xfs_extent_busy, list)->agno;
+ struct xfs_extent_busy *b1 =
+ container_of(l1, struct xfs_extent_busy, list);
+ struct xfs_extent_busy *b2 =
+ container_of(l2, struct xfs_extent_busy, list);
+ s32 diff;
+
+ diff = b1->agno - b2->agno;
+ if (!diff)
+ diff = b1->bno - b2->bno;
+ return diff;
}
diff --git a/fs/xfs/xfs_extent_busy.h b/fs/xfs/xfs_extent_busy.h
index bfff284d2dcc..60195ea1b84a 100644
--- a/fs/xfs/xfs_extent_busy.h
+++ b/fs/xfs/xfs_extent_busy.h
@@ -58,9 +58,16 @@ void
xfs_extent_busy_reuse(struct xfs_mount *mp, xfs_agnumber_t agno,
xfs_agblock_t fbno, xfs_extlen_t flen, bool userdata);
+bool
+xfs_extent_busy_trim(struct xfs_alloc_arg *args, xfs_agblock_t *bno,
+ xfs_extlen_t *len, unsigned *busy_gen);
+
+void
+xfs_extent_busy_flush(struct xfs_mount *mp, struct xfs_perag *pag,
+ unsigned busy_gen);
+
void
-xfs_extent_busy_trim(struct xfs_alloc_arg *args, xfs_agblock_t bno,
- xfs_extlen_t len, xfs_agblock_t *rbno, xfs_extlen_t *rlen);
+xfs_extent_busy_wait_all(struct xfs_mount *mp);
int
xfs_extent_busy_ag_cmp(void *priv, struct list_head *a, struct list_head *b);
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index bbb9eb6811b2..086440e79b86 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -527,6 +527,15 @@ xfs_file_dio_aio_write(
if ((iocb->ki_pos & mp->m_blockmask) ||
((iocb->ki_pos + count) & mp->m_blockmask)) {
unaligned_io = 1;
+
+ /*
+ * We can't properly handle unaligned direct I/O to reflink
+ * files yet, as we can't unshare a partial block.
+ */
+ if (xfs_is_reflink_inode(ip)) {
+ trace_xfs_reflink_bounce_dio_write(ip, iocb->ki_pos, count);
+ return -EREMCHG;
+ }
iolock = XFS_IOLOCK_EXCL;
} else {
iolock = XFS_IOLOCK_SHARED;
@@ -552,14 +561,6 @@ xfs_file_dio_aio_write(
}
trace_xfs_file_direct_write(ip, count, iocb->ki_pos);
-
- /* If this is a block-aligned directio CoW, remap immediately. */
- if (xfs_is_reflink_inode(ip) && !unaligned_io) {
- ret = xfs_reflink_allocate_cow_range(ip, iocb->ki_pos, count);
- if (ret)
- goto out;
- }
-
ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, xfs_dio_write_end_io);
out:
xfs_iunlock(ip, iolock);
@@ -614,8 +615,10 @@ xfs_file_buffered_aio_write(
struct xfs_inode *ip = XFS_I(inode);
ssize_t ret;
int enospc = 0;
- int iolock = XFS_IOLOCK_EXCL;
+ int iolock;
+write_retry:
+ iolock = XFS_IOLOCK_EXCL;
xfs_ilock(ip, iolock);
ret = xfs_file_aio_write_checks(iocb, from, &iolock);
@@ -625,7 +628,6 @@ xfs_file_buffered_aio_write(
/* We can write back this queue in page reclaim */
current->backing_dev_info = inode_to_bdi(inode);
-write_retry:
trace_xfs_file_buffered_write(ip, iov_iter_count(from), iocb->ki_pos);
ret = iomap_file_buffered_write(iocb, from, &xfs_iomap_ops);
if (likely(ret >= 0))
@@ -641,18 +643,21 @@ write_retry:
* running at the same time.
*/
if (ret == -EDQUOT && !enospc) {
+ xfs_iunlock(ip, iolock);
enospc = xfs_inode_free_quota_eofblocks(ip);
if (enospc)
goto write_retry;
enospc = xfs_inode_free_quota_cowblocks(ip);
if (enospc)
goto write_retry;
+ iolock = 0;
} else if (ret == -ENOSPC && !enospc) {
struct xfs_eofblocks eofb = {0};
enospc = 1;
xfs_flush_inodes(ip->i_mount);
- eofb.eof_scan_owner = ip->i_ino; /* for locking */
+
+ xfs_iunlock(ip, iolock);
eofb.eof_flags = XFS_EOF_FLAGS_SYNC;
xfs_icache_free_eofblocks(ip->i_mount, &eofb);
goto write_retry;
@@ -660,7 +665,8 @@ write_retry:
current->backing_dev_info = NULL;
out:
- xfs_iunlock(ip, iolock);
+ if (iolock)
+ xfs_iunlock(ip, iolock);
return ret;
}
@@ -908,9 +914,9 @@ xfs_dir_open(
*/
mode = xfs_ilock_data_map_shared(ip);
if (ip->i_d.di_nextents > 0)
- xfs_dir3_data_readahead(ip, 0, -1);
+ error = xfs_dir3_data_readahead(ip, 0, -1);
xfs_iunlock(ip, mode);
- return 0;
+ return error;
}
STATIC int
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 93d12fa2670d..6ccaae9eb0ee 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -352,12 +352,7 @@ xfs_growfs_data_private(
goto error0;
}
- if (xfs_sb_version_hascrc(&mp->m_sb))
- xfs_btree_init_block(mp, bp, XFS_ABTB_CRC_MAGIC, 0, 1,
- agno, XFS_BTREE_CRC_BLOCKS);
- else
- xfs_btree_init_block(mp, bp, XFS_ABTB_MAGIC, 0, 1,
- agno, 0);
+ xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 1, agno, 0);
arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1);
arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks);
@@ -381,12 +376,7 @@ xfs_growfs_data_private(
goto error0;
}
- if (xfs_sb_version_hascrc(&mp->m_sb))
- xfs_btree_init_block(mp, bp, XFS_ABTC_CRC_MAGIC, 0, 1,
- agno, XFS_BTREE_CRC_BLOCKS);
- else
- xfs_btree_init_block(mp, bp, XFS_ABTC_MAGIC, 0, 1,
- agno, 0);
+ xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 1, agno, 0);
arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1);
arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks);
@@ -413,8 +403,8 @@ xfs_growfs_data_private(
goto error0;
}
- xfs_btree_init_block(mp, bp, XFS_RMAP_CRC_MAGIC, 0, 0,
- agno, XFS_BTREE_CRC_BLOCKS);
+ xfs_btree_init_block(mp, bp, XFS_BTNUM_RMAP, 0, 0,
+ agno, 0);
block = XFS_BUF_TO_BLOCK(bp);
@@ -488,12 +478,7 @@ xfs_growfs_data_private(
goto error0;
}
- if (xfs_sb_version_hascrc(&mp->m_sb))
- xfs_btree_init_block(mp, bp, XFS_IBT_CRC_MAGIC, 0, 0,
- agno, XFS_BTREE_CRC_BLOCKS);
- else
- xfs_btree_init_block(mp, bp, XFS_IBT_MAGIC, 0, 0,
- agno, 0);
+ xfs_btree_init_block(mp, bp, XFS_BTNUM_INO , 0, 0, agno, 0);
error = xfs_bwrite(bp);
xfs_buf_relse(bp);
@@ -513,13 +498,8 @@ xfs_growfs_data_private(
goto error0;
}
- if (xfs_sb_version_hascrc(&mp->m_sb))
- xfs_btree_init_block(mp, bp, XFS_FIBT_CRC_MAGIC,
- 0, 0, agno,
- XFS_BTREE_CRC_BLOCKS);
- else
- xfs_btree_init_block(mp, bp, XFS_FIBT_MAGIC, 0,
- 0, agno, 0);
+ xfs_btree_init_block(mp, bp, XFS_BTNUM_FINO,
+ 0, 0, agno, 0);
error = xfs_bwrite(bp);
xfs_buf_relse(bp);
@@ -540,9 +520,8 @@ xfs_growfs_data_private(
goto error0;
}
- xfs_btree_init_block(mp, bp, XFS_REFC_CRC_MAGIC,
- 0, 0, agno,
- XFS_BTREE_CRC_BLOCKS);
+ xfs_btree_init_block(mp, bp, XFS_BTNUM_REFC,
+ 0, 0, agno, 0);
error = xfs_bwrite(bp);
xfs_buf_relse(bp);
@@ -631,6 +610,20 @@ xfs_growfs_data_private(
xfs_set_low_space_thresholds(mp);
mp->m_alloc_set_aside = xfs_alloc_set_aside(mp);
+ /*
+ * If we expanded the last AG, free the per-AG reservation
+ * so we can reinitialize it with the new size.
+ */
+ if (new) {
+ struct xfs_perag *pag;
+
+ pag = xfs_perag_get(mp, agno);
+ error = xfs_ag_resv_free(pag);
+ xfs_perag_put(pag);
+ if (error)
+ goto out;
+ }
+
/* Reserve AG metadata blocks. */
error = xfs_fs_reserve_ag_blocks(mp);
if (error && error != -ENOSPC)
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index ff4d6311c7f4..7234b9748c36 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -1322,13 +1322,10 @@ xfs_inode_free_eofblocks(
int flags,
void *args)
{
- int ret;
+ int ret = 0;
struct xfs_eofblocks *eofb = args;
- bool need_iolock = true;
int match;
- ASSERT(!eofb || (eofb && eofb->eof_scan_owner != 0));
-
if (!xfs_can_free_eofblocks(ip, false)) {
/* inode could be preallocated or append-only */
trace_xfs_inode_free_eofblocks_invalid(ip);
@@ -1356,21 +1353,19 @@ xfs_inode_free_eofblocks(
if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE &&
XFS_ISIZE(ip) < eofb->eof_min_file_size)
return 0;
-
- /*
- * A scan owner implies we already hold the iolock. Skip it in
- * xfs_free_eofblocks() to avoid deadlock. This also eliminates
- * the possibility of EAGAIN being returned.
- */
- if (eofb->eof_scan_owner == ip->i_ino)
- need_iolock = false;
}
- ret = xfs_free_eofblocks(ip->i_mount, ip, need_iolock);
-
- /* don't revisit the inode if we're not waiting */
- if (ret == -EAGAIN && !(flags & SYNC_WAIT))
- ret = 0;
+ /*
+ * If the caller is waiting, return -EAGAIN to keep the background
+ * scanner moving and revisit the inode in a subsequent pass.
+ */
+ if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
+ if (flags & SYNC_WAIT)
+ ret = -EAGAIN;
+ return ret;
+ }
+ ret = xfs_free_eofblocks(ip);
+ xfs_iunlock(ip, XFS_IOLOCK_EXCL);
return ret;
}
@@ -1417,15 +1412,10 @@ __xfs_inode_free_quota_eofblocks(
struct xfs_eofblocks eofb = {0};
struct xfs_dquot *dq;
- ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
-
/*
- * Set the scan owner to avoid a potential livelock. Otherwise, the scan
- * can repeatedly trylock on the inode we're currently processing. We
- * run a sync scan to increase effectiveness and use the union filter to
+ * Run a sync scan to increase effectiveness and use the union filter to
* cover all applicable quotas in a single scan.
*/
- eofb.eof_scan_owner = ip->i_ino;
eofb.eof_flags = XFS_EOF_FLAGS_UNION|XFS_EOF_FLAGS_SYNC;
if (XFS_IS_UQUOTA_ENFORCED(ip->i_mount)) {
@@ -1577,12 +1567,9 @@ xfs_inode_free_cowblocks(
{
int ret;
struct xfs_eofblocks *eofb = args;
- bool need_iolock = true;
int match;
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
- ASSERT(!eofb || (eofb && eofb->eof_scan_owner != 0));
-
/*
* Just clear the tag if we have an empty cow fork or none at all. It's
* possible the inode was fully unshared since it was originally tagged.
@@ -1597,7 +1584,8 @@ xfs_inode_free_cowblocks(
* If the mapping is dirty or under writeback we cannot touch the
* CoW fork. Leave it alone if we're in the midst of a directio.
*/
- if (mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY) ||
+ if ((VFS_I(ip)->i_state & I_DIRTY_PAGES) ||
+ mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY) ||
mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_WRITEBACK) ||
atomic_read(&VFS_I(ip)->i_dio_count))
return 0;
@@ -1614,28 +1602,16 @@ xfs_inode_free_cowblocks(
if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE &&
XFS_ISIZE(ip) < eofb->eof_min_file_size)
return 0;
-
- /*
- * A scan owner implies we already hold the iolock. Skip it in
- * xfs_free_eofblocks() to avoid deadlock. This also eliminates
- * the possibility of EAGAIN being returned.
- */
- if (eofb->eof_scan_owner == ip->i_ino)
- need_iolock = false;
}
/* Free the CoW blocks */
- if (need_iolock) {
- xfs_ilock(ip, XFS_IOLOCK_EXCL);
- xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
- }
+ xfs_ilock(ip, XFS_IOLOCK_EXCL);
+ xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF);
- if (need_iolock) {
- xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
- xfs_iunlock(ip, XFS_IOLOCK_EXCL);
- }
+ xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
+ xfs_iunlock(ip, XFS_IOLOCK_EXCL);
return ret;
}
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
index a1e02f4708ab..8a7c849b4dea 100644
--- a/fs/xfs/xfs_icache.h
+++ b/fs/xfs/xfs_icache.h
@@ -27,7 +27,6 @@ struct xfs_eofblocks {
kgid_t eof_gid;
prid_t eof_prid;
__u64 eof_min_file_size;
- xfs_ino_t eof_scan_owner;
};
#define SYNC_WAIT 0x0001 /* wait for i/o to complete */
@@ -102,7 +101,6 @@ xfs_fs_eofblocks_from_user(
dst->eof_flags = src->eof_flags;
dst->eof_prid = src->eof_prid;
dst->eof_min_file_size = src->eof_min_file_size;
- dst->eof_scan_owner = NULLFSINO;
dst->eof_uid = INVALID_UID;
if (src->eof_flags & XFS_EOF_FLAGS_UID) {
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index b9557795eb74..edfa6a55b064 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1692,32 +1692,34 @@ xfs_release(
if (xfs_can_free_eofblocks(ip, false)) {
/*
+ * Check if the inode is being opened, written and closed
+ * frequently and we have delayed allocation blocks outstanding
+ * (e.g. streaming writes from the NFS server), truncating the
+ * blocks past EOF will cause fragmentation to occur.
+ *
+ * In this case don't do the truncation, but we have to be
+ * careful how we detect this case. Blocks beyond EOF show up as
+ * i_delayed_blks even when the inode is clean, so we need to
+ * truncate them away first before checking for a dirty release.
+ * Hence on the first dirty close we will still remove the
+ * speculative allocation, but after that we will leave it in
+ * place.
+ */
+ if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE))
+ return 0;
+ /*
* If we can't get the iolock just skip truncating the blocks
* past EOF because we could deadlock with the mmap_sem
- * otherwise. We'll get another chance to drop them once the
+ * otherwise. We'll get another chance to drop them once the
* last reference to the inode is dropped, so we'll never leak
* blocks permanently.
- *
- * Further, check if the inode is being opened, written and
- * closed frequently and we have delayed allocation blocks
- * outstanding (e.g. streaming writes from the NFS server),
- * truncating the blocks past EOF will cause fragmentation to
- * occur.
- *
- * In this case don't do the truncation, either, but we have to
- * be careful how we detect this case. Blocks beyond EOF show
- * up as i_delayed_blks even when the inode is clean, so we
- * need to truncate them away first before checking for a dirty
- * release. Hence on the first dirty close we will still remove
- * the speculative allocation, but after that we will leave it
- * in place.
*/
- if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE))
- return 0;
-
- error = xfs_free_eofblocks(mp, ip, true);
- if (error && error != -EAGAIN)
- return error;
+ if (xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
+ error = xfs_free_eofblocks(ip);
+ xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+ if (error)
+ return error;
+ }
/* delalloc blocks after truncation means it really is dirty */
if (ip->i_delayed_blks)
@@ -1792,22 +1794,23 @@ xfs_inactive_ifree(
int error;
/*
- * The ifree transaction might need to allocate blocks for record
- * insertion to the finobt. We don't want to fail here at ENOSPC, so
- * allow ifree to dip into the reserved block pool if necessary.
- *
- * Freeing large sets of inodes generally means freeing inode chunks,
- * directory and file data blocks, so this should be relatively safe.
- * Only under severe circumstances should it be possible to free enough
- * inodes to exhaust the reserve block pool via finobt expansion while
- * at the same time not creating free space in the filesystem.
+ * We try to use a per-AG reservation for any block needed by the finobt
+ * tree, but as the finobt feature predates the per-AG reservation
+ * support a degraded file system might not have enough space for the
+ * reservation at mount time. In that case try to dip into the reserved
+ * pool and pray.
*
* Send a warning if the reservation does happen to fail, as the inode
* now remains allocated and sits on the unlinked list until the fs is
* repaired.
*/
- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ifree,
- XFS_IFREE_SPACE_RES(mp), 0, XFS_TRANS_RESERVE, &tp);
+ if (unlikely(mp->m_inotbt_nores)) {
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ifree,
+ XFS_IFREE_SPACE_RES(mp), 0, XFS_TRANS_RESERVE,
+ &tp);
+ } else {
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ifree, 0, 0, 0, &tp);
+ }
if (error) {
if (error == -ENOSPC) {
xfs_warn_ratelimited(mp,
@@ -1903,8 +1906,11 @@ xfs_inactive(
* cache. Post-eof blocks must be freed, lest we end up with
* broken free space accounting.
*/
- if (xfs_can_free_eofblocks(ip, true))
- xfs_free_eofblocks(mp, ip, false);
+ if (xfs_can_free_eofblocks(ip, true)) {
+ xfs_ilock(ip, XFS_IOLOCK_EXCL);
+ xfs_free_eofblocks(ip);
+ xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+ }
return;
}
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index c67cfb451fd3..cf1363dbf32b 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -1524,7 +1524,7 @@ out_drop_write:
}
STATIC int
-xfs_getbmap_format(void **ap, struct getbmapx *bmv, int *full)
+xfs_getbmap_format(void **ap, struct getbmapx *bmv)
{
struct getbmap __user *base = (struct getbmap __user *)*ap;
@@ -1567,7 +1567,7 @@ xfs_ioc_getbmap(
}
STATIC int
-xfs_getbmapx_format(void **ap, struct getbmapx *bmv, int *full)
+xfs_getbmapx_format(void **ap, struct getbmapx *bmv)
{
struct getbmapx __user *base = (struct getbmapx __user *)*ap;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 0d147428971e..01e878044f6b 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -162,7 +162,7 @@ xfs_iomap_write_direct(
xfs_fileoff_t last_fsb;
xfs_filblks_t count_fsb, resaligned;
xfs_fsblock_t firstfsb;
- xfs_extlen_t extsz, temp;
+ xfs_extlen_t extsz;
int nimaps;
int quota_flag;
int rt;
@@ -203,14 +203,7 @@ xfs_iomap_write_direct(
}
count_fsb = last_fsb - offset_fsb;
ASSERT(count_fsb > 0);
-
- resaligned = count_fsb;
- if (unlikely(extsz)) {
- if ((temp = do_mod(offset_fsb, extsz)))
- resaligned += temp;
- if ((temp = do_mod(resaligned, extsz)))
- resaligned += extsz - temp;
- }
+ resaligned = xfs_aligned_fsb_count(offset_fsb, count_fsb, extsz);
if (unlikely(rt)) {
resrtextents = qblocks = resaligned;
@@ -681,11 +674,11 @@ xfs_iomap_write_allocate(
xfs_trans_t *tp;
int nimaps;
int error = 0;
- int flags = 0;
+ int flags = XFS_BMAPI_DELALLOC;
int nres;
if (whichfork == XFS_COW_FORK)
- flags |= XFS_BMAPI_COWFORK;
+ flags |= XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC;
/*
* Make sure that the dquots are there.
@@ -1002,47 +995,31 @@ xfs_file_iomap_begin(
offset_fsb = XFS_B_TO_FSBT(mp, offset);
end_fsb = XFS_B_TO_FSB(mp, offset + length);
- if (xfs_is_reflink_inode(ip) &&
- (flags & IOMAP_WRITE) && (flags & IOMAP_DIRECT)) {
- shared = xfs_reflink_find_cow_mapping(ip, offset, &imap);
- if (shared) {
- xfs_iunlock(ip, lockmode);
- goto alloc_done;
- }
- ASSERT(!isnullstartblock(imap.br_startblock));
- }
-
error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
&nimaps, 0);
if (error)
goto out_unlock;
- if ((flags & IOMAP_REPORT) ||
- (xfs_is_reflink_inode(ip) &&
- (flags & IOMAP_WRITE) && (flags & IOMAP_DIRECT))) {
+ if (flags & IOMAP_REPORT) {
/* Trim the mapping to the nearest shared extent boundary. */
error = xfs_reflink_trim_around_shared(ip, &imap, &shared,
&trimmed);
if (error)
goto out_unlock;
-
- /*
- * We're here because we're trying to do a directio write to a
- * region that isn't aligned to a filesystem block. If the
- * extent is shared, fall back to buffered mode to handle the
- * RMW.
- */
- if (!(flags & IOMAP_REPORT) && shared) {
- trace_xfs_reflink_bounce_dio_write(ip, &imap);
- error = -EREMCHG;
- goto out_unlock;
- }
}
if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
- error = xfs_reflink_reserve_cow(ip, &imap, &shared);
- if (error)
- goto out_unlock;
+ if (flags & IOMAP_DIRECT) {
+ /* may drop and re-acquire the ilock */
+ error = xfs_reflink_allocate_cow(ip, &imap, &shared,
+ &lockmode);
+ if (error)
+ goto out_unlock;
+ } else {
+ error = xfs_reflink_reserve_cow(ip, &imap, &shared);
+ if (error)
+ goto out_unlock;
+ }
end_fsb = imap.br_startoff + imap.br_blockcount;
length = XFS_FSB_TO_B(mp, end_fsb) - offset;
@@ -1071,7 +1048,6 @@ xfs_file_iomap_begin(
if (error)
return error;
-alloc_done:
iomap->flags = IOMAP_F_NEW;
trace_xfs_iomap_alloc(ip, offset, length, 0, &imap);
} else {
@@ -1102,7 +1078,13 @@ xfs_file_iomap_end_delalloc(
xfs_fileoff_t end_fsb;
int error = 0;
+ /* behave as if the write failed if drop writes is enabled */
+ if (xfs_mp_drop_writes(mp))
+ written = 0;
+
start_fsb = XFS_B_TO_FSB(mp, offset + written);
+ if (unlikely(!written))
+ start_fsb = XFS_B_TO_FSBT(mp, offset);
end_fsb = XFS_B_TO_FSB(mp, offset + length);
/*
@@ -1114,6 +1096,9 @@ xfs_file_iomap_end_delalloc(
* blocks in the range, they are ours.
*/
if (start_fsb < end_fsb) {
+ truncate_pagecache_range(VFS_I(ip), XFS_FSB_TO_B(mp, start_fsb),
+ XFS_FSB_TO_B(mp, end_fsb) - 1);
+
xfs_ilock(ip, XFS_ILOCK_EXCL);
error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
end_fsb - start_fsb);
@@ -1144,7 +1129,7 @@ xfs_file_iomap_end(
return 0;
}
-struct iomap_ops xfs_iomap_ops = {
+const struct iomap_ops xfs_iomap_ops = {
.iomap_begin = xfs_file_iomap_begin,
.iomap_end = xfs_file_iomap_end,
};
@@ -1190,6 +1175,6 @@ out_unlock:
return error;
}
-struct iomap_ops xfs_xattr_iomap_ops = {
+const struct iomap_ops xfs_xattr_iomap_ops = {
.iomap_begin = xfs_xattr_iomap_begin,
};
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index 6d45cf01fcff..00db3ecea084 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -33,7 +33,27 @@ void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *,
struct xfs_bmbt_irec *);
xfs_extlen_t xfs_eof_alignment(struct xfs_inode *ip, xfs_extlen_t extsize);
-extern struct iomap_ops xfs_iomap_ops;
-extern struct iomap_ops xfs_xattr_iomap_ops;
+static inline xfs_filblks_t
+xfs_aligned_fsb_count(
+ xfs_fileoff_t offset_fsb,
+ xfs_filblks_t count_fsb,
+ xfs_extlen_t extsz)
+{
+ if (extsz) {
+ xfs_extlen_t align;
+
+ align = do_mod(offset_fsb, extsz);
+ if (align)
+ count_fsb += align;
+ align = do_mod(count_fsb, extsz);
+ if (align)
+ count_fsb += extsz - align;
+ }
+
+ return count_fsb;
+}
+
+extern const struct iomap_ops xfs_iomap_ops;
+extern const struct iomap_ops xfs_xattr_iomap_ops;
#endif /* __XFS_IOMAP_H__*/
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 308bebb6dfd2..22c16155f1b4 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -98,12 +98,27 @@ xfs_init_security(
static void
xfs_dentry_to_name(
struct xfs_name *namep,
+ struct dentry *dentry)
+{
+ namep->name = dentry->d_name.name;
+ namep->len = dentry->d_name.len;
+ namep->type = XFS_DIR3_FT_UNKNOWN;
+}
+
+static int
+xfs_dentry_mode_to_name(
+ struct xfs_name *namep,
struct dentry *dentry,
int mode)
{
namep->name = dentry->d_name.name;
namep->len = dentry->d_name.len;
- namep->type = xfs_mode_to_ftype[(mode & S_IFMT) >> S_SHIFT];
+ namep->type = xfs_mode_to_ftype(mode);
+
+ if (unlikely(namep->type == XFS_DIR3_FT_UNKNOWN))
+ return -EFSCORRUPTED;
+
+ return 0;
}
STATIC void
@@ -119,7 +134,7 @@ xfs_cleanup_inode(
* xfs_init_security we must back out.
* ENOSPC can hit here, among other things.
*/
- xfs_dentry_to_name(&teardown, dentry, 0);
+ xfs_dentry_to_name(&teardown, dentry);
xfs_remove(XFS_I(dir), &teardown, XFS_I(inode));
}
@@ -154,8 +169,12 @@ xfs_generic_create(
if (error)
return error;
+ /* Verify mode is valid also for tmpfile case */
+ error = xfs_dentry_mode_to_name(&name, dentry, mode);
+ if (unlikely(error))
+ goto out_free_acl;
+
if (!tmpfile) {
- xfs_dentry_to_name(&name, dentry, mode);
error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip);
} else {
error = xfs_create_tmpfile(XFS_I(dir), dentry, mode, &ip);
@@ -248,7 +267,7 @@ xfs_vn_lookup(
if (dentry->d_name.len >= MAXNAMELEN)
return ERR_PTR(-ENAMETOOLONG);
- xfs_dentry_to_name(&name, dentry, 0);
+ xfs_dentry_to_name(&name, dentry);
error = xfs_lookup(XFS_I(dir), &name, &cip, NULL);
if (unlikely(error)) {
if (unlikely(error != -ENOENT))
@@ -275,7 +294,7 @@ xfs_vn_ci_lookup(
if (dentry->d_name.len >= MAXNAMELEN)
return ERR_PTR(-ENAMETOOLONG);
- xfs_dentry_to_name(&xname, dentry, 0);
+ xfs_dentry_to_name(&xname, dentry);
error = xfs_lookup(XFS_I(dir), &xname, &ip, &ci_name);
if (unlikely(error)) {
if (unlikely(error != -ENOENT))
@@ -310,7 +329,9 @@ xfs_vn_link(
struct xfs_name name;
int error;
- xfs_dentry_to_name(&name, dentry, inode->i_mode);
+ error = xfs_dentry_mode_to_name(&name, dentry, inode->i_mode);
+ if (unlikely(error))
+ return error;
error = xfs_link(XFS_I(dir), XFS_I(inode), &name);
if (unlikely(error))
@@ -329,7 +350,7 @@ xfs_vn_unlink(
struct xfs_name name;
int error;
- xfs_dentry_to_name(&name, dentry, 0);
+ xfs_dentry_to_name(&name, dentry);
error = xfs_remove(XFS_I(dir), &name, XFS_I(d_inode(dentry)));
if (error)
@@ -359,7 +380,9 @@ xfs_vn_symlink(
mode = S_IFLNK |
(irix_symlink_mode ? 0777 & ~current_umask() : S_IRWXUGO);
- xfs_dentry_to_name(&name, dentry, mode);
+ error = xfs_dentry_mode_to_name(&name, dentry, mode);
+ if (unlikely(error))
+ goto out;
error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip);
if (unlikely(error))
@@ -395,6 +418,7 @@ xfs_vn_rename(
{
struct inode *new_inode = d_inode(ndentry);
int omode = 0;
+ int error;
struct xfs_name oname;
struct xfs_name nname;
@@ -405,8 +429,14 @@ xfs_vn_rename(
if (flags & RENAME_EXCHANGE)
omode = d_inode(ndentry)->i_mode;
- xfs_dentry_to_name(&oname, odentry, omode);
- xfs_dentry_to_name(&nname, ndentry, d_inode(odentry)->i_mode);
+ error = xfs_dentry_mode_to_name(&oname, odentry, omode);
+ if (omode && unlikely(error))
+ return error;
+
+ error = xfs_dentry_mode_to_name(&nname, ndentry,
+ d_inode(odentry)->i_mode);
+ if (unlikely(error))
+ return error;
return xfs_rename(XFS_I(odir), &oname, XFS_I(d_inode(odentry)),
XFS_I(ndir), &nname,
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index e467218c0098..7a989de224f4 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -331,11 +331,11 @@ static inline __uint64_t howmany_64(__uint64_t x, __uint32_t y)
}
#define ASSERT_ALWAYS(expr) \
- (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
+ (likely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
#ifdef DEBUG
#define ASSERT(expr) \
- (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
+ (likely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
#ifndef STATIC
# define STATIC noinline
@@ -346,7 +346,7 @@ static inline __uint64_t howmany_64(__uint64_t x, __uint32_t y)
#ifdef XFS_WARN
#define ASSERT(expr) \
- (unlikely(expr) ? (void)0 : asswarn(#expr, __FILE__, __LINE__))
+ (likely(expr) ? (void)0 : asswarn(#expr, __FILE__, __LINE__))
#ifndef STATIC
# define STATIC static noinline
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index c39ac14ff540..b1469f0a91a6 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -3317,12 +3317,8 @@ xfs_log_force(
xfs_mount_t *mp,
uint flags)
{
- int error;
-
trace_xfs_log_force(mp, 0, _RET_IP_);
- error = _xfs_log_force(mp, flags, NULL);
- if (error)
- xfs_warn(mp, "%s: error %d returned.", __func__, error);
+ _xfs_log_force(mp, flags, NULL);
}
/*
@@ -3466,12 +3462,8 @@ xfs_log_force_lsn(
xfs_lsn_t lsn,
uint flags)
{
- int error;
-
trace_xfs_log_force(mp, lsn, _RET_IP_);
- error = _xfs_log_force_lsn(mp, lsn, flags, NULL);
- if (error)
- xfs_warn(mp, "%s: error %d returned.", __func__, error);
+ _xfs_log_force_lsn(mp, lsn, flags, NULL);
}
/*
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index b5e71072fde5..cc5a9f1574e7 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -124,7 +124,6 @@ struct xlog_ticket;
struct xfs_log_item;
struct xfs_item_ops;
struct xfs_trans;
-struct xfs_log_callback;
xfs_lsn_t xfs_log_done(struct xfs_mount *mp,
struct xlog_ticket *ticket,
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index a4ab192e1792..82f1cbcc4de1 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -30,6 +30,9 @@
#include "xfs_trans_priv.h"
#include "xfs_log.h"
#include "xfs_log_priv.h"
+#include "xfs_trace.h"
+
+struct workqueue_struct *xfs_discard_wq;
/*
* Allocate a new ticket. Failing to get a new ticket makes it really hard to
@@ -491,6 +494,75 @@ xlog_cil_free_logvec(
}
}
+static void
+xlog_discard_endio_work(
+ struct work_struct *work)
+{
+ struct xfs_cil_ctx *ctx =
+ container_of(work, struct xfs_cil_ctx, discard_endio_work);
+ struct xfs_mount *mp = ctx->cil->xc_log->l_mp;
+
+ xfs_extent_busy_clear(mp, &ctx->busy_extents, false);
+ kmem_free(ctx);
+}
+
+/*
+ * Queue up the actual completion to a thread to avoid IRQ-safe locking for
+ * pagb_lock. Note that we need a unbounded workqueue, otherwise we might
+ * get the execution delayed up to 30 seconds for weird reasons.
+ */
+static void
+xlog_discard_endio(
+ struct bio *bio)
+{
+ struct xfs_cil_ctx *ctx = bio->bi_private;
+
+ INIT_WORK(&ctx->discard_endio_work, xlog_discard_endio_work);
+ queue_work(xfs_discard_wq, &ctx->discard_endio_work);
+}
+
+static void
+xlog_discard_busy_extents(
+ struct xfs_mount *mp,
+ struct xfs_cil_ctx *ctx)
+{
+ struct list_head *list = &ctx->busy_extents;
+ struct xfs_extent_busy *busyp;
+ struct bio *bio = NULL;
+ struct blk_plug plug;
+ int error = 0;
+
+ ASSERT(mp->m_flags & XFS_MOUNT_DISCARD);
+
+ blk_start_plug(&plug);
+ list_for_each_entry(busyp, list, list) {
+ trace_xfs_discard_extent(mp, busyp->agno, busyp->bno,
+ busyp->length);
+
+ error = __blkdev_issue_discard(mp->m_ddev_targp->bt_bdev,
+ XFS_AGB_TO_DADDR(mp, busyp->agno, busyp->bno),
+ XFS_FSB_TO_BB(mp, busyp->length),
+ GFP_NOFS, 0, &bio);
+ if (error && error != -EOPNOTSUPP) {
+ xfs_info(mp,
+ "discard failed for extent [0x%llx,%u], error %d",
+ (unsigned long long)busyp->bno,
+ busyp->length,
+ error);
+ break;
+ }
+ }
+
+ if (bio) {
+ bio->bi_private = ctx;
+ bio->bi_end_io = xlog_discard_endio;
+ submit_bio(bio);
+ } else {
+ xlog_discard_endio_work(&ctx->discard_endio_work);
+ }
+ blk_finish_plug(&plug);
+}
+
/*
* Mark all items committed and clear busy extents. We free the log vector
* chains in a separate pass so that we unpin the log items as quickly as
@@ -525,14 +597,10 @@ xlog_cil_committed(
xlog_cil_free_logvec(ctx->lv_chain);
- if (!list_empty(&ctx->busy_extents)) {
- ASSERT(mp->m_flags & XFS_MOUNT_DISCARD);
-
- xfs_discard_extents(mp, &ctx->busy_extents);
- xfs_extent_busy_clear(mp, &ctx->busy_extents, false);
- }
-
- kmem_free(ctx);
+ if (!list_empty(&ctx->busy_extents))
+ xlog_discard_busy_extents(mp, ctx);
+ else
+ kmem_free(ctx);
}
/*
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 2b6eec52178e..c2604a5366f2 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -257,6 +257,7 @@ struct xfs_cil_ctx {
struct xfs_log_vec *lv_chain; /* logvecs being pushed */
struct xfs_log_callback log_cb; /* completion callback hook. */
struct list_head committing; /* ctx committing list */
+ struct work_struct discard_endio_work;
};
/*
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 9b9540db17a6..450bde68bb75 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -45,6 +45,7 @@
#include "xfs_rmap_btree.h"
#include "xfs_refcount_btree.h"
#include "xfs_reflink.h"
+#include "xfs_extent_busy.h"
static DEFINE_MUTEX(xfs_uuid_table_mutex);
@@ -187,7 +188,7 @@ xfs_initialize_perag(
xfs_agnumber_t *maxagi)
{
xfs_agnumber_t index;
- xfs_agnumber_t first_initialised = 0;
+ xfs_agnumber_t first_initialised = NULLAGNUMBER;
xfs_perag_t *pag;
int error = -ENOMEM;
@@ -202,22 +203,21 @@ xfs_initialize_perag(
xfs_perag_put(pag);
continue;
}
- if (!first_initialised)
- first_initialised = index;
pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL);
if (!pag)
- goto out_unwind;
+ goto out_unwind_new_pags;
pag->pag_agno = index;
pag->pag_mount = mp;
spin_lock_init(&pag->pag_ici_lock);
mutex_init(&pag->pag_ici_reclaim_lock);
INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
if (xfs_buf_hash_init(pag))
- goto out_unwind;
+ goto out_free_pag;
+ init_waitqueue_head(&pag->pagb_wait);
if (radix_tree_preload(GFP_NOFS))
- goto out_unwind;
+ goto out_hash_destroy;
spin_lock(&mp->m_perag_lock);
if (radix_tree_insert(&mp->m_perag_tree, index, pag)) {
@@ -225,10 +225,13 @@ xfs_initialize_perag(
spin_unlock(&mp->m_perag_lock);
radix_tree_preload_end();
error = -EEXIST;
- goto out_unwind;
+ goto out_hash_destroy;
}
spin_unlock(&mp->m_perag_lock);
radix_tree_preload_end();
+ /* first new pag is fully initialized */
+ if (first_initialised == NULLAGNUMBER)
+ first_initialised = index;
}
index = xfs_set_inode_alloc(mp, agcount);
@@ -239,11 +242,16 @@ xfs_initialize_perag(
mp->m_ag_prealloc_blocks = xfs_prealloc_blocks(mp);
return 0;
-out_unwind:
+out_hash_destroy:
xfs_buf_hash_destroy(pag);
+out_free_pag:
kmem_free(pag);
- for (; index > first_initialised; index--) {
+out_unwind_new_pags:
+ /* unwind any prior newly initialized pags */
+ for (index = first_initialised; index < agcount; index++) {
pag = radix_tree_delete(&mp->m_perag_tree, index);
+ if (!pag)
+ break;
xfs_buf_hash_destroy(pag);
kmem_free(pag);
}
@@ -1073,6 +1081,13 @@ xfs_unmountfs(
xfs_log_force(mp, XFS_LOG_SYNC);
/*
+ * Wait for all busy extents to be freed, including completion of
+ * any discard operation.
+ */
+ xfs_extent_busy_wait_all(mp);
+ flush_workqueue(xfs_discard_wq);
+
+ /*
* We now need to tell the world we are unmounting. This will allow
* us to detect that the filesystem is going away and we should error
* out anything that we have been retrying in the background. This will
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 84f785218907..6db6fd6b82b0 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -140,6 +140,7 @@ typedef struct xfs_mount {
int m_fixedfsid[2]; /* unchanged for life of FS */
uint m_dmevmask; /* DMI events for this FS */
__uint64_t m_flags; /* global mount flags */
+ bool m_inotbt_nores; /* no per-AG finobt resv. */
int m_ialloc_inos; /* inodes in inode allocation */
int m_ialloc_blks; /* blocks in inode allocation */
int m_ialloc_min_blks;/* min blocks in sparse inode
@@ -199,11 +200,12 @@ typedef struct xfs_mount {
/*
* DEBUG mode instrumentation to test and/or trigger delayed allocation
* block killing in the event of failed writes. When enabled, all
- * buffered writes are forced to fail. All delalloc blocks in the range
- * of the write (including pre-existing delalloc blocks!) are tossed as
- * part of the write failure error handling sequence.
+ * buffered writes are silenty dropped and handled as if they failed.
+ * All delalloc blocks in the range of the write (including pre-existing
+ * delalloc blocks!) are tossed as part of the write failure error
+ * handling sequence.
*/
- bool m_fail_writes;
+ bool m_drop_writes;
#endif
} xfs_mount_t;
@@ -324,13 +326,13 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
#ifdef DEBUG
static inline bool
-xfs_mp_fail_writes(struct xfs_mount *mp)
+xfs_mp_drop_writes(struct xfs_mount *mp)
{
- return mp->m_fail_writes;
+ return mp->m_drop_writes;
}
#else
static inline bool
-xfs_mp_fail_writes(struct xfs_mount *mp)
+xfs_mp_drop_writes(struct xfs_mount *mp)
{
return 0;
}
@@ -383,6 +385,8 @@ typedef struct xfs_perag {
xfs_agino_t pagl_rightrec;
spinlock_t pagb_lock; /* lock for pagb_tree */
struct rb_root pagb_tree; /* ordered tree of busy extents */
+ unsigned int pagb_gen; /* generation count for pagb_tree */
+ wait_queue_head_t pagb_wait; /* woken when pagb_gen changes */
atomic_t pagf_fstrms; /* # of filestreams active in this AG */
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 45e50ea90769..b669b123287b 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -1177,7 +1177,8 @@ xfs_qm_dqusage_adjust(
* the case in all other instances. It's OK that we do this because
* quotacheck is done only at mount time.
*/
- error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip);
+ error = xfs_iget(mp, NULL, ino, XFS_IGET_DONTCACHE, XFS_ILOCK_EXCL,
+ &ip);
if (error) {
*res = BULKSTAT_RV_NOTHING;
return error;
diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
index fe86a668a57e..6e4c7446c3d4 100644
--- a/fs/xfs/xfs_refcount_item.c
+++ b/fs/xfs/xfs_refcount_item.c
@@ -526,13 +526,14 @@ xfs_cui_recover(
xfs_refcount_finish_one_cleanup(tp, rcur, error);
error = xfs_defer_finish(&tp, &dfops, NULL);
if (error)
- goto abort_error;
+ goto abort_defer;
set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags);
error = xfs_trans_commit(tp);
return error;
abort_error:
xfs_refcount_finish_one_cleanup(tp, rcur, error);
+abort_defer:
xfs_defer_cancel(&dfops);
xfs_trans_cancel(tp);
return error;
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 07593a362cd0..da6d08fb359c 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -82,11 +82,22 @@
* mappings are a reservation against the free space in the filesystem;
* adjacent mappings can also be combined into fewer larger mappings.
*
+ * As an optimization, the CoW extent size hint (cowextsz) creates
+ * outsized aligned delalloc reservations in the hope of landing out of
+ * order nearby CoW writes in a single extent on disk, thereby reducing
+ * fragmentation and improving future performance.
+ *
+ * D: --RRRRRRSSSRRRRRRRR--- (data fork)
+ * C: ------DDDDDDD--------- (CoW fork)
+ *
* When dirty pages are being written out (typically in writepage), the
- * delalloc reservations are converted into real mappings by allocating
- * blocks and replacing the delalloc mapping with real ones. A delalloc
- * mapping can be replaced by several real ones if the free space is
- * fragmented.
+ * delalloc reservations are converted into unwritten mappings by
+ * allocating blocks and replacing the delalloc mapping with real ones.
+ * A delalloc mapping can be replaced by several unwritten ones if the
+ * free space is fragmented.
+ *
+ * D: --RRRRRRSSSRRRRRRRR---
+ * C: ------UUUUUUU---------
*
* We want to adapt the delalloc mechanism for copy-on-write, since the
* write paths are similar. The first two steps (creating the reservation
@@ -101,13 +112,29 @@
* Block-aligned directio writes will use the same mechanism as buffered
* writes.
*
+ * Just prior to submitting the actual disk write requests, we convert
+ * the extents representing the range of the file actually being written
+ * (as opposed to extra pieces created for the cowextsize hint) to real
+ * extents. This will become important in the next step:
+ *
+ * D: --RRRRRRSSSRRRRRRRR---
+ * C: ------UUrrUUU---------
+ *
* CoW remapping must be done after the data block write completes,
* because we don't want to destroy the old data fork map until we're sure
* the new block has been written. Since the new mappings are kept in a
* separate fork, we can simply iterate these mappings to find the ones
* that cover the file blocks that we just CoW'd. For each extent, simply
* unmap the corresponding range in the data fork, map the new range into
- * the data fork, and remove the extent from the CoW fork.
+ * the data fork, and remove the extent from the CoW fork. Because of
+ * the presence of the cowextsize hint, however, we must be careful
+ * only to remap the blocks that we've actually written out -- we must
+ * never remap delalloc reservations nor CoW staging blocks that have
+ * yet to be written. This corresponds exactly to the real extents in
+ * the CoW fork:
+ *
+ * D: --RRRRRRrrSRRRRRRRR---
+ * C: ------UU--UUU---------
*
* Since the remapping operation can be applied to an arbitrary file
* range, we record the need for the remap step as a flag in the ioend
@@ -296,103 +323,165 @@ xfs_reflink_reserve_cow(
return 0;
}
-/* Allocate all CoW reservations covering a range of blocks in a file. */
-static int
-__xfs_reflink_allocate_cow(
- struct xfs_inode *ip,
- xfs_fileoff_t *offset_fsb,
- xfs_fileoff_t end_fsb)
+/* Convert part of an unwritten CoW extent to a real one. */
+STATIC int
+xfs_reflink_convert_cow_extent(
+ struct xfs_inode *ip,
+ struct xfs_bmbt_irec *imap,
+ xfs_fileoff_t offset_fsb,
+ xfs_filblks_t count_fsb,
+ struct xfs_defer_ops *dfops)
{
- struct xfs_mount *mp = ip->i_mount;
- struct xfs_bmbt_irec imap;
- struct xfs_defer_ops dfops;
- struct xfs_trans *tp;
- xfs_fsblock_t first_block;
- int nimaps = 1, error;
- bool shared;
-
- xfs_defer_init(&dfops, &first_block);
+ xfs_fsblock_t first_block;
+ int nimaps = 1;
- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0,
- XFS_TRANS_RESERVE, &tp);
- if (error)
- return error;
+ if (imap->br_state == XFS_EXT_NORM)
+ return 0;
- xfs_ilock(ip, XFS_ILOCK_EXCL);
+ xfs_trim_extent(imap, offset_fsb, count_fsb);
+ trace_xfs_reflink_convert_cow(ip, imap);
+ if (imap->br_blockcount == 0)
+ return 0;
+ return xfs_bmapi_write(NULL, ip, imap->br_startoff, imap->br_blockcount,
+ XFS_BMAPI_COWFORK | XFS_BMAPI_CONVERT, &first_block,
+ 0, imap, &nimaps, dfops);
+}
- /* Read extent from the source file. */
- nimaps = 1;
- error = xfs_bmapi_read(ip, *offset_fsb, end_fsb - *offset_fsb,
- &imap, &nimaps, 0);
- if (error)
- goto out_unlock;
- ASSERT(nimaps == 1);
+/* Convert all of the unwritten CoW extents in a file's range to real ones. */
+int
+xfs_reflink_convert_cow(
+ struct xfs_inode *ip,
+ xfs_off_t offset,
+ xfs_off_t count)
+{
+ struct xfs_bmbt_irec got;
+ struct xfs_defer_ops dfops;
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+ xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
+ xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + count);
+ xfs_extnum_t idx;
+ bool found;
+ int error = 0;
- error = xfs_reflink_reserve_cow(ip, &imap, &shared);
- if (error)
- goto out_trans_cancel;
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
- if (!shared) {
- *offset_fsb = imap.br_startoff + imap.br_blockcount;
- goto out_trans_cancel;
+ /* Convert all the extents to real from unwritten. */
+ for (found = xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got);
+ found && got.br_startoff < end_fsb;
+ found = xfs_iext_get_extent(ifp, ++idx, &got)) {
+ error = xfs_reflink_convert_cow_extent(ip, &got, offset_fsb,
+ end_fsb - offset_fsb, &dfops);
+ if (error)
+ break;
}
- xfs_trans_ijoin(tp, ip, 0);
- error = xfs_bmapi_write(tp, ip, imap.br_startoff, imap.br_blockcount,
- XFS_BMAPI_COWFORK, &first_block,
- XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK),
- &imap, &nimaps, &dfops);
- if (error)
- goto out_trans_cancel;
-
- error = xfs_defer_finish(&tp, &dfops, NULL);
- if (error)
- goto out_trans_cancel;
-
- error = xfs_trans_commit(tp);
-
- *offset_fsb = imap.br_startoff + imap.br_blockcount;
-out_unlock:
+ /* Finish up. */
xfs_iunlock(ip, XFS_ILOCK_EXCL);
return error;
-out_trans_cancel:
- xfs_defer_cancel(&dfops);
- xfs_trans_cancel(tp);
- goto out_unlock;
}
-/* Allocate all CoW reservations covering a part of a file. */
+/* Allocate all CoW reservations covering a range of blocks in a file. */
int
-xfs_reflink_allocate_cow_range(
+xfs_reflink_allocate_cow(
struct xfs_inode *ip,
- xfs_off_t offset,
- xfs_off_t count)
+ struct xfs_bmbt_irec *imap,
+ bool *shared,
+ uint *lockmode)
{
struct xfs_mount *mp = ip->i_mount;
- xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
- xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + count);
- int error;
+ xfs_fileoff_t offset_fsb = imap->br_startoff;
+ xfs_filblks_t count_fsb = imap->br_blockcount;
+ struct xfs_bmbt_irec got;
+ struct xfs_defer_ops dfops;
+ struct xfs_trans *tp = NULL;
+ xfs_fsblock_t first_block;
+ int nimaps, error = 0;
+ bool trimmed;
+ xfs_filblks_t resaligned;
+ xfs_extlen_t resblks = 0;
+ xfs_extnum_t idx;
+retry:
ASSERT(xfs_is_reflink_inode(ip));
-
- trace_xfs_reflink_allocate_cow_range(ip, offset, count);
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED));
/*
- * Make sure that the dquots are there.
+ * Even if the extent is not shared we might have a preallocation for
+ * it in the COW fork. If so use it.
*/
- error = xfs_qm_dqattach(ip, 0);
- if (error)
- return error;
+ if (xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &idx, &got) &&
+ got.br_startoff <= offset_fsb) {
+ *shared = true;
- while (offset_fsb < end_fsb) {
- error = __xfs_reflink_allocate_cow(ip, &offset_fsb, end_fsb);
- if (error) {
- trace_xfs_reflink_allocate_cow_range_error(ip, error,
- _RET_IP_);
- break;
+ /* If we have a real allocation in the COW fork we're done. */
+ if (!isnullstartblock(got.br_startblock)) {
+ xfs_trim_extent(&got, offset_fsb, count_fsb);
+ *imap = got;
+ goto convert;
}
+
+ xfs_trim_extent(imap, got.br_startoff, got.br_blockcount);
+ } else {
+ error = xfs_reflink_trim_around_shared(ip, imap, shared, &trimmed);
+ if (error || !*shared)
+ goto out;
+ }
+
+ if (!tp) {
+ resaligned = xfs_aligned_fsb_count(imap->br_startoff,
+ imap->br_blockcount, xfs_get_cowextsz_hint(ip));
+ resblks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
+
+ xfs_iunlock(ip, *lockmode);
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp);
+ *lockmode = XFS_ILOCK_EXCL;
+ xfs_ilock(ip, *lockmode);
+
+ if (error)
+ return error;
+
+ error = xfs_qm_dqattach_locked(ip, 0);
+ if (error)
+ goto out;
+ goto retry;
}
+ error = xfs_trans_reserve_quota_nblks(tp, ip, resblks, 0,
+ XFS_QMOPT_RES_REGBLKS);
+ if (error)
+ goto out;
+
+ xfs_trans_ijoin(tp, ip, 0);
+
+ xfs_defer_init(&dfops, &first_block);
+ nimaps = 1;
+
+ /* Allocate the entire reservation as unwritten blocks. */
+ error = xfs_bmapi_write(tp, ip, imap->br_startoff, imap->br_blockcount,
+ XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, &first_block,
+ resblks, imap, &nimaps, &dfops);
+ if (error)
+ goto out_bmap_cancel;
+
+ /* Finish up. */
+ error = xfs_defer_finish(&tp, &dfops, NULL);
+ if (error)
+ goto out_bmap_cancel;
+
+ error = xfs_trans_commit(tp);
+ if (error)
+ return error;
+convert:
+ return xfs_reflink_convert_cow_extent(ip, imap, offset_fsb, count_fsb,
+ &dfops);
+out_bmap_cancel:
+ xfs_defer_cancel(&dfops);
+ xfs_trans_unreserve_quota_nblks(tp, ip, (long)resblks, 0,
+ XFS_QMOPT_RES_REGBLKS);
+out:
+ if (tp)
+ xfs_trans_cancel(tp);
return error;
}
@@ -641,6 +730,16 @@ xfs_reflink_end_cow(
ASSERT(!isnullstartblock(got.br_startblock));
+ /*
+ * Don't remap unwritten extents; these are
+ * speculatively preallocated CoW extents that have been
+ * allocated but have not yet been involved in a write.
+ */
+ if (got.br_state == XFS_EXT_UNWRITTEN) {
+ idx--;
+ goto next_extent;
+ }
+
/* Unmap the old blocks in the data fork. */
xfs_defer_init(&dfops, &firstfsb);
rlen = del.br_blockcount;
@@ -855,13 +954,14 @@ STATIC int
xfs_reflink_update_dest(
struct xfs_inode *dest,
xfs_off_t newlen,
- xfs_extlen_t cowextsize)
+ xfs_extlen_t cowextsize,
+ bool is_dedupe)
{
struct xfs_mount *mp = dest->i_mount;
struct xfs_trans *tp;
int error;
- if (newlen <= i_size_read(VFS_I(dest)) && cowextsize == 0)
+ if (is_dedupe && newlen <= i_size_read(VFS_I(dest)) && cowextsize == 0)
return 0;
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);
@@ -882,6 +982,10 @@ xfs_reflink_update_dest(
dest->i_d.di_flags2 |= XFS_DIFLAG2_COWEXTSIZE;
}
+ if (!is_dedupe) {
+ xfs_trans_ichgtime(tp, dest,
+ XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+ }
xfs_trans_log_inode(tp, dest, XFS_ILOG_CORE);
error = xfs_trans_commit(tp);
@@ -1195,7 +1299,8 @@ xfs_reflink_remap_range(
!(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE))
cowextsize = src->i_d.di_cowextsize;
- ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize);
+ ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize,
+ is_dedupe);
out_unlock:
xfs_iunlock(src, XFS_MMAPLOCK_EXCL);
diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
index aa6a4d64bd35..33ac9b8db683 100644
--- a/fs/xfs/xfs_reflink.h
+++ b/fs/xfs/xfs_reflink.h
@@ -28,8 +28,10 @@ extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip,
extern int xfs_reflink_reserve_cow(struct xfs_inode *ip,
struct xfs_bmbt_irec *imap, bool *shared);
-extern int xfs_reflink_allocate_cow_range(struct xfs_inode *ip,
- xfs_off_t offset, xfs_off_t count);
+extern int xfs_reflink_allocate_cow(struct xfs_inode *ip,
+ struct xfs_bmbt_irec *imap, bool *shared, uint *lockmode);
+extern int xfs_reflink_convert_cow(struct xfs_inode *ip, xfs_off_t offset,
+ xfs_off_t count);
extern bool xfs_reflink_find_cow_mapping(struct xfs_inode *ip, xfs_off_t offset,
struct xfs_bmbt_irec *imap);
extern void xfs_reflink_trim_irec_to_next_cow(struct xfs_inode *ip,
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index eecbaac08eba..890862f2447c 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1956,12 +1956,20 @@ xfs_init_workqueues(void)
if (!xfs_alloc_wq)
return -ENOMEM;
+ xfs_discard_wq = alloc_workqueue("xfsdiscard", WQ_UNBOUND, 0);
+ if (!xfs_discard_wq)
+ goto out_free_alloc_wq;
+
return 0;
+out_free_alloc_wq:
+ destroy_workqueue(xfs_alloc_wq);
+ return -ENOMEM;
}
STATIC void
xfs_destroy_workqueues(void)
{
+ destroy_workqueue(xfs_discard_wq);
destroy_workqueue(xfs_alloc_wq);
}
diff --git a/fs/xfs/xfs_super.h b/fs/xfs/xfs_super.h
index b6418abd85ad..5f2f32408011 100644
--- a/fs/xfs/xfs_super.h
+++ b/fs/xfs/xfs_super.h
@@ -73,6 +73,8 @@ extern const struct quotactl_ops xfs_quotactl_operations;
extern void xfs_reinit_percpu_counters(struct xfs_mount *mp);
+extern struct workqueue_struct *xfs_discard_wq;
+
#define XFS_M(sb) ((struct xfs_mount *)((sb)->s_fs_info))
#endif /* __XFS_SUPER_H__ */
diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c
index 276d3023d60f..80ac15fb9638 100644
--- a/fs/xfs/xfs_sysfs.c
+++ b/fs/xfs/xfs_sysfs.c
@@ -93,7 +93,7 @@ to_mp(struct kobject *kobject)
#ifdef DEBUG
STATIC ssize_t
-fail_writes_store(
+drop_writes_store(
struct kobject *kobject,
const char *buf,
size_t count)
@@ -107,9 +107,9 @@ fail_writes_store(
return ret;
if (val == 1)
- mp->m_fail_writes = true;
+ mp->m_drop_writes = true;
else if (val == 0)
- mp->m_fail_writes = false;
+ mp->m_drop_writes = false;
else
return -EINVAL;
@@ -117,21 +117,21 @@ fail_writes_store(
}
STATIC ssize_t
-fail_writes_show(
+drop_writes_show(
struct kobject *kobject,
char *buf)
{
struct xfs_mount *mp = to_mp(kobject);
- return snprintf(buf, PAGE_SIZE, "%d\n", mp->m_fail_writes ? 1 : 0);
+ return snprintf(buf, PAGE_SIZE, "%d\n", mp->m_drop_writes ? 1 : 0);
}
-XFS_SYSFS_ATTR_RW(fail_writes);
+XFS_SYSFS_ATTR_RW(drop_writes);
#endif /* DEBUG */
static struct attribute *xfs_mp_attrs[] = {
#ifdef DEBUG
- ATTR_LIST(fail_writes),
+ ATTR_LIST(drop_writes),
#endif
NULL,
};
@@ -396,7 +396,7 @@ max_retries_show(
int retries;
struct xfs_error_cfg *cfg = to_error_cfg(kobject);
- if (cfg->retry_timeout == XFS_ERR_RETRY_FOREVER)
+ if (cfg->max_retries == XFS_ERR_RETRY_FOREVER)
retries = -1;
else
retries = cfg->max_retries;
@@ -422,7 +422,7 @@ max_retries_store(
return -EINVAL;
if (val == -1)
- cfg->retry_timeout = XFS_ERR_RETRY_FOREVER;
+ cfg->max_retries = XFS_ERR_RETRY_FOREVER;
else
cfg->max_retries = val;
return count;
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 69c5bcd9a51b..fb7555e73a62 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -2245,7 +2245,6 @@ DEFINE_BTREE_CUR_EVENT(xfs_btree_overlapped_query_range);
/* deferred ops */
struct xfs_defer_pending;
-struct xfs_defer_intake;
struct xfs_defer_ops;
DECLARE_EVENT_CLASS(xfs_defer_class,
@@ -3089,6 +3088,7 @@ DECLARE_EVENT_CLASS(xfs_inode_irec_class,
__field(xfs_fileoff_t, lblk)
__field(xfs_extlen_t, len)
__field(xfs_fsblock_t, pblk)
+ __field(int, state)
),
TP_fast_assign(
__entry->dev = VFS_I(ip)->i_sb->s_dev;
@@ -3096,13 +3096,15 @@ DECLARE_EVENT_CLASS(xfs_inode_irec_class,
__entry->lblk = irec->br_startoff;
__entry->len = irec->br_blockcount;
__entry->pblk = irec->br_startblock;
+ __entry->state = irec->br_state;
),
- TP_printk("dev %d:%d ino 0x%llx lblk 0x%llx len 0x%x pblk %llu",
+ TP_printk("dev %d:%d ino 0x%llx lblk 0x%llx len 0x%x pblk %llu st %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->lblk,
__entry->len,
- __entry->pblk)
+ __entry->pblk,
+ __entry->state)
);
#define DEFINE_INODE_IREC_EVENT(name) \
DEFINE_EVENT(xfs_inode_irec_class, name, \
@@ -3242,11 +3244,11 @@ DEFINE_INODE_IREC_EVENT(xfs_reflink_trim_around_shared);
DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_alloc);
DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_found);
DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_enospc);
+DEFINE_INODE_IREC_EVENT(xfs_reflink_convert_cow);
DEFINE_RW_EVENT(xfs_reflink_reserve_cow);
-DEFINE_RW_EVENT(xfs_reflink_allocate_cow_range);
-DEFINE_INODE_IREC_EVENT(xfs_reflink_bounce_dio_write);
+DEFINE_SIMPLE_IO_EVENT(xfs_reflink_bounce_dio_write);
DEFINE_IOMAP_EVENT(xfs_reflink_find_cow_mapping);
DEFINE_INODE_IREC_EVENT(xfs_reflink_trim_irec);
@@ -3254,7 +3256,6 @@ DEFINE_SIMPLE_IO_EVENT(xfs_reflink_cancel_cow_range);
DEFINE_SIMPLE_IO_EVENT(xfs_reflink_end_cow);
DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_remap);
-DEFINE_INODE_ERROR_EVENT(xfs_reflink_allocate_cow_range_error);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_cancel_cow_range_error);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_end_cow_error);
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 61b7fbdd3ebd..1646f659b60f 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -32,7 +32,6 @@ struct xfs_mount;
struct xfs_trans;
struct xfs_trans_res;
struct xfs_dquot_acct;
-struct xfs_busy_extent;
struct xfs_rud_log_item;
struct xfs_rui_log_item;
struct xfs_btree_cur;