summaryrefslogtreecommitdiff
path: root/fs/afs/inode.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-09-20 15:49:02 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-09-20 15:49:02 -0700
commitd9fb678414c048e185eaddadd18d75f5e8832ff3 (patch)
treebb113cb617da0e1264e9b27dc93c7cd22d18b027 /fs/afs/inode.c
parent707a63e9a9dd55432d47bf40457d4a3413888dcc (diff)
parent9d37e1cab2a9d2cee2737973fa455e6f89eee46a (diff)
Merge tag 'afs-fixes-20210913' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs
Pull AFS fixes from David Howells: "Fixes for AFS problems that can cause data corruption due to interaction with another client modifying data cached locally: - When d_revalidating a dentry, don't look at the inode to which it points. Only check the directory to which the dentry belongs. This was confusing things and causing the silly-rename cleanup code to remove the file now at the dentry of a file that got deleted. - Fix mmap data coherency. When a callback break is received that relates to a file that we have cached, the data content may have been changed (there are other reasons, such as the user's rights having been changed). However, we're checking it lazily, only on entry to the kernel, which doesn't happen if we have a writeable shared mapped page on that file. We make the kernel keep track of mmapped files and clear all PTEs mapping to that file as soon as the callback comes in by calling unmap_mapping_pages() (we don't necessarily want to zap the pagecache). This causes the kernel to be reentered when userspace tries to access the mmapped address range again - and at that point we can query the server and, if we need to, zap the page cache. Ideally, I would check each file at the point of notification, but that involves poking the server[*] - which is holding an exclusive lock on the vnode it is changing, waiting for all the clients it notified to reply. This could then deadlock against the server. Further, invalidating the pagecache might call ->launder_page(), which would try to write to the file, which would definitely deadlock. (AFS doesn't lease file access). [*] Checking to see if the file content has changed is a matter of comparing the current data version number, but we have to ask the server for that. We also need to get a new callback promise and we need to poke the server for that too. - Add some more points at which the inode is validated, since we're doing it lazily, notably in ->read_iter() and ->page_mkwrite(), but also when performing some directory operations. Ideally, checking in ->read_iter() would be done in some derivation of filemap_read(). If we're going to call the server to read the file, then we get the file status fetch as part of that. - The above is now causing us to make a lot more calls to afs_validate() to check the inode - and afs_validate() takes the RCU read lock each time to make a quick check (ie. afs_check_validity()). This is entirely for the purpose of checking cb_s_break to see if the server we're using reinitialised its list of callbacks - however this isn't a very common event, so most of the time we're taking this needlessly. Add a new cell-wide counter to count the number of reinitialisations done by any server and check that - and only if that changes, take the RCU read lock and check the server list (the server list may change, but the cell a file is part of won't). - Don't update vnode->cb_s_break and ->cb_v_break inside the validity checking loop. The cb_lock is done with read_seqretry, so we might go round the loop a second time after resetting those values - and that could cause someone else checking validity to miss something (I think). Also included are patches for fixes for some bugs encountered whilst debugging this: - Fix a leak of afs_read objects and fix a leak of keys hidden by that. - Fix a leak of pages that couldn't be added to extend a writeback. - Fix the maintenance of i_blocks when i_size is changed by a local write or a local dir edit" Link: https://bugzilla.kernel.org/show_bug.cgi?id=214217 [1] Link: https://lore.kernel.org/r/163111665183.283156.17200205573146438918.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/163113612442.352844.11162345591911691150.stgit@warthog.procyon.org.uk/ # i_blocks patch * tag 'afs-fixes-20210913' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs: afs: Fix updating of i_blocks on file/dir extension afs: Fix corruption in reads at fpos 2G-4G from an OpenAFS server afs: Try to avoid taking RCU read lock when checking vnode validity afs: Fix mmap coherency vs 3rd-party changes afs: Fix incorrect triggering of sillyrename on 3rd-party invalidation afs: Add missing vnode validation checks afs: Fix page leak afs: Fix missing put on afs_read objects and missing get on the key therein
Diffstat (limited to 'fs/afs/inode.c')
-rw-r--r--fs/afs/inode.c98
1 files changed, 43 insertions, 55 deletions
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 80b6c8d967d5..8fcffea2daf5 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -54,16 +54,6 @@ static noinline void dump_vnode(struct afs_vnode *vnode, struct afs_vnode *paren
}
/*
- * Set the file size and block count. Estimate the number of 512 bytes blocks
- * used, rounded up to nearest 1K for consistency with other AFS clients.
- */
-static void afs_set_i_size(struct afs_vnode *vnode, u64 size)
-{
- i_size_write(&vnode->vfs_inode, size);
- vnode->vfs_inode.i_blocks = ((size + 1023) >> 10) << 1;
-}
-
-/*
* Initialise an inode from the vnode status.
*/
static int afs_inode_init_from_status(struct afs_operation *op,
@@ -587,22 +577,32 @@ static void afs_zap_data(struct afs_vnode *vnode)
}
/*
- * Get the server reinit counter for a vnode's current server.
+ * Check to see if we have a server currently serving this volume and that it
+ * hasn't been reinitialised or dropped from the list.
*/
-static bool afs_get_s_break_rcu(struct afs_vnode *vnode, unsigned int *_s_break)
+static bool afs_check_server_good(struct afs_vnode *vnode)
{
- struct afs_server_list *slist = rcu_dereference(vnode->volume->servers);
+ struct afs_server_list *slist;
struct afs_server *server;
+ bool good;
int i;
+ if (vnode->cb_fs_s_break == atomic_read(&vnode->volume->cell->fs_s_break))
+ return true;
+
+ rcu_read_lock();
+
+ slist = rcu_dereference(vnode->volume->servers);
for (i = 0; i < slist->nr_servers; i++) {
server = slist->servers[i].server;
if (server == vnode->cb_server) {
- *_s_break = READ_ONCE(server->cb_s_break);
- return true;
+ good = (vnode->cb_s_break == server->cb_s_break);
+ rcu_read_unlock();
+ return good;
}
}
+ rcu_read_unlock();
return false;
}
@@ -611,57 +611,46 @@ static bool afs_get_s_break_rcu(struct afs_vnode *vnode, unsigned int *_s_break)
*/
bool afs_check_validity(struct afs_vnode *vnode)
{
- struct afs_volume *volume = vnode->volume;
enum afs_cb_break_reason need_clear = afs_cb_break_no_break;
time64_t now = ktime_get_real_seconds();
- bool valid;
- unsigned int cb_break, cb_s_break, cb_v_break;
+ unsigned int cb_break;
int seq = 0;
do {
read_seqbegin_or_lock(&vnode->cb_lock, &seq);
- cb_v_break = READ_ONCE(volume->cb_v_break);
cb_break = vnode->cb_break;
- if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags) &&
- afs_get_s_break_rcu(vnode, &cb_s_break)) {
- if (vnode->cb_s_break != cb_s_break ||
- vnode->cb_v_break != cb_v_break) {
- vnode->cb_s_break = cb_s_break;
- vnode->cb_v_break = cb_v_break;
- need_clear = afs_cb_break_for_vsbreak;
- valid = false;
- } else if (test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) {
+ if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
+ if (vnode->cb_v_break != vnode->volume->cb_v_break)
+ need_clear = afs_cb_break_for_v_break;
+ else if (!afs_check_server_good(vnode))
+ need_clear = afs_cb_break_for_s_reinit;
+ else if (test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags))
need_clear = afs_cb_break_for_zap;
- valid = false;
- } else if (vnode->cb_expires_at - 10 <= now) {
+ else if (vnode->cb_expires_at - 10 <= now)
need_clear = afs_cb_break_for_lapsed;
- valid = false;
- } else {
- valid = true;
- }
} else if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
- valid = true;
+ ;
} else {
- vnode->cb_v_break = cb_v_break;
- valid = false;
+ need_clear = afs_cb_break_no_promise;
}
} while (need_seqretry(&vnode->cb_lock, seq));
done_seqretry(&vnode->cb_lock, seq);
- if (need_clear != afs_cb_break_no_break) {
- write_seqlock(&vnode->cb_lock);
- if (cb_break == vnode->cb_break)
- __afs_break_callback(vnode, need_clear);
- else
- trace_afs_cb_miss(&vnode->fid, need_clear);
- write_sequnlock(&vnode->cb_lock);
- valid = false;
- }
+ if (need_clear == afs_cb_break_no_break)
+ return true;
- return valid;
+ write_seqlock(&vnode->cb_lock);
+ if (need_clear == afs_cb_break_no_promise)
+ vnode->cb_v_break = vnode->volume->cb_v_break;
+ else if (cb_break == vnode->cb_break)
+ __afs_break_callback(vnode, need_clear);
+ else
+ trace_afs_cb_miss(&vnode->fid, need_clear);
+ write_sequnlock(&vnode->cb_lock);
+ return false;
}
/*
@@ -675,21 +664,20 @@ bool afs_check_validity(struct afs_vnode *vnode)
*/
int afs_validate(struct afs_vnode *vnode, struct key *key)
{
- bool valid;
int ret;
_enter("{v={%llx:%llu} fl=%lx},%x",
vnode->fid.vid, vnode->fid.vnode, vnode->flags,
key_serial(key));
- rcu_read_lock();
- valid = afs_check_validity(vnode);
- rcu_read_unlock();
-
- if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
- clear_nlink(&vnode->vfs_inode);
+ if (unlikely(test_bit(AFS_VNODE_DELETED, &vnode->flags))) {
+ if (vnode->vfs_inode.i_nlink)
+ clear_nlink(&vnode->vfs_inode);
+ goto valid;
+ }
- if (valid)
+ if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags) &&
+ afs_check_validity(vnode))
goto valid;
down_write(&vnode->validate_lock);