From 84a80f62f71beac20a426709c04b49f2bd352291 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 9 Mar 2015 15:23:35 -0400 Subject: NFSv4.1: Convert pNFS deviceid to use kfree_rcu() Use of synchronize_rcu() when unmounting and potentially freeing a lot of deviceids is problematic. There really is no reason why we can't just use kfree_rcu() here. Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/nfs/pnfs.h') diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 635f0865671c..a1fc16c971a7 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -302,6 +302,7 @@ struct nfs4_deviceid_node { unsigned long flags; unsigned long timestamp_unavailable; struct nfs4_deviceid deviceid; + struct rcu_head rcu; atomic_t ref; }; -- cgit v1.2.3 From df52699e4fcefe30ebe4f1db48bd161254a0b102 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 9 Mar 2015 14:48:32 -0400 Subject: NFSv4.1: Don't cache deviceids that have no notifications The spec says that once all layouts that reference a given deviceid have been returned, then we are only allowed to continue to cache the deviceid if the metadata server supports notifications. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 ++ fs/nfs/pnfs.h | 2 ++ fs/nfs/pnfs_dev.c | 9 +++++++++ 3 files changed, 13 insertions(+) (limited to 'fs/nfs/pnfs.h') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ba8b2b5e98a1..9ff8c63c9cac 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7962,6 +7962,8 @@ _nfs4_proc_getdeviceinfo(struct nfs_server *server, status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); if (res.notification & ~args.notify_types) dprintk("%s: unsupported notification\n", __func__); + if (res.notification != args.notify_types) + pdev->nocache = 1; dprintk("<-- %s status=%d\n", __func__, status); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index a1fc16c971a7..b5654e8da936 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -203,6 +203,7 @@ struct pnfs_device { struct page **pages; unsigned int pgbase; unsigned int pglen; /* reply buffer length */ + unsigned char nocache : 1;/* May not be cached */ }; #define NFS4_PNFS_GETDEVLIST_MAXNUM 16 @@ -291,6 +292,7 @@ void pnfs_error_mark_layout_for_return(struct inode *inode, enum { NFS_DEVICEID_INVALID = 0, /* set when MDS clientid recalled */ NFS_DEVICEID_UNAVAILABLE, /* device temporarily unavailable */ + NFS_DEVICEID_NOCACHE, /* device may not be cached */ }; /* pnfs_dev.c */ diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c index 7e07f4ba4822..2961fcd7a2df 100644 --- a/fs/nfs/pnfs_dev.c +++ b/fs/nfs/pnfs_dev.c @@ -149,6 +149,8 @@ nfs4_get_device_info(struct nfs_server *server, */ d = server->pnfs_curr_ld->alloc_deviceid_node(server, pdev, gfp_flags); + if (d && pdev->nocache) + set_bit(NFS_DEVICEID_NOCACHE, &d->flags); out_free_pages: for (i = 0; i < max_pages; i++) @@ -235,6 +237,7 @@ nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *ld, return; } hlist_del_init_rcu(&d->node); + clear_bit(NFS_DEVICEID_NOCACHE, &d->flags); spin_unlock(&nfs4_deviceid_lock); /* balance the initial ref set in pnfs_insert_deviceid */ @@ -269,6 +272,11 @@ EXPORT_SYMBOL_GPL(nfs4_init_deviceid_node); bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *d) { + if (test_bit(NFS_DEVICEID_NOCACHE, &d->flags)) { + if (atomic_add_unless(&d->ref, -1, 2)) + return false; + nfs4_delete_deviceid(d->ld, d->nfs_client, &d->deviceid); + } if (!atomic_dec_and_test(&d->ref)) return false; d->ld->free_deviceid_node(d); @@ -312,6 +320,7 @@ _deviceid_purge_client(const struct nfs_client *clp, long hash) if (d->nfs_client == clp && atomic_read(&d->ref)) { hlist_del_init_rcu(&d->node); hlist_add_head(&d->tmpnode, &tmp); + clear_bit(NFS_DEVICEID_NOCACHE, &d->flags); } rcu_read_unlock(); spin_unlock(&nfs4_deviceid_lock); -- cgit v1.2.3 From 67af7611ec57dbcbc96f9d9daa4d3c7b0999aa73 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 25 Mar 2015 20:40:38 -0400 Subject: NFSv4.1/pnfs: Refactor pnfs_set_layoutcommit() pnfs_set_layoutcommit() and pnfs_commit_set_layoutcommit() are 100% identical except for the function arguments. Refactor to eliminate the difference. Signed-off-by: Trond Myklebust --- fs/nfs/filelayout/filelayout.c | 5 ++-- fs/nfs/flexfilelayout/flexfilelayout.c | 5 ++-- fs/nfs/pnfs.c | 43 ++++++---------------------------- fs/nfs/pnfs.h | 3 +-- 4 files changed, 14 insertions(+), 42 deletions(-) (limited to 'fs/nfs/pnfs.h') diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index 5639a2ef671a..a317b007e436 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -258,7 +258,8 @@ filelayout_set_layoutcommit(struct nfs_pgio_header *hdr) hdr->res.verf->committed != NFS_DATA_SYNC) return; - pnfs_set_layoutcommit(hdr); + pnfs_set_layoutcommit(hdr->inode, hdr->lseg, + hdr->mds_offset + hdr->res.count); dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino, (unsigned long) NFS_I(hdr->inode)->layout->plh_lwb); } @@ -373,7 +374,7 @@ static int filelayout_commit_done_cb(struct rpc_task *task, } if (data->verf.committed == NFS_UNSTABLE) - pnfs_commit_set_layoutcommit(data); + pnfs_set_layoutcommit(data->inode, data->lseg, data->lwb); return 0; } diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index edfb27ea4ce2..92d2943e54f8 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -891,7 +891,8 @@ static int ff_layout_read_done_cb(struct rpc_task *task, static void ff_layout_set_layoutcommit(struct nfs_pgio_header *hdr) { - pnfs_set_layoutcommit(hdr); + pnfs_set_layoutcommit(hdr->inode, hdr->lseg, + hdr->mds_offset + hdr->res.count); dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino, (unsigned long) NFS_I(hdr->inode)->layout->plh_lwb); } @@ -1074,7 +1075,7 @@ static int ff_layout_commit_done_cb(struct rpc_task *task, } if (data->verf.committed == NFS_UNSTABLE) - pnfs_commit_set_layoutcommit(data); + pnfs_set_layoutcommit(data->inode, data->lseg, data->lwb); return 0; } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index b96736df98e8..ea83f3c03c65 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1841,7 +1841,8 @@ void pnfs_ld_write_done(struct nfs_pgio_header *hdr) { trace_nfs4_pnfs_write(hdr, hdr->pnfs_error); if (!hdr->pnfs_error) { - pnfs_set_layoutcommit(hdr); + pnfs_set_layoutcommit(hdr->inode, hdr->lseg, + hdr->mds_offset + hdr->res.count); hdr->mds_ops->rpc_call_done(&hdr->task, hdr); } else pnfs_ld_handle_write_error(hdr); @@ -2099,11 +2100,10 @@ void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg) EXPORT_SYMBOL_GPL(pnfs_set_lo_fail); void -pnfs_set_layoutcommit(struct nfs_pgio_header *hdr) +pnfs_set_layoutcommit(struct inode *inode, struct pnfs_layout_segment *lseg, + loff_t end_pos) { - struct inode *inode = hdr->inode; struct nfs_inode *nfsi = NFS_I(inode); - loff_t end_pos = hdr->mds_offset + hdr->res.count; bool mark_as_dirty = false; spin_lock(&inode->i_lock); @@ -2114,13 +2114,13 @@ pnfs_set_layoutcommit(struct nfs_pgio_header *hdr) __func__, inode->i_ino); } else if (end_pos > nfsi->layout->plh_lwb) nfsi->layout->plh_lwb = end_pos; - if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &hdr->lseg->pls_flags)) { + if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) { /* references matched in nfs4_layoutcommit_release */ - pnfs_get_lseg(hdr->lseg); + pnfs_get_lseg(lseg); } spin_unlock(&inode->i_lock); dprintk("%s: lseg %p end_pos %llu\n", - __func__, hdr->lseg, nfsi->layout->plh_lwb); + __func__, lseg, nfsi->layout->plh_lwb); /* if pnfs_layoutcommit_inode() runs between inode locks, the next one * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */ @@ -2129,35 +2129,6 @@ pnfs_set_layoutcommit(struct nfs_pgio_header *hdr) } EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit); -void pnfs_commit_set_layoutcommit(struct nfs_commit_data *data) -{ - struct inode *inode = data->inode; - struct nfs_inode *nfsi = NFS_I(inode); - bool mark_as_dirty = false; - - spin_lock(&inode->i_lock); - if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) { - nfsi->layout->plh_lwb = data->lwb; - mark_as_dirty = true; - dprintk("%s: Set layoutcommit for inode %lu ", - __func__, inode->i_ino); - } else if (data->lwb > nfsi->layout->plh_lwb) - nfsi->layout->plh_lwb = data->lwb; - if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &data->lseg->pls_flags)) { - /* references matched in nfs4_layoutcommit_release */ - pnfs_get_lseg(data->lseg); - } - spin_unlock(&inode->i_lock); - dprintk("%s: lseg %p end_pos %llu\n", - __func__, data->lseg, nfsi->layout->plh_lwb); - - /* if pnfs_layoutcommit_inode() runs between inode locks, the next one - * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */ - if (mark_as_dirty) - mark_inode_dirty_sync(inode); -} -EXPORT_SYMBOL_GPL(pnfs_commit_set_layoutcommit); - void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data) { struct nfs_server *nfss = NFS_SERVER(data->args.inode); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index b5654e8da936..66bf5e1cf93d 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -264,8 +264,7 @@ bool pnfs_roc(struct inode *ino); void pnfs_roc_release(struct inode *ino); void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task); -void pnfs_set_layoutcommit(struct nfs_pgio_header *); -void pnfs_commit_set_layoutcommit(struct nfs_commit_data *data); +void pnfs_set_layoutcommit(struct inode *, struct pnfs_layout_segment *, loff_t); void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); int pnfs_layoutcommit_inode(struct inode *inode, bool sync); int _pnfs_return_layout(struct inode *); -- cgit v1.2.3 From 5bb89b4702e22981445ae01af733a57d1cae2018 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 25 Mar 2015 14:14:42 -0400 Subject: NFSv4.1/pnfs: Separate out metadata and data consistency for pNFS The LAYOUTCOMMIT operation means different things to different layout types. For blocks and objects, it is both a data and metadata consistency operation. For files and flexfiles, it is only a metadata consistency operation. This patch separates out the 2 cases, allowing the files/flexfiles layout drivers to optimise away the data consistency calls to layoutcommit. Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/blocklayout.c | 1 + fs/nfs/filelayout/filelayout.c | 1 + fs/nfs/flexfilelayout/flexfilelayout.c | 1 + fs/nfs/nfs4file.c | 2 +- fs/nfs/objlayout/objio_osd.c | 2 ++ fs/nfs/pnfs.c | 7 +++++++ fs/nfs/pnfs.h | 18 ++++++++++++++++++ fs/nfs/pnfs_nfs.c | 10 ++++++++++ fs/nfs/write.c | 13 ++++++------- 9 files changed, 47 insertions(+), 8 deletions(-) (limited to 'fs/nfs/pnfs.h') diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 1cac3c175d18..d2554fe140a3 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -890,6 +890,7 @@ static struct pnfs_layoutdriver_type blocklayout_type = { .free_deviceid_node = bl_free_deviceid_node, .pg_read_ops = &bl_pg_read_ops, .pg_write_ops = &bl_pg_write_ops, + .sync = pnfs_generic_sync, }; static int __init nfs4blocklayout_init(void) diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index a317b007e436..a46bf6de9ce4 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -1139,6 +1139,7 @@ static struct pnfs_layoutdriver_type filelayout_type = { .write_pagelist = filelayout_write_pagelist, .alloc_deviceid_node = filelayout_alloc_deviceid_node, .free_deviceid_node = filelayout_free_deviceid_node, + .sync = pnfs_nfs_generic_sync, }; static int __init nfs4filelayout_init(void) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 92d2943e54f8..f3ff66e4fb6a 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1509,6 +1509,7 @@ static struct pnfs_layoutdriver_type flexfilelayout_type = { .write_pagelist = ff_layout_write_pagelist, .alloc_deviceid_node = ff_layout_alloc_deviceid_node, .encode_layoutreturn = ff_layout_encode_layoutreturn, + .sync = pnfs_nfs_generic_sync, }; static int __init nfs4flexfilelayout_init(void) diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index befe7a2c6284..866842b048ef 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -112,7 +112,7 @@ nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) mutex_lock(&inode->i_mutex); ret = nfs_file_fsync_commit(file, start, end, datasync); if (!ret) - ret = pnfs_layoutcommit_inode(inode, true); + ret = pnfs_sync_inode(inode, !!datasync); mutex_unlock(&inode->i_mutex); /* * If nfs_file_fsync_commit detected a server reboot, then diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 8b5e0e687d5e..5aaed363556a 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -637,6 +637,8 @@ static struct pnfs_layoutdriver_type objlayout_type = { .pg_read_ops = &objio_pg_read_ops, .pg_write_ops = &objio_pg_write_ops, + .sync = pnfs_generic_sync, + .free_deviceid_node = objio_free_deviceid_node, .encode_layoutcommit = objlayout_encode_layoutcommit, diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index c2ce2db771b7..3f0affebcf05 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2231,6 +2231,13 @@ clear_layoutcommitting: } EXPORT_SYMBOL_GPL(pnfs_layoutcommit_inode); +int +pnfs_generic_sync(struct inode *inode, bool datasync) +{ + return pnfs_layoutcommit_inode(inode, true); +} +EXPORT_SYMBOL_GPL(pnfs_generic_sync); + struct nfs4_threshold *pnfs_mdsthreshold_alloc(void) { struct nfs4_threshold *thp; diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 66bf5e1cf93d..231eb23c22da 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -155,6 +155,8 @@ struct pnfs_layoutdriver_type { int how, struct nfs_commit_info *cinfo); + int (*sync)(struct inode *inode, bool datasync); + /* * Return PNFS_ATTEMPTED to indicate the layout code has attempted * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS @@ -267,6 +269,8 @@ bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task); void pnfs_set_layoutcommit(struct inode *, struct pnfs_layout_segment *, loff_t); void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); int pnfs_layoutcommit_inode(struct inode *inode, bool sync); +int pnfs_generic_sync(struct inode *inode, bool datasync); +int pnfs_nfs_generic_sync(struct inode *inode, bool datasync); int _pnfs_return_layout(struct inode *); int pnfs_commit_and_return_layout(struct inode *); void pnfs_ld_write_done(struct nfs_pgio_header *); @@ -488,6 +492,14 @@ pnfs_ld_read_whole_page(struct inode *inode) return NFS_SERVER(inode)->pnfs_curr_ld->flags & PNFS_READ_WHOLE_PAGE; } +static inline int +pnfs_sync_inode(struct inode *inode, bool datasync) +{ + if (!pnfs_enabled_sb(NFS_SERVER(inode))) + return 0; + return NFS_SERVER(inode)->pnfs_curr_ld->sync(inode, datasync); +} + static inline bool pnfs_layoutcommit_outstanding(struct inode *inode) { @@ -570,6 +582,12 @@ pnfs_ld_read_whole_page(struct inode *inode) return false; } +static inline int +pnfs_sync_inode(struct inode *inode, bool datasync) +{ + return 0; +} + static inline bool pnfs_roc(struct inode *ino) { diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 54e36b38fb5f..64d2a5932f7b 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -868,3 +868,13 @@ pnfs_layout_mark_request_commit(struct nfs_page *req, nfs_request_add_commit_list(req, list, cinfo); } EXPORT_SYMBOL_GPL(pnfs_layout_mark_request_commit); + +int +pnfs_nfs_generic_sync(struct inode *inode, bool datasync) +{ + if (datasync) + return 0; + return pnfs_layoutcommit_inode(inode, true); +} +EXPORT_SYMBOL_GPL(pnfs_nfs_generic_sync); + diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 849ed784d6ac..7f933a39f385 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1840,17 +1840,16 @@ EXPORT_SYMBOL_GPL(nfs_write_inode); */ int nfs_wb_all(struct inode *inode) { - struct writeback_control wbc = { - .sync_mode = WB_SYNC_ALL, - .nr_to_write = LONG_MAX, - .range_start = 0, - .range_end = LLONG_MAX, - }; int ret; trace_nfs_writeback_inode_enter(inode); - ret = sync_inode(inode, &wbc); + ret = filemap_write_and_wait(inode->i_mapping); + if (!ret) { + ret = nfs_commit_inode(inode, FLUSH_SYNC); + if (!ret) + pnfs_sync_inode(inode, true); + } trace_nfs_writeback_inode_exit(inode, ret); return ret; -- cgit v1.2.3