From 77b4d2c6316ab096e3f77eea240144941434f2a4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 18 Sep 2020 09:25:13 +0100 Subject: netfs: Add tracepoints Add three tracepoints to track the activity of the read helpers: (1) netfs/netfs_read This logs entry to the read helpers and also expansion of the range in a readahead request. (2) netfs/netfs_rreq This logs the progress of netfs_read_request objects which track read requests. A read request may be a compound of multiple subrequests. (3) netfs/netfs_sreq This logs the progress of netfs_read_subrequest objects, which track the contributions from various sources to a read request. Signed-off-by: David Howells Reviewed-and-tested-by: Jeff Layton Tested-by: Dave Wysochanski Tested-By: Marc Dionne cc: Matthew Wilcox cc: linux-mm@kvack.org cc: linux-cachefs@redhat.com cc: linux-afs@lists.infradead.org cc: linux-nfs@vger.kernel.org cc: linux-cifs@vger.kernel.org cc: ceph-devel@vger.kernel.org cc: v9fs-developer@lists.sourceforge.net cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/161118138060.1232039.5353374588021776217.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/161161033468.2537118.14021843889844001905.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/161340395843.1303470.7355519662919639648.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/161539538693.286939.10171713520419106334.stgit@warthog.procyon.org.uk/ # v4 Link: https://lore.kernel.org/r/161653796447.2770958.1870655382450862155.stgit@warthog.procyon.org.uk/ # v5 Link: https://lore.kernel.org/r/161789078003.6155.17814844411672989942.stgit@warthog.procyon.org.uk/ # v6 --- include/trace/events/netfs.h | 199 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 199 insertions(+) create mode 100644 include/trace/events/netfs.h (limited to 'include/trace') diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h new file mode 100644 index 
000000000000..12ad382764c5 --- /dev/null +++ b/include/trace/events/netfs.h @@ -0,0 +1,199 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* Network filesystem support module tracepoints + * + * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM netfs + +#if !defined(_TRACE_NETFS_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_NETFS_H + +#include <linux/tracepoint.h> + +/* + * Define enums for tracing information. + */ +#ifndef __NETFS_DECLARE_TRACE_ENUMS_ONCE_ONLY +#define __NETFS_DECLARE_TRACE_ENUMS_ONCE_ONLY + +enum netfs_read_trace { + netfs_read_trace_expanded, + netfs_read_trace_readahead, + netfs_read_trace_readpage, +}; + +enum netfs_rreq_trace { + netfs_rreq_trace_assess, + netfs_rreq_trace_done, + netfs_rreq_trace_free, + netfs_rreq_trace_resubmit, + netfs_rreq_trace_unlock, + netfs_rreq_trace_unmark, + netfs_rreq_trace_write, +}; + +enum netfs_sreq_trace { + netfs_sreq_trace_download_instead, + netfs_sreq_trace_free, + netfs_sreq_trace_prepare, + netfs_sreq_trace_resubmit_short, + netfs_sreq_trace_submit, + netfs_sreq_trace_terminated, + netfs_sreq_trace_write, + netfs_sreq_trace_write_term, +}; + +#endif + +#define netfs_read_traces \ + EM(netfs_read_trace_expanded, "EXPANDED ") \ + EM(netfs_read_trace_readahead, "READAHEAD") \ + E_(netfs_read_trace_readpage, "READPAGE ") + +#define netfs_rreq_traces \ + EM(netfs_rreq_trace_assess, "ASSESS") \ + EM(netfs_rreq_trace_done, "DONE ") \ + EM(netfs_rreq_trace_free, "FREE ") \ + EM(netfs_rreq_trace_resubmit, "RESUBM") \ + EM(netfs_rreq_trace_unlock, "UNLOCK") \ + EM(netfs_rreq_trace_unmark, "UNMARK") \ + E_(netfs_rreq_trace_write, "WRITE ") + +#define netfs_sreq_sources \ + EM(NETFS_FILL_WITH_ZEROES, "ZERO") \ + EM(NETFS_DOWNLOAD_FROM_SERVER, "DOWN") \ + EM(NETFS_READ_FROM_CACHE, "READ") \ + E_(NETFS_INVALID_READ, "INVL") \ + +#define netfs_sreq_traces \ + EM(netfs_sreq_trace_download_instead, "RDOWN") \ +
EM(netfs_sreq_trace_free, "FREE ") \ + EM(netfs_sreq_trace_prepare, "PREP ") \ + EM(netfs_sreq_trace_resubmit_short, "SHORT") \ + EM(netfs_sreq_trace_submit, "SUBMT") \ + EM(netfs_sreq_trace_terminated, "TERM ") \ + EM(netfs_sreq_trace_write, "WRITE") \ + E_(netfs_sreq_trace_write_term, "WTERM") + + +/* + * Export enum symbols via userspace. + */ +#undef EM +#undef E_ +#define EM(a, b) TRACE_DEFINE_ENUM(a); +#define E_(a, b) TRACE_DEFINE_ENUM(a); + +netfs_read_traces; +netfs_rreq_traces; +netfs_sreq_sources; +netfs_sreq_traces; + +/* + * Now redefine the EM() and E_() macros to map the enums to the strings that + * will be printed in the output. + */ +#undef EM +#undef E_ +#define EM(a, b) { a, b }, +#define E_(a, b) { a, b } + +TRACE_EVENT(netfs_read, + TP_PROTO(struct netfs_read_request *rreq, + loff_t start, size_t len, + enum netfs_read_trace what), + + TP_ARGS(rreq, start, len, what), + + TP_STRUCT__entry( + __field(unsigned int, rreq ) + __field(unsigned int, cookie ) + __field(loff_t, start ) + __field(size_t, len ) + __field(enum netfs_read_trace, what ) + ), + + TP_fast_assign( + __entry->rreq = rreq->debug_id; + __entry->cookie = rreq->cookie_debug_id; + __entry->start = start; + __entry->len = len; + __entry->what = what; + ), + + TP_printk("R=%08x %s c=%08x s=%llx %zx", + __entry->rreq, + __print_symbolic(__entry->what, netfs_read_traces), + __entry->cookie, + __entry->start, __entry->len) + ); + +TRACE_EVENT(netfs_rreq, + TP_PROTO(struct netfs_read_request *rreq, + enum netfs_rreq_trace what), + + TP_ARGS(rreq, what), + + TP_STRUCT__entry( + __field(unsigned int, rreq ) + __field(unsigned short, flags ) + __field(enum netfs_rreq_trace, what ) + ), + + TP_fast_assign( + __entry->rreq = rreq->debug_id; + __entry->flags = rreq->flags; + __entry->what = what; + ), + + TP_printk("R=%08x %s f=%02x", + __entry->rreq, + __print_symbolic(__entry->what, netfs_rreq_traces), + __entry->flags) + ); + +TRACE_EVENT(netfs_sreq, + TP_PROTO(struct netfs_read_subrequest
*sreq, + enum netfs_sreq_trace what), + + TP_ARGS(sreq, what), + + TP_STRUCT__entry( + __field(unsigned int, rreq ) + __field(unsigned short, index ) + __field(short, error ) + __field(unsigned short, flags ) + __field(enum netfs_read_source, source ) + __field(enum netfs_sreq_trace, what ) + __field(size_t, len ) + __field(size_t, transferred ) + __field(loff_t, start ) + ), + + TP_fast_assign( + __entry->rreq = sreq->rreq->debug_id; + __entry->index = sreq->debug_index; + __entry->error = sreq->error; + __entry->flags = sreq->flags; + __entry->source = sreq->source; + __entry->what = what; + __entry->len = sreq->len; + __entry->transferred = sreq->transferred; + __entry->start = sreq->start; + ), + + TP_printk("R=%08x[%u] %s %s f=%02x s=%llx %zx/%zx e=%d", + __entry->rreq, __entry->index, + __print_symbolic(__entry->what, netfs_sreq_traces), + __print_symbolic(__entry->source, netfs_sreq_sources), + __entry->flags, + __entry->start, __entry->transferred, __entry->len, + __entry->error) + ); + +#endif /* _TRACE_NETFS_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> -- cgit v1.2.3 From e1b1240c1ff5f8bfba797f14996d8bac8a9ec437 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 22 Sep 2020 11:06:07 +0100 Subject: netfs: Add write_begin helper Add a helper to do the pre-reading work for the netfs write_begin address space op. Changes v6: - Fixed a missing rreq put in netfs_write_begin()[3]. - Use DEFINE_READAHEAD()[4]. v5: - Made the wait for PG_fscache in netfs_write_begin() killable[2]. v4: - Added flag to netfs_subreq_terminated() to indicate that the caller may have been running async and stuff that might sleep needs punting to a workqueue (can't use in_softirq()[1]).
Signed-off-by: David Howells Reviewed-and-tested-by: Jeff Layton Tested-by: Dave Wysochanski Tested-By: Marc Dionne cc: Matthew Wilcox cc: linux-mm@kvack.org cc: linux-cachefs@redhat.com cc: linux-afs@lists.infradead.org cc: linux-nfs@vger.kernel.org cc: linux-cifs@vger.kernel.org cc: ceph-devel@vger.kernel.org cc: v9fs-developer@lists.sourceforge.net cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/20210216084230.GA23669@lst.de/ [1] Link: https://lore.kernel.org/r/2499407.1616505440@warthog.procyon.org.uk/ [2] Link: https://lore.kernel.org/r/161781042127.463527.9154479794406046987.stgit@warthog.procyon.org.uk/ [3] Link: https://lore.kernel.org/r/1234933.1617886271@warthog.procyon.org.uk/ [4] Link: https://lore.kernel.org/r/160588543960.3465195.2792938973035886168.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/161118140165.1232039.16418853874312234477.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/161161035539.2537118.15674887534950908530.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/161340398368.1303470.11242918276563276090.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/161539541541.286939.1889738674057013729.stgit@warthog.procyon.org.uk/ # v4 Link: https://lore.kernel.org/r/161653798616.2770958.17213315845968485563.stgit@warthog.procyon.org.uk/ # v5 Link: https://lore.kernel.org/r/161789080530.6155.1011847312392330491.stgit@warthog.procyon.org.uk/ # v6 --- fs/netfs/internal.h | 2 + fs/netfs/read_helper.c | 164 +++++++++++++++++++++++++++++++++++++++++++ fs/netfs/stats.c | 11 ++- include/linux/netfs.h | 8 +++ include/trace/events/netfs.h | 4 +- 5 files changed, 185 insertions(+), 4 deletions(-) (limited to 'include/trace') diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h index 98b6f4516da1..b7f2c4459f33 100644 --- a/fs/netfs/internal.h +++ b/fs/netfs/internal.h @@ -34,8 +34,10 @@ extern atomic_t netfs_n_rh_read_failed; extern atomic_t netfs_n_rh_zero; extern 
atomic_t netfs_n_rh_short_read; extern atomic_t netfs_n_rh_write; +extern atomic_t netfs_n_rh_write_begin; extern atomic_t netfs_n_rh_write_done; extern atomic_t netfs_n_rh_write_failed; +extern atomic_t netfs_n_rh_write_zskip; static inline void netfs_stat(atomic_t *stat) diff --git a/fs/netfs/read_helper.c b/fs/netfs/read_helper.c index 6d6ed30f417e..da34aedea053 100644 --- a/fs/netfs/read_helper.c +++ b/fs/netfs/read_helper.c @@ -772,3 +772,167 @@ int netfs_readpage(struct file *file, return ret; } EXPORT_SYMBOL(netfs_readpage); + +static void netfs_clear_thp(struct page *page) +{ + unsigned int i; + + for (i = 0; i < thp_nr_pages(page); i++) + clear_highpage(page + i); +} + +/** + * netfs_write_begin - Helper to prepare for writing + * @file: The file to read from + * @mapping: The mapping to read from + * @pos: File position at which the write will begin + * @len: The length of the write in this page + * @flags: AOP_* flags + * @_page: Where to put the resultant page + * @_fsdata: Place for the netfs to store a cookie + * @ops: The network filesystem's operations for the helper to use + * @netfs_priv: Private netfs data to be retained in the request + * + * Pre-read data for a write-begin request by drawing data from the cache if + * possible, or the netfs if not. Space beyond the EOF is zero-filled. + * Multiple I/O requests from different sources will get munged together. If + * necessary, the readahead window can be expanded in either direction to a + * more convenient alignment for RPC efficiency or to make storage in the cache + * feasible. + * + * The calling netfs must provide a table of operations, only one of which, + * issue_op, is mandatory. + * + * The check_write_begin() operation can be provided to check for and flush + * conflicting writes once the page is grabbed and locked. It is passed a + * pointer to the fsdata cookie that gets returned to the VM to be passed to + * write_end. It is permitted to sleep.
It should return 0 if the request + * should go ahead; unlock the page and return -EAGAIN to cause the page to be + * regot; or return an error. + * + * This is usable whether or not caching is enabled. + */ +int netfs_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned int len, unsigned int flags, + struct page **_page, void **_fsdata, + const struct netfs_read_request_ops *ops, + void *netfs_priv) +{ + struct netfs_read_request *rreq; + struct page *page, *xpage; + struct inode *inode = file_inode(file); + unsigned int debug_index = 0; + pgoff_t index = pos >> PAGE_SHIFT; + int pos_in_page = pos & ~PAGE_MASK; + loff_t size; + int ret; + + DEFINE_READAHEAD(ractl, file, NULL, mapping, index); + +retry: + page = grab_cache_page_write_begin(mapping, index, flags); + if (!page) + return -ENOMEM; + + if (ops->check_write_begin) { + /* Allow the netfs (eg. ceph) to flush conflicts. */ + ret = ops->check_write_begin(file, pos, len, page, _fsdata); + if (ret < 0) { + if (ret == -EAGAIN) + goto retry; + goto error; + } + } + + if (PageUptodate(page)) + goto have_page; + + /* If the page is beyond the EOF, we want to clear it - unless it's + * within the cache granule containing the EOF, in which case we need + * to preload the granule.
+ */ + size = i_size_read(inode); + if (!ops->is_cache_enabled(inode) && + ((pos_in_page == 0 && len == thp_size(page)) || + (pos >= size) || + (pos_in_page == 0 && (pos + len) >= size))) { + netfs_clear_thp(page); + SetPageUptodate(page); + netfs_stat(&netfs_n_rh_write_zskip); + goto have_page_no_wait; + } + + ret = -ENOMEM; + rreq = netfs_alloc_read_request(ops, netfs_priv, file); + if (!rreq) + goto error; + rreq->mapping = page->mapping; + rreq->start = page->index * PAGE_SIZE; + rreq->len = thp_size(page); + rreq->no_unlock_page = page->index; + __set_bit(NETFS_RREQ_NO_UNLOCK_PAGE, &rreq->flags); + netfs_priv = NULL; + + netfs_stat(&netfs_n_rh_write_begin); + trace_netfs_read(rreq, pos, len, netfs_read_trace_write_begin); + + /* Expand the request to meet caching requirements and download + * preferences. + */ + ractl._nr_pages = thp_nr_pages(page); + netfs_rreq_expand(rreq, &ractl); + netfs_get_read_request(rreq); + + /* We hold the page locks, so we can drop the references */ + while ((xpage = readahead_page(&ractl))) + if (xpage != page) + put_page(xpage); + + atomic_set(&rreq->nr_rd_ops, 1); + do { + if (!netfs_rreq_submit_slice(rreq, &debug_index)) + break; + + } while (rreq->submitted < rreq->len); + + /* Keep nr_rd_ops incremented so that the ref always belongs to us, and + * the service code isn't punted off to a random thread pool to + * process.
+ */ + for (;;) { + wait_var_event(&rreq->nr_rd_ops, atomic_read(&rreq->nr_rd_ops) == 1); + netfs_rreq_assess(rreq, false); + if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags)) + break; + cond_resched(); + } + + ret = rreq->error; + if (ret == 0 && rreq->submitted < rreq->len) + ret = -EIO; + netfs_put_read_request(rreq, false); + if (ret < 0) + goto error; + +have_page: + ret = wait_on_page_fscache_killable(page); + if (ret < 0) + goto error; +have_page_no_wait: + if (netfs_priv) + ops->cleanup(mapping, netfs_priv); + *_page = page; + _leave(" = 0"); + return 0; + +error_put: + netfs_put_read_request(rreq, false); +error: + unlock_page(page); + put_page(page); + if (netfs_priv) + ops->cleanup(mapping, netfs_priv); + _leave(" = %d", ret); + return ret; +} +EXPORT_SYMBOL(netfs_write_begin); diff --git a/fs/netfs/stats.c b/fs/netfs/stats.c index df6ff5718f25..9ae538c85378 100644 --- a/fs/netfs/stats.c +++ b/fs/netfs/stats.c @@ -24,19 +24,24 @@ atomic_t netfs_n_rh_read_failed; atomic_t netfs_n_rh_zero; atomic_t netfs_n_rh_short_read; atomic_t netfs_n_rh_write; +atomic_t netfs_n_rh_write_begin; atomic_t netfs_n_rh_write_done; atomic_t netfs_n_rh_write_failed; +atomic_t netfs_n_rh_write_zskip; void netfs_stats_show(struct seq_file *m) { - seq_printf(m, "RdHelp : RA=%u RP=%u rr=%u sr=%u\n", + seq_printf(m, "RdHelp : RA=%u RP=%u WB=%u WBZ=%u rr=%u sr=%u\n", atomic_read(&netfs_n_rh_readahead), atomic_read(&netfs_n_rh_readpage), + atomic_read(&netfs_n_rh_write_begin), + atomic_read(&netfs_n_rh_write_zskip), atomic_read(&netfs_n_rh_rreq), atomic_read(&netfs_n_rh_sreq)); - seq_printf(m, "RdHelp : ZR=%u sh=%u\n", + seq_printf(m, "RdHelp : ZR=%u sh=%u sk=%u\n", atomic_read(&netfs_n_rh_zero), - atomic_read(&netfs_n_rh_short_read)); + atomic_read(&netfs_n_rh_short_read), + atomic_read(&netfs_n_rh_write_zskip)); seq_printf(m, "RdHelp : DL=%u ds=%u df=%u di=%u\n", atomic_read(&netfs_n_rh_download), atomic_read(&netfs_n_rh_download_done), diff --git a/include/linux/netfs.h
b/include/linux/netfs.h index db4af80cbae3..99659ed9524e 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -147,11 +147,14 @@ struct netfs_read_request { * Operations the network filesystem can/must provide to the helpers. */ struct netfs_read_request_ops { + bool (*is_cache_enabled)(struct inode *inode); void (*init_rreq)(struct netfs_read_request *rreq, struct file *file); void (*expand_readahead)(struct netfs_read_request *rreq); bool (*clamp_length)(struct netfs_read_subrequest *subreq); void (*issue_op)(struct netfs_read_subrequest *subreq); bool (*is_still_valid)(struct netfs_read_request *rreq); + int (*check_write_begin)(struct file *file, loff_t pos, unsigned len, + struct page *page, void **_fsdata); void (*done)(struct netfs_read_request *rreq); void (*cleanup)(struct address_space *mapping, void *netfs_priv); }; @@ -164,6 +167,11 @@ extern int netfs_readpage(struct file *, struct page *, const struct netfs_read_request_ops *, void *); +extern int netfs_write_begin(struct file *, struct address_space *, + loff_t, unsigned int, unsigned int, struct page **, + void **, + const struct netfs_read_request_ops *, + void *); extern void netfs_subreq_terminated(struct netfs_read_subrequest *, ssize_t, bool); extern void netfs_stats_show(struct seq_file *); diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index 12ad382764c5..a2bf6cd84bd4 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -22,6 +22,7 @@ enum netfs_read_trace { netfs_read_trace_expanded, netfs_read_trace_readahead, netfs_read_trace_readpage, + netfs_read_trace_write_begin, }; enum netfs_rreq_trace { @@ -50,7 +51,8 @@ enum netfs_sreq_trace { #define netfs_read_traces \ EM(netfs_read_trace_expanded, "EXPANDED ") \ EM(netfs_read_trace_readahead, "READAHEAD") \ - E_(netfs_read_trace_readpage, "READPAGE ") + EM(netfs_read_trace_readpage, "READPAGE ") \ + E_(netfs_read_trace_write_begin, "WRITEBEGN") #define netfs_rreq_traces \ 
EM(netfs_rreq_trace_assess, "ASSESS") \ -- cgit v1.2.3 From 726218fdc22c9b52f16e1228499a804bbf262a20 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 6 Feb 2020 14:22:24 +0000 Subject: netfs: Define an interface to talk to a cache Add an interface to the netfs helper library for reading data from the cache instead of downloading it from the server and support for writing data just downloaded or cleared to the cache. The API passes an iov_iter to the cache read/write routines to indicate the data/buffer to be used. This is done using the ITER_XARRAY type to provide direct access to the netfs inode's pagecache. When the netfs's ->begin_cache_operation() method is called, this must fill in the cache_resources in the netfs_read_request struct, including the netfs_cache_ops used by the helper lib to talk to the cache. The helper lib does not directly access the cache. Changes: v6: - Call trace_netfs_read() after beginning the cache op so that the cookie debug ID can be logged[3]. - Don't record the error from writing to the cache. We don't want to pass it back to the netfs[4]. - Fix copy-to-cache subreq amalgamation to not round up as it goes along otherwise it overcalculates the length of the write[5]. v5: - Use end_page_fscache() rather than unlock_page_fscache()[2]. v4: - Added flag to netfs_subreq_terminated() to indicate that the caller may have been running async and stuff that might sleep needs punting to a workqueue (can't use in_softirq()[1]). - Add missing inc of netfs_n_rh_read stat. - Move initial definition of fscache_begin_read_operation() elsewhere. - Need to call op->begin_cache_operation() from netfs_write_begin(). 
Signed-off-by: David Howells Reviewed-and-tested-by: Jeff Layton Tested-by: Dave Wysochanski Tested-By: Marc Dionne cc: Matthew Wilcox cc: linux-mm@kvack.org cc: linux-cachefs@redhat.com cc: linux-afs@lists.infradead.org cc: linux-nfs@vger.kernel.org cc: linux-cifs@vger.kernel.org cc: ceph-devel@vger.kernel.org cc: v9fs-developer@lists.sourceforge.net cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/20210216084230.GA23669@lst.de/ [1] Link: https://lore.kernel.org/r/2499407.1616505440@warthog.procyon.org.uk/ [2] Link: https://lore.kernel.org/r/161781045123.463527.14533348855710902201.stgit@warthog.procyon.org.uk/ [3] Link: https://lore.kernel.org/r/161781046256.463527.18158681600085556192.stgit@warthog.procyon.org.uk/ [4] Link: https://lore.kernel.org/r/161781047695.463527.7463536103593997492.stgit@warthog.procyon.org.uk/ [5] Link: https://lore.kernel.org/r/161118141321.1232039.8296910406755622458.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/161161036700.2537118.11170748455436854978.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/161340399569.1303470.1138884774643385730.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/161539542874.286939.13337898213448136687.stgit@warthog.procyon.org.uk/ # v4 Link: https://lore.kernel.org/r/161653799826.2770958.9015430297426331950.stgit@warthog.procyon.org.uk/ # v5 Link: https://lore.kernel.org/r/161789081462.6155.3853904866933313256.stgit@warthog.procyon.org.uk/ # v6 --- fs/netfs/read_helper.c | 239 ++++++++++++++++++++++++++++++++++++++++++- include/linux/netfs.h | 55 ++++++++++ include/trace/events/netfs.h | 2 + 3 files changed, 295 insertions(+), 1 deletion(-) (limited to 'include/trace') diff --git a/fs/netfs/read_helper.c b/fs/netfs/read_helper.c index da34aedea053..cd3b61d5e192 100644 --- a/fs/netfs/read_helper.c +++ b/fs/netfs/read_helper.c @@ -88,6 +88,8 @@ static void netfs_free_read_request(struct work_struct *work) if (rreq->netfs_priv) 
rreq->netfs_ops->cleanup(rreq->mapping, rreq->netfs_priv); trace_netfs_rreq(rreq, netfs_rreq_trace_free); + if (rreq->cache_resources.ops) + rreq->cache_resources.ops->end_operation(&rreq->cache_resources); kfree(rreq); netfs_stat_d(&netfs_n_rh_rreq); } @@ -154,6 +156,34 @@ static void netfs_clear_unread(struct netfs_read_subrequest *subreq) iov_iter_zero(iov_iter_count(&iter), &iter); } +static void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error, + bool was_async) +{ + struct netfs_read_subrequest *subreq = priv; + + netfs_subreq_terminated(subreq, transferred_or_error, was_async); +} + +/* + * Issue a read against the cache. + * - Eats the caller's ref on subreq. + */ +static void netfs_read_from_cache(struct netfs_read_request *rreq, + struct netfs_read_subrequest *subreq, + bool seek_data) +{ + struct netfs_cache_resources *cres = &rreq->cache_resources; + struct iov_iter iter; + + netfs_stat(&netfs_n_rh_read); + iov_iter_xarray(&iter, READ, &rreq->mapping->i_pages, + subreq->start + subreq->transferred, + subreq->len - subreq->transferred); + + cres->ops->read(cres, subreq->start, &iter, seek_data, + netfs_cache_read_terminated, subreq); +} + /* * Fill a subrequest region with zeroes. */ @@ -198,6 +228,141 @@ static void netfs_rreq_completed(struct netfs_read_request *rreq, bool was_async netfs_put_read_request(rreq, was_async); } +/* + * Deal with the completion of writing the data to the cache. We have to clear + * the PG_fscache bits on the pages involved and release the caller's ref. + * + * May be called in softirq mode and we inherit a ref from the caller. 
+ */ +static void netfs_rreq_unmark_after_write(struct netfs_read_request *rreq, + bool was_async) +{ + struct netfs_read_subrequest *subreq; + struct page *page; + pgoff_t unlocked = 0; + bool have_unlocked = false; + + rcu_read_lock(); + + list_for_each_entry(subreq, &rreq->subrequests, rreq_link) { + XA_STATE(xas, &rreq->mapping->i_pages, subreq->start / PAGE_SIZE); + + xas_for_each(&xas, page, (subreq->start + subreq->len - 1) / PAGE_SIZE) { + /* We might have multiple writes from the same huge + * page, but we mustn't unlock a page more than once. + */ + if (have_unlocked && page->index <= unlocked) + continue; + unlocked = page->index; + end_page_fscache(page); + have_unlocked = true; + } + } + + rcu_read_unlock(); + netfs_rreq_completed(rreq, was_async); +} + +static void netfs_rreq_copy_terminated(void *priv, ssize_t transferred_or_error, + bool was_async) +{ + struct netfs_read_subrequest *subreq = priv; + struct netfs_read_request *rreq = subreq->rreq; + + if (IS_ERR_VALUE(transferred_or_error)) { + netfs_stat(&netfs_n_rh_write_failed); + } else { + netfs_stat(&netfs_n_rh_write_done); + } + + trace_netfs_sreq(subreq, netfs_sreq_trace_write_term); + + /* If we decrement nr_wr_ops to 0, the ref belongs to us. */ + if (atomic_dec_and_test(&rreq->nr_wr_ops)) + netfs_rreq_unmark_after_write(rreq, was_async); + + netfs_put_subrequest(subreq, was_async); +} + +/* + * Perform any outstanding writes to the cache. We inherit a ref from the + * caller. + */ +static void netfs_rreq_do_write_to_cache(struct netfs_read_request *rreq) +{ + struct netfs_cache_resources *cres = &rreq->cache_resources; + struct netfs_read_subrequest *subreq, *next, *p; + struct iov_iter iter; + int ret; + + trace_netfs_rreq(rreq, netfs_rreq_trace_write); + + /* We don't want terminating writes trying to wake us up whilst we're + * still going through the list. 
+ */ + atomic_inc(&rreq->nr_wr_ops); + + list_for_each_entry_safe(subreq, p, &rreq->subrequests, rreq_link) { + if (!test_bit(NETFS_SREQ_WRITE_TO_CACHE, &subreq->flags)) { + list_del_init(&subreq->rreq_link); + netfs_put_subrequest(subreq, false); + } + } + + list_for_each_entry(subreq, &rreq->subrequests, rreq_link) { + /* Amalgamate adjacent writes */ + while (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) { + next = list_next_entry(subreq, rreq_link); + if (next->start != subreq->start + subreq->len) + break; + subreq->len += next->len; + list_del_init(&next->rreq_link); + netfs_put_subrequest(next, false); + } + + ret = cres->ops->prepare_write(cres, &subreq->start, &subreq->len, + rreq->i_size); + if (ret < 0) { + trace_netfs_sreq(subreq, netfs_sreq_trace_write_skip); + continue; + } + + iov_iter_xarray(&iter, WRITE, &rreq->mapping->i_pages, + subreq->start, subreq->len); + + atomic_inc(&rreq->nr_wr_ops); + netfs_stat(&netfs_n_rh_write); + netfs_get_read_subrequest(subreq); + trace_netfs_sreq(subreq, netfs_sreq_trace_write); + cres->ops->write(cres, subreq->start, &iter, + netfs_rreq_copy_terminated, subreq); + } + + /* If we decrement nr_wr_ops to 0, the usage ref belongs to us. */ + if (atomic_dec_and_test(&rreq->nr_wr_ops)) + netfs_rreq_unmark_after_write(rreq, false); +} + +static void netfs_rreq_write_to_cache_work(struct work_struct *work) +{ + struct netfs_read_request *rreq = + container_of(work, struct netfs_read_request, work); + + netfs_rreq_do_write_to_cache(rreq); +} + +static void netfs_rreq_write_to_cache(struct netfs_read_request *rreq, + bool was_async) +{ + if (was_async) { + rreq->work.func = netfs_rreq_write_to_cache_work; + if (!queue_work(system_unbound_wq, &rreq->work)) + BUG(); + } else { + netfs_rreq_do_write_to_cache(rreq); + } +} + /* * Unlock the pages in a read operation. We need to set PG_fscache on any * pages we're going to write back before we unlock them. 
@@ -299,7 +464,10 @@ static void netfs_rreq_short_read(struct netfs_read_request *rreq, netfs_get_read_subrequest(subreq); atomic_inc(&rreq->nr_rd_ops); - netfs_read_from_server(rreq, subreq); + if (subreq->source == NETFS_READ_FROM_CACHE) + netfs_read_from_cache(rreq, subreq, true); + else + netfs_read_from_server(rreq, subreq); } /* @@ -344,6 +512,25 @@ static bool netfs_rreq_perform_resubmissions(struct netfs_read_request *rreq) return false; } +/* + * Check to see if the data read is still valid. + */ +static void netfs_rreq_is_still_valid(struct netfs_read_request *rreq) +{ + struct netfs_read_subrequest *subreq; + + if (!rreq->netfs_ops->is_still_valid || + rreq->netfs_ops->is_still_valid(rreq)) + return; + + list_for_each_entry(subreq, &rreq->subrequests, rreq_link) { + if (subreq->source == NETFS_READ_FROM_CACHE) { + subreq->error = -ESTALE; + __set_bit(NETFS_RREQ_INCOMPLETE_IO, &rreq->flags); + } + } +} + /* * Assess the state of a read request and decide what to do next. * @@ -355,6 +542,8 @@ static void netfs_rreq_assess(struct netfs_read_request *rreq, bool was_async) trace_netfs_rreq(rreq, netfs_rreq_trace_assess); again: + netfs_rreq_is_still_valid(rreq); + if (!test_bit(NETFS_RREQ_FAILED, &rreq->flags) && test_bit(NETFS_RREQ_INCOMPLETE_IO, &rreq->flags)) { if (netfs_rreq_perform_resubmissions(rreq)) @@ -367,6 +556,9 @@ again: clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &rreq->flags); wake_up_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS); + if (test_bit(NETFS_RREQ_WRITE_TO_CACHE, &rreq->flags)) + return netfs_rreq_write_to_cache(rreq, was_async); + netfs_rreq_completed(rreq, was_async); } @@ -504,7 +696,10 @@ static enum netfs_read_source netfs_cache_prepare_read(struct netfs_read_subrequ loff_t i_size) { struct netfs_read_request *rreq = subreq->rreq; + struct netfs_cache_resources *cres = &rreq->cache_resources; + if (cres->ops) + return cres->ops->prepare_read(subreq, i_size); if (subreq->start >= rreq->i_size) return NETFS_FILL_WITH_ZEROES; return 
NETFS_DOWNLOAD_FROM_SERVER; @@ -595,6 +790,9 @@ static bool netfs_rreq_submit_slice(struct netfs_read_request *rreq, case NETFS_DOWNLOAD_FROM_SERVER: netfs_read_from_server(rreq, subreq); break; + case NETFS_READ_FROM_CACHE: + netfs_read_from_cache(rreq, subreq, false); + break; default: BUG(); } @@ -607,9 +805,23 @@ subreq_failed: return false; } +static void netfs_cache_expand_readahead(struct netfs_read_request *rreq, + loff_t *_start, size_t *_len, loff_t i_size) +{ + struct netfs_cache_resources *cres = &rreq->cache_resources; + + if (cres->ops && cres->ops->expand_readahead) + cres->ops->expand_readahead(cres, _start, _len, i_size); +} + static void netfs_rreq_expand(struct netfs_read_request *rreq, struct readahead_control *ractl) { + /* Give the cache a chance to change the request parameters. The + * resultant request must contain the original region. + */ + netfs_cache_expand_readahead(rreq, &rreq->start, &rreq->len, rreq->i_size); + /* Give the netfs a chance to change the request parameters. The * resultant request must contain the original region. 
*/ @@ -661,6 +873,7 @@ void netfs_readahead(struct readahead_control *ractl, struct netfs_read_request *rreq; struct page *page; unsigned int debug_index = 0; + int ret; _enter("%lx,%x", readahead_index(ractl), readahead_count(ractl)); @@ -674,6 +887,12 @@ void netfs_readahead(struct readahead_control *ractl, rreq->start = readahead_pos(ractl); rreq->len = readahead_length(ractl); + if (ops->begin_cache_operation) { + ret = ops->begin_cache_operation(rreq); + if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS) + goto cleanup_free; + } + netfs_stat(&netfs_n_rh_readahead); trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl), netfs_read_trace_readahead); @@ -698,6 +917,9 @@ void netfs_readahead(struct readahead_control *ractl, netfs_rreq_assess(rreq, false); return; +cleanup_free: + netfs_put_read_request(rreq, false); + return; cleanup: if (netfs_priv) ops->cleanup(ractl->mapping, netfs_priv); @@ -744,6 +966,14 @@ int netfs_readpage(struct file *file, rreq->start = page_index(page) * PAGE_SIZE; rreq->len = thp_size(page); + if (ops->begin_cache_operation) { + ret = ops->begin_cache_operation(rreq); + if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS) { + unlock_page(page); + goto out; + } + } + netfs_stat(&netfs_n_rh_readpage); trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_readpage); @@ -768,6 +998,7 @@ int netfs_readpage(struct file *file, ret = rreq->error; if (ret == 0 && rreq->submitted < rreq->len) ret = -EIO; +out: netfs_put_read_request(rreq, false); return ret; } @@ -873,6 +1104,12 @@ retry: __set_bit(NETFS_RREQ_NO_UNLOCK_PAGE, &rreq->flags); netfs_priv = NULL; + if (ops->begin_cache_operation) { + ret = ops->begin_cache_operation(rreq); + if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS) + goto error_put; + } + netfs_stat(&netfs_n_rh_write_begin); trace_netfs_read(rreq, pos, len, netfs_read_trace_write_begin); diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 
99659ed9524e..9062adfa2fb9 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -92,6 +92,18 @@ enum netfs_read_source { NETFS_INVALID_READ, } __mode(byte); +typedef void (*netfs_io_terminated_t)(void *priv, ssize_t transferred_or_error, + bool was_async); + +/* + * Resources required to do operations on a cache. + */ +struct netfs_cache_resources { + const struct netfs_cache_ops *ops; + void *cache_priv; + void *cache_priv2; +}; + /* * Descriptor for a single component subrequest. */ @@ -121,11 +133,13 @@ struct netfs_read_request { struct work_struct work; struct inode *inode; /* The file being accessed */ struct address_space *mapping; /* The mapping being accessed */ + struct netfs_cache_resources cache_resources; struct list_head subrequests; /* Requests to fetch I/O from disk or net */ void *netfs_priv; /* Private data for the netfs */ unsigned int debug_id; unsigned int cookie_debug_id; atomic_t nr_rd_ops; /* Number of read ops in progress */ + atomic_t nr_wr_ops; /* Number of write ops in progress */ size_t submitted; /* Amount submitted for I/O so far */ size_t len; /* Length of the request */ short error; /* 0 or error that occurred */ @@ -149,6 +163,7 @@ struct netfs_read_request { struct netfs_read_request_ops { bool (*is_cache_enabled)(struct inode *inode); void (*init_rreq)(struct netfs_read_request *rreq, struct file *file); + int (*begin_cache_operation)(struct netfs_read_request *rreq); void (*expand_readahead)(struct netfs_read_request *rreq); bool (*clamp_length)(struct netfs_read_subrequest *subreq); void (*issue_op)(struct netfs_read_subrequest *subreq); @@ -159,6 +174,46 @@ struct netfs_read_request_ops { void (*cleanup)(struct address_space *mapping, void *netfs_priv); }; +/* + * Table of operations for access to a cache. This is obtained by + * rreq->ops->begin_cache_operation(). 
+ */ +struct netfs_cache_ops { + /* End an operation */ + void (*end_operation)(struct netfs_cache_resources *cres); + + /* Read data from the cache */ + int (*read)(struct netfs_cache_resources *cres, + loff_t start_pos, + struct iov_iter *iter, + bool seek_data, + netfs_io_terminated_t term_func, + void *term_func_priv); + + /* Write data to the cache */ + int (*write)(struct netfs_cache_resources *cres, + loff_t start_pos, + struct iov_iter *iter, + netfs_io_terminated_t term_func, + void *term_func_priv); + + /* Expand readahead request */ + void (*expand_readahead)(struct netfs_cache_resources *cres, + loff_t *_start, size_t *_len, loff_t i_size); + + /* Prepare a read operation, shortening it to a cached/uncached + * boundary as appropriate. + */ + enum netfs_read_source (*prepare_read)(struct netfs_read_subrequest *subreq, + loff_t i_size); + + /* Prepare a write operation, working out what part of the write we can + * actually do. + */ + int (*prepare_write)(struct netfs_cache_resources *cres, + loff_t *_start, size_t *_len, loff_t i_size); +}; + struct readahead_control; extern void netfs_readahead(struct readahead_control *, const struct netfs_read_request_ops *, diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index a2bf6cd84bd4..e3ebeabd3852 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -43,6 +43,7 @@ enum netfs_sreq_trace { netfs_sreq_trace_submit, netfs_sreq_trace_terminated, netfs_sreq_trace_write, + netfs_sreq_trace_write_skip, netfs_sreq_trace_write_term, }; @@ -77,6 +78,7 @@ enum netfs_sreq_trace { EM(netfs_sreq_trace_submit, "SUBMT") \ EM(netfs_sreq_trace_terminated, "TERM ") \ EM(netfs_sreq_trace_write, "WRITE") \ + EM(netfs_sreq_trace_write_skip, "SKIP ") \ E_(netfs_sreq_trace_write_term, "WTERM") -- cgit v1.2.3 From 0246f3e5737d0b083baefa552fecedd90832dad0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 6 Apr 2021 17:31:54 +0100 Subject: netfs: Add a tracepoint to log failures 
that would be otherwise unseen Add a tracepoint to log internal failures (such as cache errors) that we don't otherwise want to pass back to the netfs. Signed-off-by: David Howells Tested-by: Jeff Layton Tested-by: Dave Wysochanski Tested-By: Marc Dionne cc: Matthew Wilcox cc: linux-mm@kvack.org cc: linux-cachefs@redhat.com cc: linux-afs@lists.infradead.org cc: linux-nfs@vger.kernel.org cc: linux-cifs@vger.kernel.org cc: ceph-devel@vger.kernel.org cc: v9fs-developer@lists.sourceforge.net cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/161781048813.463527.1557000804674707986.stgit@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/161789082749.6155.15498680577213140870.stgit@warthog.procyon.org.uk/ # v6 --- fs/netfs/read_helper.c | 14 +++++++++-- include/trace/events/netfs.h | 58 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+), 2 deletions(-) (limited to 'include/trace') diff --git a/fs/netfs/read_helper.c b/fs/netfs/read_helper.c index cd3b61d5e192..1d3b50c5db6d 100644 --- a/fs/netfs/read_helper.c +++ b/fs/netfs/read_helper.c @@ -271,6 +271,8 @@ static void netfs_rreq_copy_terminated(void *priv, ssize_t transferred_or_error, if (IS_ERR_VALUE(transferred_or_error)) { netfs_stat(&netfs_n_rh_write_failed); + trace_netfs_failure(rreq, subreq, transferred_or_error, + netfs_fail_copy_to_cache); } else { netfs_stat(&netfs_n_rh_write_done); } @@ -323,6 +325,7 @@ static void netfs_rreq_do_write_to_cache(struct netfs_read_request *rreq) ret = cres->ops->prepare_write(cres, &subreq->start, &subreq->len, rreq->i_size); if (ret < 0) { + trace_netfs_failure(rreq, subreq, ret, netfs_fail_prepare_write); trace_netfs_sreq(subreq, netfs_sreq_trace_write_skip); continue; } @@ -627,6 +630,8 @@ void netfs_subreq_terminated(struct netfs_read_subrequest *subreq, if (IS_ERR_VALUE(transferred_or_error)) { subreq->error = transferred_or_error; + trace_netfs_failure(rreq, subreq, transferred_or_error, + netfs_fail_read); goto failed; } @@ 
-996,8 +1001,10 @@ int netfs_readpage(struct file *file, } while (test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags)); ret = rreq->error; - if (ret == 0 && rreq->submitted < rreq->len) + if (ret == 0 && rreq->submitted < rreq->len) { + trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_readpage); ret = -EIO; + } out: netfs_put_read_request(rreq, false); return ret; @@ -1069,6 +1076,7 @@ retry: /* Allow the netfs (eg. ceph) to flush conflicts. */ ret = ops->check_write_begin(file, pos, len, page, _fsdata); if (ret < 0) { + trace_netfs_failure(NULL, NULL, ret, netfs_fail_check_write_begin); if (ret == -EAGAIN) goto retry; goto error; @@ -1145,8 +1153,10 @@ retry: } ret = rreq->error; - if (ret == 0 && rreq->submitted < rreq->len) + if (ret == 0 && rreq->submitted < rreq->len) { + trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_write_begin); ret = -EIO; + } netfs_put_read_request(rreq, false); if (ret < 0) goto error; diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index e3ebeabd3852..de1c64635e42 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -47,6 +47,15 @@ enum netfs_sreq_trace { netfs_sreq_trace_write_term, }; +enum netfs_failure { + netfs_fail_check_write_begin, + netfs_fail_copy_to_cache, + netfs_fail_read, + netfs_fail_short_readpage, + netfs_fail_short_write_begin, + netfs_fail_prepare_write, +}; + #endif #define netfs_read_traces \ @@ -81,6 +90,14 @@ enum netfs_sreq_trace { EM(netfs_sreq_trace_write_skip, "SKIP ") \ E_(netfs_sreq_trace_write_term, "WTERM") +#define netfs_failures \ + EM(netfs_fail_check_write_begin, "check-write-begin") \ + EM(netfs_fail_copy_to_cache, "copy-to-cache") \ + EM(netfs_fail_read, "read") \ + EM(netfs_fail_short_readpage, "short-readpage") \ + EM(netfs_fail_short_write_begin, "short-write-begin") \ + E_(netfs_fail_prepare_write, "prep-write") + /* * Export enum symbols via userspace. 
@@ -94,6 +111,7 @@ netfs_read_traces;
 netfs_rreq_traces;
 netfs_sreq_sources;
 netfs_sreq_traces;
+netfs_failures;
 
 /*
  * Now redefine the EM() and E_() macros to map the enums to the strings that
@@ -197,6 +215,46 @@ TRACE_EVENT(netfs_sreq,
 		      __entry->error)
 	    );
 
+TRACE_EVENT(netfs_failure,
+	    TP_PROTO(struct netfs_read_request *rreq,
+		     struct netfs_read_subrequest *sreq,
+		     int error, enum netfs_failure what),
+
+	    TP_ARGS(rreq, sreq, error, what),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int,		rreq		)
+		    __field(unsigned short,		index		)
+		    __field(short,			error		)
+		    __field(unsigned short,		flags		)
+		    __field(enum netfs_read_source,	source		)
+		    __field(enum netfs_failure,		what		)
+		    __field(size_t,			len		)
+		    __field(size_t,			transferred	)
+		    __field(loff_t,			start		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->rreq	= rreq ? rreq->debug_id : 0; /* rreq is NULL for check_write_begin failures */
+		    __entry->index	= sreq ? sreq->debug_index : 0;
+		    __entry->error	= error;
+		    __entry->flags	= sreq ? sreq->flags : 0;
+		    __entry->source	= sreq ? sreq->source : NETFS_INVALID_READ;
+		    __entry->what	= what;
+		    __entry->len	= sreq ? sreq->len : 0;
+		    __entry->transferred = sreq ? sreq->transferred : 0;
+		    __entry->start	= sreq ? sreq->start : 0;
+			   ),
+
+	    TP_printk("R=%08x[%u] %s f=%02x s=%llx %zx/%zx %s e=%d",
+		      __entry->rreq, __entry->index,
+		      __print_symbolic(__entry->source, netfs_sreq_sources),
+		      __entry->flags,
+		      __entry->start, __entry->transferred, __entry->len,
+		      __print_symbolic(__entry->what, netfs_failures),
+		      __entry->error)
+	    );
+
 #endif /* _TRACE_NETFS_H */
 
 /* This part must be outside protection */
-- 
cgit v1.2.3