From 2f9ad0ac947ccbe3ffe7c6229c9330f2a7755f64 Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Tue, 14 Aug 2018 02:43:48 +0000 Subject: 9p/xen: fix check for xenbus_read error in front_probe If the xen bus exists but does not expose the proper interface, it is possible to get a non-zero length but still some error, leading to strcmp failing trying to load invalid memory addresses e.g. fffffffffffffffe. There is then no need to check length when there is no error, as the xenbus driver guarantees that the string is nul-terminated. Link: http://lkml.kernel.org/r/1534236007-10170-1-git-send-email-asmadeus@codewreck.org Signed-off-by: Dominique Martinet Reviewed-by: Stefano Stabellini Cc: Eric Van Hensbergen Cc: Latchesar Ionkov --- net/9p/trans_xen.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c index c2d54ac76bfd..843cb823d9b9 100644 --- a/net/9p/trans_xen.c +++ b/net/9p/trans_xen.c @@ -391,8 +391,8 @@ static int xen_9pfs_front_probe(struct xenbus_device *dev, unsigned int max_rings, max_ring_order, len = 0; versions = xenbus_read(XBT_NIL, dev->otherend, "versions", &len); - if (!len) - return -EINVAL; + if (IS_ERR(versions)) + return PTR_ERR(versions); if (strcmp(versions, "1")) { kfree(versions); return -EINVAL; -- cgit v1.2.3 From 62e3941776fea8678bb8120607039410b1b61a65 Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Tue, 28 Aug 2018 07:32:35 +0900 Subject: 9p: clear dangling pointers in p9stat_free p9stat_free is more of a cleanup function than a 'free' function as it only frees the content of the struct; there are chances of use-after-free if it is improperly used (e.g. p9stat_free called twice as it used to be possible to) Clearing dangling pointers makes the function idempotent and safer to use. Link: http://lkml.kernel.org/r/1535410108-20650-2-git-send-email-asmadeus@codewreck.org Signed-off-by: Dominique Martinet Reported-by: syzbot+d4252148d198410b864f@syzkaller.appspotmail.com --- net/9p/protocol.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/9p/protocol.c b/net/9p/protocol.c index 4a1e1dd30b52..ee32bbf12675 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -46,10 +46,15 @@ p9pdu_writef(struct p9_fcall *pdu, int proto_version, const char *fmt, ...); void p9stat_free(struct p9_wstat *stbuf) { kfree(stbuf->name); + stbuf->name = NULL; kfree(stbuf->uid); + stbuf->uid = NULL; kfree(stbuf->gid); + stbuf->gid = NULL; kfree(stbuf->muid); + stbuf->muid = NULL; kfree(stbuf->extension); + stbuf->extension = NULL; } EXPORT_SYMBOL(p9stat_free); -- cgit v1.2.3 From 996d5b4db4b191f2676cf8775565cab8a5e2753b Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 11 Jul 2018 14:02:24 -0700 Subject: 9p: Use a slab for allocating requests Replace the custom batch allocation with a slab. Use an IDR to store pointers to the active requests instead of an array. We don't try to handle P9_NOTAG specially; the IDR will happily shrink all the way back once the TVERSION call has completed. Link: http://lkml.kernel.org/r/20180711210225.19730-6-willy@infradead.org Signed-off-by: Matthew Wilcox Cc: Eric Van Hensbergen Cc: Ron Minnich Cc: Latchesar Ionkov Signed-off-by: Dominique Martinet --- include/net/9p/client.h | 51 ++--------- net/9p/client.c | 238 +++++++++++++++++------------------------------- net/9p/mod.c | 9 +- 3 files changed, 102 insertions(+), 196 deletions(-) (limited to 'net') diff --git a/include/net/9p/client.h b/include/net/9p/client.h index 0fa0fbab33b0..a4dc42c53d18 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -64,22 +64,15 @@ enum p9_trans_status { /** * enum p9_req_status_t - status of a request - * @REQ_STATUS_IDLE: request slot unused * @REQ_STATUS_ALLOC: request has been allocated but not sent * @REQ_STATUS_UNSENT: request waiting to be sent * @REQ_STATUS_SENT: request sent to server * @REQ_STATUS_RCVD: response received from server * @REQ_STATUS_FLSHD: request has been flushed * @REQ_STATUS_ERROR: request encountered an error on the client side - * - * The @REQ_STATUS_IDLE state is used to mark a request slot as unused - * but use is actually tracked by the idpool structure which handles tag - * id allocation. - * */ enum p9_req_status_t { - REQ_STATUS_IDLE, REQ_STATUS_ALLOC, REQ_STATUS_UNSENT, REQ_STATUS_SENT, @@ -92,24 +85,12 @@ enum p9_req_status_t { * struct p9_req_t - request slots * @status: status of this request slot * @t_err: transport error - * @flush_tag: tag of request being flushed (for flush requests) * @wq: wait_queue for the client to block on for this request * @tc: the request fcall structure * @rc: the response fcall structure * @aux: transport specific data (provided for trans_fd migration) * @req_list: link for higher level objects to chain requests - * - * Transport use an array to track outstanding requests - * instead of a list. While this may incurr overhead during initial - * allocation or expansion, it makes request lookup much easier as the - * tag id is a index into an array. (We use tag+1 so that we can accommodate - * the -1 tag for the T_VERSION request). - * This also has the nice effect of only having to allocate wait_queues - * once, instead of constantly allocating and freeing them. Its possible - * other resources could benefit from this scheme as well. - * */ - struct p9_req_t { int status; int t_err; @@ -117,40 +98,26 @@ struct p9_req_t { struct p9_fcall *tc; struct p9_fcall *rc; void *aux; - struct list_head req_list; }; /** * struct p9_client - per client instance state - * @lock: protect @fidlist + * @lock: protect @fids and @reqs * @msize: maximum data size negotiated by protocol - * @dotu: extension flags negotiated by protocol * @proto_version: 9P protocol version to use * @trans_mod: module API instantiated with this client + * @status: connection state * @trans: tranport instance state and API * @fids: All active FID handles - * @tagpool - transaction id accounting for session - * @reqs - 2D array of requests - * @max_tag - current maximum tag id allocated - * @name - node name used as client id + * @reqs: All active requests. + * @name: node name used as client id * * The client structure is used to keep track of various per-client * state that has been instantiated. - * In order to minimize per-transaction overhead we use a - * simple array to lookup requests instead of a hash table - * or linked list. In order to support larger number of - * transactions, we make this a 2D array, allocating new rows - * when we need to grow the total number of the transactions. - * - * Each row is 256 requests and we'll support up to 256 rows for - * a total of 64k concurrent requests per session. - * - * Bugs: duplicated data and potentially unnecessary elements. */ - struct p9_client { - spinlock_t lock; /* protect client structure */ + spinlock_t lock; unsigned int msize; unsigned char proto_version; struct p9_trans_module *trans_mod; @@ -170,10 +137,7 @@ struct p9_client { } trans_opts; struct idr fids; - - struct p9_idpool *tagpool; - struct p9_req_t *reqs[P9_ROW_MAXTAG]; - int max_tag; + struct idr reqs; char name[__NEW_UTS_LEN + 1]; }; @@ -279,4 +243,7 @@ struct p9_fid *p9_client_xattrwalk(struct p9_fid *, const char *, u64 *); int p9_client_xattrcreate(struct p9_fid *, const char *, u64, int); int p9_client_readlink(struct p9_fid *fid, char **target); +int p9_client_init(void); +void p9_client_exit(void); + #endif /* NET_9P_CLIENT_H */ diff --git a/net/9p/client.c b/net/9p/client.c index deae53a7dffc..88db45966740 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -242,132 +242,102 @@ static struct p9_fcall *p9_fcall_alloc(int alloc_msize) return fc; } +static struct kmem_cache *p9_req_cache; + /** - * p9_tag_alloc - lookup/allocate a request by tag - * @c: client session to lookup tag within - * @tag: numeric id for transaction - * - * this is a simple array lookup, but will grow the - * request_slots as necessary to accommodate transaction - * ids which did not previously have a slot. - * - * this code relies on the client spinlock to manage locks, its - * possible we should switch to something else, but I'd rather - * stick with something low-overhead for the common case. + * p9_req_alloc - Allocate a new request. + * @c: Client session. + * @type: Transaction type. + * @max_size: Maximum packet size for this request. * + * Context: Process context. + * Return: Pointer to new request. */ - static struct p9_req_t * -p9_tag_alloc(struct p9_client *c, u16 tag, unsigned int max_size) +p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size) { - unsigned long flags; - int row, col; - struct p9_req_t *req; + struct p9_req_t *req = kmem_cache_alloc(p9_req_cache, GFP_NOFS); int alloc_msize = min(c->msize, max_size); + int tag; - /* This looks up the original request by tag so we know which - * buffer to read the data into */ - tag++; - - if (tag >= c->max_tag) { - spin_lock_irqsave(&c->lock, flags); - /* check again since original check was outside of lock */ - while (tag >= c->max_tag) { - row = (tag / P9_ROW_MAXTAG); - c->reqs[row] = kcalloc(P9_ROW_MAXTAG, - sizeof(struct p9_req_t), GFP_ATOMIC); - - if (!c->reqs[row]) { - pr_err("Couldn't grow tag array\n"); - spin_unlock_irqrestore(&c->lock, flags); - return ERR_PTR(-ENOMEM); - } - for (col = 0; col < P9_ROW_MAXTAG; col++) { - req = &c->reqs[row][col]; - req->status = REQ_STATUS_IDLE; - init_waitqueue_head(&req->wq); - } - c->max_tag += P9_ROW_MAXTAG; - } - spin_unlock_irqrestore(&c->lock, flags); - } - row = tag / P9_ROW_MAXTAG; - col = tag % P9_ROW_MAXTAG; + if (!req) + return NULL; - req = &c->reqs[row][col]; - if (!req->tc) - req->tc = p9_fcall_alloc(alloc_msize); - if (!req->rc) - req->rc = p9_fcall_alloc(alloc_msize); + req->tc = p9_fcall_alloc(alloc_msize); + req->rc = p9_fcall_alloc(alloc_msize); if (!req->tc || !req->rc) - goto grow_failed; + goto free; p9pdu_reset(req->tc); p9pdu_reset(req->rc); - - req->tc->tag = tag-1; req->status = REQ_STATUS_ALLOC; + init_waitqueue_head(&req->wq); + INIT_LIST_HEAD(&req->req_list); + + idr_preload(GFP_NOFS); + spin_lock_irq(&c->lock); + if (type == P9_TVERSION) + tag = idr_alloc(&c->reqs, req, P9_NOTAG, P9_NOTAG + 1, + GFP_NOWAIT); + else + tag = idr_alloc(&c->reqs, req, 0, P9_NOTAG, GFP_NOWAIT); + req->tc->tag = tag; + spin_unlock_irq(&c->lock); + idr_preload_end(); + if (tag < 0) + goto free; return req; -grow_failed: - pr_err("Couldn't grow tag array\n"); +free: kfree(req->tc); kfree(req->rc); - req->tc = req->rc = NULL; + kmem_cache_free(p9_req_cache, req); return ERR_PTR(-ENOMEM); } /** - * p9_tag_lookup - lookup a request by tag - * @c: client session to lookup tag within - * @tag: numeric id for transaction + * p9_tag_lookup - Look up a request by tag. + * @c: Client session. + * @tag: Transaction ID. * + * Context: Any context. + * Return: A request, or %NULL if there is no request with that tag. */ - struct p9_req_t *p9_tag_lookup(struct p9_client *c, u16 tag) { - int row, col; - - /* This looks up the original request by tag so we know which - * buffer to read the data into */ - tag++; - - if (tag >= c->max_tag) - return NULL; + struct p9_req_t *req; - row = tag / P9_ROW_MAXTAG; - col = tag % P9_ROW_MAXTAG; + rcu_read_lock(); + req = idr_find(&c->reqs, tag); + /* There's no refcount on the req; a malicious server could cause + * us to dereference a NULL pointer + */ + rcu_read_unlock(); - return &c->reqs[row][col]; + return req; } EXPORT_SYMBOL(p9_tag_lookup); /** - * p9_tag_init - setup tags structure and contents - * @c: v9fs client struct - * - * This initializes the tags structure for each client instance. + * p9_free_req - Free a request. + * @c: Client session. + * @r: Request to free. * + * Context: Any context. */ - -static int p9_tag_init(struct p9_client *c) +static void p9_free_req(struct p9_client *c, struct p9_req_t *r) { - int err = 0; + unsigned long flags; + u16 tag = r->tc->tag; - c->tagpool = p9_idpool_create(); - if (IS_ERR(c->tagpool)) { - err = PTR_ERR(c->tagpool); - goto error; - } - err = p9_idpool_get(c->tagpool); /* reserve tag 0 */ - if (err < 0) { - p9_idpool_destroy(c->tagpool); - goto error; - } - c->max_tag = 0; -error: - return err; + p9_debug(P9_DEBUG_MUX, "clnt %p req %p tag: %d\n", c, r, tag); + spin_lock_irqsave(&c->lock, flags); + idr_remove(&c->reqs, tag); + spin_unlock_irqrestore(&c->lock, flags); + kfree(r->tc); + kfree(r->rc); + kmem_cache_free(p9_req_cache, r); } /** @@ -379,52 +349,15 @@ error: */ static void p9_tag_cleanup(struct p9_client *c) { - int row, col; - - /* check to insure all requests are idle */ - for (row = 0; row < (c->max_tag/P9_ROW_MAXTAG); row++) { - for (col = 0; col < P9_ROW_MAXTAG; col++) { - if (c->reqs[row][col].status != REQ_STATUS_IDLE) { - p9_debug(P9_DEBUG_MUX, - "Attempting to cleanup non-free tag %d,%d\n", - row, col); - /* TODO: delay execution of cleanup */ - return; - } - } - } - - if (c->tagpool) { - p9_idpool_put(0, c->tagpool); /* free reserved tag 0 */ - p9_idpool_destroy(c->tagpool); - } + struct p9_req_t *req; + int id; - /* free requests associated with tags */ - for (row = 0; row < (c->max_tag/P9_ROW_MAXTAG); row++) { - for (col = 0; col < P9_ROW_MAXTAG; col++) { - kfree(c->reqs[row][col].tc); - kfree(c->reqs[row][col].rc); - } - kfree(c->reqs[row]); + rcu_read_lock(); + idr_for_each_entry(&c->reqs, req, id) { + pr_info("Tag %d still in use\n", id); + p9_free_req(c, req); } - c->max_tag = 0; -} - -/** - * p9_free_req - free a request and clean-up as necessary - * c: client state - * r: request to release - * - */ - -static void p9_free_req(struct p9_client *c, struct p9_req_t *r) -{ - int tag = r->tc->tag; - p9_debug(P9_DEBUG_MUX, "clnt %p req %p tag: %d\n", c, r, tag); - - r->status = REQ_STATUS_IDLE; - if (tag != P9_NOTAG && p9_idpool_check(tag, c->tagpool)) - p9_idpool_put(tag, c->tagpool); + rcu_read_unlock(); } /** @@ -698,7 +631,7 @@ static struct p9_req_t *p9_client_prepare_req(struct p9_client *c, int8_t type, int req_size, const char *fmt, va_list ap) { - int tag, err; + int err; struct p9_req_t *req; p9_debug(P9_DEBUG_MUX, "client %p op %d\n", c, type); @@ -711,24 +644,17 @@ static struct p9_req_t *p9_client_prepare_req(struct p9_client *c, if ((c->status == BeginDisconnect) && (type != P9_TCLUNK)) return ERR_PTR(-EIO); - tag = P9_NOTAG; - if (type != P9_TVERSION) { - tag = p9_idpool_get(c->tagpool); - if (tag < 0) - return ERR_PTR(-ENOMEM); - } - - req = p9_tag_alloc(c, tag, req_size); + req = p9_tag_alloc(c, type, req_size); if (IS_ERR(req)) return req; /* marshall the data */ - p9pdu_prepare(req->tc, tag, type); + p9pdu_prepare(req->tc, req->tc->tag, type); err = p9pdu_vwritef(req->tc, c->proto_version, fmt, ap); if (err) goto reterr; p9pdu_finalize(c, req->tc); - trace_9p_client_req(c, type, tag); + trace_9p_client_req(c, type, req->tc->tag); return req; reterr: p9_free_req(c, req); @@ -1026,14 +952,11 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) spin_lock_init(&clnt->lock); idr_init(&clnt->fids); - - err = p9_tag_init(clnt); - if (err < 0) - goto free_client; + idr_init(&clnt->reqs); err = parse_opts(options, clnt); if (err < 0) - goto destroy_tagpool; + goto free_client; if (!clnt->trans_mod) clnt->trans_mod = v9fs_get_default_trans(); @@ -1042,7 +965,7 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) err = -EPROTONOSUPPORT; p9_debug(P9_DEBUG_ERROR, "No transport defined or default transport\n"); - goto destroy_tagpool; + goto free_client; } p9_debug(P9_DEBUG_MUX, "clnt %p trans %p msize %d protocol %d\n", @@ -1065,8 +988,6 @@ close_trans: clnt->trans_mod->close(clnt); put_trans: v9fs_put_trans(clnt->trans_mod); -destroy_tagpool: - p9_idpool_destroy(clnt->tagpool); free_client: kfree(clnt); return ERR_PTR(err); @@ -2282,3 +2203,14 @@ error: return err; } EXPORT_SYMBOL(p9_client_readlink); + +int __init p9_client_init(void) +{ + p9_req_cache = KMEM_CACHE(p9_req_t, 0); + return p9_req_cache ? 0 : -ENOMEM; +} + +void __exit p9_client_exit(void) +{ + kmem_cache_destroy(p9_req_cache); +} diff --git a/net/9p/mod.c b/net/9p/mod.c index 253ba824a325..0da56d6af73b 100644 --- a/net/9p/mod.c +++ b/net/9p/mod.c @@ -171,11 +171,17 @@ void v9fs_put_trans(struct p9_trans_module *m) */ static int __init init_p9(void) { + int ret; + + ret = p9_client_init(); + if (ret) + return ret; + p9_error_init(); pr_info("Installing 9P2000 support\n"); p9_trans_fd_init(); - return 0; + return ret; } /** @@ -188,6 +194,7 @@ static void __exit exit_p9(void) pr_info("Unloading 9P2000 support\n"); p9_trans_fd_exit(); + p9_client_exit(); } module_init(init_p9) -- cgit v1.2.3 From 6348b903d79119a8157aace08ab99521f5dba139 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 11 Jul 2018 14:02:25 -0700 Subject: 9p: Remove p9_idpool There are no more users left of the p9_idpool; delete it. Link: http://lkml.kernel.org/r/20180711210225.19730-7-willy@infradead.org Signed-off-by: Matthew Wilcox Cc: Eric Van Hensbergen Cc: Ron Minnich Cc: Latchesar Ionkov Signed-off-by: Dominique Martinet --- include/net/9p/9p.h | 8 --- net/9p/Makefile | 1 - net/9p/util.c | 140 ---------------------------------------------------- 3 files changed, 149 deletions(-) delete mode 100644 net/9p/util.c (limited to 'net') diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h index b8eb51a661e5..e23896116d9a 100644 --- a/include/net/9p/9p.h +++ b/include/net/9p/9p.h @@ -561,16 +561,8 @@ struct p9_fcall { u8 *sdata; }; -struct p9_idpool; - int p9_errstr2errno(char *errstr, int len); -struct p9_idpool *p9_idpool_create(void); -void p9_idpool_destroy(struct p9_idpool *); -int p9_idpool_get(struct p9_idpool *p); -void p9_idpool_put(int id, struct p9_idpool *p); -int p9_idpool_check(int id, struct p9_idpool *p); - int p9_error_init(void); int p9_trans_fd_init(void); void p9_trans_fd_exit(void); diff --git a/net/9p/Makefile b/net/9p/Makefile index c0486cfc85d9..aa0a5641e5d0 100644 --- a/net/9p/Makefile +++ b/net/9p/Makefile @@ -8,7 +8,6 @@ obj-$(CONFIG_NET_9P_RDMA) += 9pnet_rdma.o mod.o \ client.o \ error.o \ - util.o \ protocol.o \ trans_fd.o \ trans_common.o \ diff --git a/net/9p/util.c b/net/9p/util.c deleted file mode 100644 index 55ad98277e85..000000000000 --- a/net/9p/util.c +++ /dev/null @@ -1,140 +0,0 @@ -/* - * net/9p/util.c - * - * This file contains some helper functions - * - * Copyright (C) 2007 by Latchesar Ionkov - * Copyright (C) 2004 by Eric Van Hensbergen - * Copyright (C) 2002 by Ron Minnich - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to: - * Free Software Foundation - * 51 Franklin Street, Fifth Floor - * Boston, MA 02111-1301 USA - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -/** - * struct p9_idpool - per-connection accounting for tag idpool - * @lock: protects the pool - * @pool: idr to allocate tag id from - * - */ - -struct p9_idpool { - spinlock_t lock; - struct idr pool; -}; - -/** - * p9_idpool_create - create a new per-connection id pool - * - */ - -struct p9_idpool *p9_idpool_create(void) -{ - struct p9_idpool *p; - - p = kmalloc(sizeof(struct p9_idpool), GFP_KERNEL); - if (!p) - return ERR_PTR(-ENOMEM); - - spin_lock_init(&p->lock); - idr_init(&p->pool); - - return p; -} -EXPORT_SYMBOL(p9_idpool_create); - -/** - * p9_idpool_destroy - create a new per-connection id pool - * @p: idpool to destroy - */ - -void p9_idpool_destroy(struct p9_idpool *p) -{ - idr_destroy(&p->pool); - kfree(p); -} -EXPORT_SYMBOL(p9_idpool_destroy); - -/** - * p9_idpool_get - allocate numeric id from pool - * @p: pool to allocate from - * - * Bugs: This seems to be an awful generic function, should it be in idr.c with - * the lock included in struct idr? - */ - -int p9_idpool_get(struct p9_idpool *p) -{ - int i; - unsigned long flags; - - idr_preload(GFP_NOFS); - spin_lock_irqsave(&p->lock, flags); - - /* no need to store exactly p, we just need something non-null */ - i = idr_alloc(&p->pool, p, 0, 0, GFP_NOWAIT); - - spin_unlock_irqrestore(&p->lock, flags); - idr_preload_end(); - if (i < 0) - return -1; - - p9_debug(P9_DEBUG_MUX, " id %d pool %p\n", i, p); - return i; -} -EXPORT_SYMBOL(p9_idpool_get); - -/** - * p9_idpool_put - release numeric id from pool - * @id: numeric id which is being released - * @p: pool to release id into - * - * Bugs: This seems to be an awful generic function, should it be in idr.c with - * the lock included in struct idr? - */ - -void p9_idpool_put(int id, struct p9_idpool *p) -{ - unsigned long flags; - - p9_debug(P9_DEBUG_MUX, " id %d pool %p\n", id, p); - - spin_lock_irqsave(&p->lock, flags); - idr_remove(&p->pool, id); - spin_unlock_irqrestore(&p->lock, flags); -} -EXPORT_SYMBOL(p9_idpool_put); - -/** - * p9_idpool_check - check if the specified id is available - * @id: id to check - * @p: pool to check - */ - -int p9_idpool_check(int id, struct p9_idpool *p) -{ - return idr_find(&p->pool, id) != NULL; -} -EXPORT_SYMBOL(p9_idpool_check); -- cgit v1.2.3 From 523adb6cc10b48655c0abe556505240741425b49 Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Mon, 30 Jul 2018 05:55:19 +0000 Subject: 9p: embed fcall in req to round down buffer allocs 'msize' is often a power of two, or at least page-aligned, so avoiding an overhead of two dozen bytes for each allocation will help the allocator do its work and reduce memory fragmentation. Link: http://lkml.kernel.org/r/1533825236-22896-1-git-send-email-asmadeus@codewreck.org Suggested-by: Matthew Wilcox Signed-off-by: Dominique Martinet Reviewed-by: Greg Kurz Acked-by: Jun Piao Cc: Matthew Wilcox --- include/net/9p/client.h | 5 +- net/9p/client.c | 167 +++++++++++++++++++++++++----------------------- net/9p/trans_fd.c | 12 ++-- net/9p/trans_rdma.c | 29 +++++---- net/9p/trans_virtio.c | 18 +++--- net/9p/trans_xen.c | 12 ++-- 6 files changed, 125 insertions(+), 118 deletions(-) (limited to 'net') diff --git a/include/net/9p/client.h b/include/net/9p/client.h index a4dc42c53d18..c2671d40bb6b 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -95,8 +95,8 @@ struct p9_req_t { int status; int t_err; wait_queue_head_t wq; - struct p9_fcall *tc; - struct p9_fcall *rc; + struct p9_fcall tc; + struct p9_fcall rc; void *aux; struct list_head req_list; }; @@ -230,6 +230,7 @@ int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode, kgid_t gid, struct p9_qid *); int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status); int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *fl); +void p9_fcall_fini(struct p9_fcall *fc); struct p9_req_t *p9_tag_lookup(struct p9_client *, u16); void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status); diff --git a/net/9p/client.c b/net/9p/client.c index 88db45966740..ed78751aee7c 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -231,16 +231,20 @@ free_and_return: return ret; } -static struct p9_fcall *p9_fcall_alloc(int alloc_msize) +static int p9_fcall_init(struct p9_fcall *fc, int alloc_msize) { - struct p9_fcall *fc; - fc = kmalloc(sizeof(struct p9_fcall) + alloc_msize, GFP_NOFS); - if (!fc) - return NULL; + fc->sdata = kmalloc(alloc_msize, GFP_NOFS); + if (!fc->sdata) + return -ENOMEM; fc->capacity = alloc_msize; - fc->sdata = (char *) fc + sizeof(struct p9_fcall); - return fc; + return 0; +} + +void p9_fcall_fini(struct p9_fcall *fc) +{ + kfree(fc->sdata); } +EXPORT_SYMBOL(p9_fcall_fini); static struct kmem_cache *p9_req_cache; @@ -263,13 +267,13 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size) if (!req) return NULL; - req->tc = p9_fcall_alloc(alloc_msize); - req->rc = p9_fcall_alloc(alloc_msize); - if (!req->tc || !req->rc) + if (p9_fcall_init(&req->tc, alloc_msize)) + goto free_req; + if (p9_fcall_init(&req->rc, alloc_msize)) goto free; - p9pdu_reset(req->tc); - p9pdu_reset(req->rc); + p9pdu_reset(&req->tc); + p9pdu_reset(&req->rc); req->status = REQ_STATUS_ALLOC; init_waitqueue_head(&req->wq); INIT_LIST_HEAD(&req->req_list); @@ -281,7 +285,7 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size) GFP_NOWAIT); else tag = idr_alloc(&c->reqs, req, 0, P9_NOTAG, GFP_NOWAIT); - req->tc->tag = tag; + req->tc.tag = tag; spin_unlock_irq(&c->lock); idr_preload_end(); if (tag < 0) @@ -290,8 +294,9 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size) return req; free: - kfree(req->tc); - kfree(req->rc); + p9_fcall_fini(&req->tc); + p9_fcall_fini(&req->rc); +free_req: kmem_cache_free(p9_req_cache, req); return ERR_PTR(-ENOMEM); } @@ -329,14 +334,14 @@ EXPORT_SYMBOL(p9_tag_lookup); static void p9_free_req(struct p9_client *c, struct p9_req_t *r) { unsigned long flags; - u16 tag = r->tc->tag; + u16 tag = r->tc.tag; p9_debug(P9_DEBUG_MUX, "clnt %p req %p tag: %d\n", c, r, tag); spin_lock_irqsave(&c->lock, flags); idr_remove(&c->reqs, tag); spin_unlock_irqrestore(&c->lock, flags); - kfree(r->tc); - kfree(r->rc); + p9_fcall_fini(&r->tc); + p9_fcall_fini(&r->rc); kmem_cache_free(p9_req_cache, r); } @@ -368,7 +373,7 @@ static void p9_tag_cleanup(struct p9_client *c) */ void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status) { - p9_debug(P9_DEBUG_MUX, " tag %d\n", req->tc->tag); + p9_debug(P9_DEBUG_MUX, " tag %d\n", req->tc.tag); /* * This barrier is needed to make sure any change made to req before @@ -378,7 +383,7 @@ void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status) req->status = status; wake_up(&req->wq); - p9_debug(P9_DEBUG_MUX, "wakeup: %d\n", req->tc->tag); + p9_debug(P9_DEBUG_MUX, "wakeup: %d\n", req->tc.tag); } EXPORT_SYMBOL(p9_client_cb); @@ -449,18 +454,18 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) int err; int ecode; - err = p9_parse_header(req->rc, NULL, &type, NULL, 0); - if (req->rc->size >= c->msize) { + err = p9_parse_header(&req->rc, NULL, &type, NULL, 0); + if (req->rc.size >= c->msize) { p9_debug(P9_DEBUG_ERROR, "requested packet size too big: %d\n", - req->rc->size); + req->rc.size); return -EIO; } /* * dump the response from server * This should be after check errors which poplulate pdu_fcall. */ - trace_9p_protocol_dump(c, req->rc); + trace_9p_protocol_dump(c, &req->rc); if (err) { p9_debug(P9_DEBUG_ERROR, "couldn't parse header %d\n", err); return err; @@ -470,7 +475,7 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) if (!p9_is_proto_dotl(c)) { char *ename; - err = p9pdu_readf(req->rc, c->proto_version, "s?d", + err = p9pdu_readf(&req->rc, c->proto_version, "s?d", &ename, &ecode); if (err) goto out_err; @@ -486,7 +491,7 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) } kfree(ename); } else { - err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode); + err = p9pdu_readf(&req->rc, c->proto_version, "d", &ecode); err = -ecode; p9_debug(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode); @@ -520,12 +525,12 @@ static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req, int8_t type; char *ename = NULL; - err = p9_parse_header(req->rc, NULL, &type, NULL, 0); + err = p9_parse_header(&req->rc, NULL, &type, NULL, 0); /* * dump the response from server * This should be after parse_header which poplulate pdu_fcall. */ - trace_9p_protocol_dump(c, req->rc); + trace_9p_protocol_dump(c, &req->rc); if (err) { p9_debug(P9_DEBUG_ERROR, "couldn't parse header %d\n", err); return err; @@ -540,13 +545,13 @@ static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req, /* 7 = header size for RERROR; */ int inline_len = in_hdrlen - 7; - len = req->rc->size - req->rc->offset; + len = req->rc.size - req->rc.offset; if (len > (P9_ZC_HDR_SZ - 7)) { err = -EFAULT; goto out_err; } - ename = &req->rc->sdata[req->rc->offset]; + ename = &req->rc.sdata[req->rc.offset]; if (len > inline_len) { /* We have error in external buffer */ if (!copy_from_iter_full(ename + inline_len, @@ -556,7 +561,7 @@ static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req, } } ename = NULL; - err = p9pdu_readf(req->rc, c->proto_version, "s?d", + err = p9pdu_readf(&req->rc, c->proto_version, "s?d", &ename, &ecode); if (err) goto out_err; @@ -572,7 +577,7 @@ static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req, } kfree(ename); } else { - err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode); + err = p9pdu_readf(&req->rc, c->proto_version, "d", &ecode); err = -ecode; p9_debug(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode); @@ -605,7 +610,7 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq) int16_t oldtag; int err; - err = p9_parse_header(oldreq->tc, NULL, NULL, &oldtag, 1); + err = p9_parse_header(&oldreq->tc, NULL, NULL, &oldtag, 1); if (err) return err; @@ -649,12 +654,12 @@ static struct p9_req_t *p9_client_prepare_req(struct p9_client *c, return req; /* marshall the data */ - p9pdu_prepare(req->tc, req->tc->tag, type); - err = p9pdu_vwritef(req->tc, c->proto_version, fmt, ap); + p9pdu_prepare(&req->tc, req->tc.tag, type); + err = p9pdu_vwritef(&req->tc, c->proto_version, fmt, ap); if (err) goto reterr; - p9pdu_finalize(c, req->tc); - trace_9p_client_req(c, type, req->tc->tag); + p9pdu_finalize(c, &req->tc); + trace_9p_client_req(c, type, req->tc.tag); return req; reterr: p9_free_req(c, req); @@ -739,7 +744,7 @@ recalc_sigpending: goto reterr; err = p9_check_errors(c, req); - trace_9p_client_res(c, type, req->rc->tag, err); + trace_9p_client_res(c, type, req->rc.tag, err); if (!err) return req; reterr: @@ -821,7 +826,7 @@ recalc_sigpending: goto reterr; err = p9_check_zc_errors(c, req, uidata, in_hdrlen); - trace_9p_client_res(c, type, req->rc->tag, err); + trace_9p_client_res(c, type, req->rc.tag, err); if (!err) return req; reterr: @@ -904,10 +909,10 @@ static int p9_client_version(struct p9_client *c) if (IS_ERR(req)) return PTR_ERR(req); - err = p9pdu_readf(req->rc, c->proto_version, "ds", &msize, &version); + err = p9pdu_readf(&req->rc, c->proto_version, "ds", &msize, &version); if (err) { p9_debug(P9_DEBUG_9P, "version error %d\n", err); - trace_9p_protocol_dump(c, req->rc); + trace_9p_protocol_dump(c, &req->rc); goto error; } @@ -1056,9 +1061,9 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, goto error; } - err = p9pdu_readf(req->rc, clnt->proto_version, "Q", &qid); + err = p9pdu_readf(&req->rc, clnt->proto_version, "Q", &qid); if (err) { - trace_9p_protocol_dump(clnt, req->rc); + trace_9p_protocol_dump(clnt, &req->rc); p9_free_req(clnt, req); goto error; } @@ -1113,9 +1118,9 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname, goto error; } - err = p9pdu_readf(req->rc, clnt->proto_version, "R", &nwqids, &wqids); + err = p9pdu_readf(&req->rc, clnt->proto_version, "R", &nwqids, &wqids); if (err) { - trace_9p_protocol_dump(clnt, req->rc); + trace_9p_protocol_dump(clnt, &req->rc); p9_free_req(clnt, req); goto clunk_fid; } @@ -1180,9 +1185,9 @@ int p9_client_open(struct p9_fid *fid, int mode) goto error; } - err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", &qid, &iounit); + err = p9pdu_readf(&req->rc, clnt->proto_version, "Qd", &qid, &iounit); if (err) { - trace_9p_protocol_dump(clnt, req->rc); + trace_9p_protocol_dump(clnt, &req->rc); goto free_and_error; } @@ -1224,9 +1229,9 @@ int p9_client_create_dotl(struct p9_fid *ofid, const char *name, u32 flags, u32 goto error; } - err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", qid, &iounit); + err = p9pdu_readf(&req->rc, clnt->proto_version, "Qd", qid, &iounit); if (err) { - trace_9p_protocol_dump(clnt, req->rc); + trace_9p_protocol_dump(clnt, &req->rc); goto free_and_error; } @@ -1269,9 +1274,9 @@ int p9_client_fcreate(struct p9_fid *fid, const char *name, u32 perm, int mode, goto error; } - err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", &qid, &iounit); + err = p9pdu_readf(&req->rc, clnt->proto_version, "Qd", &qid, &iounit); if (err) { - trace_9p_protocol_dump(clnt, req->rc); + trace_9p_protocol_dump(clnt, &req->rc); goto free_and_error; } @@ -1308,9 +1313,9 @@ int p9_client_symlink(struct p9_fid *dfid, const char *name, goto error; } - err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid); + err = p9pdu_readf(&req->rc, clnt->proto_version, "Q", qid); if (err) { - trace_9p_protocol_dump(clnt, req->rc); + trace_9p_protocol_dump(clnt, &req->rc); goto free_and_error; } @@ -1506,10 +1511,10 @@ p9_client_read(struct p9_fid *fid, u64 offset, struct iov_iter *to, int *err) break; } - *err = p9pdu_readf(req->rc, clnt->proto_version, + *err = p9pdu_readf(&req->rc, clnt->proto_version, "D", &count, &dataptr); if (*err) { - trace_9p_protocol_dump(clnt, req->rc); + trace_9p_protocol_dump(clnt, &req->rc); p9_free_req(clnt, req); break; } @@ -1579,9 +1584,9 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err) break; } - *err = p9pdu_readf(req->rc, clnt->proto_version, "d", &count); + *err = p9pdu_readf(&req->rc, clnt->proto_version, "d", &count); if (*err) { - trace_9p_protocol_dump(clnt, req->rc); + trace_9p_protocol_dump(clnt, &req->rc); p9_free_req(clnt, req); break; } @@ -1623,9 +1628,9 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid) goto error; } - err = p9pdu_readf(req->rc, clnt->proto_version, "wS", &ignored, ret); + err = p9pdu_readf(&req->rc, clnt->proto_version, "wS", &ignored, ret); if (err) { - trace_9p_protocol_dump(clnt, req->rc); + trace_9p_protocol_dump(clnt, &req->rc); p9_free_req(clnt, req); goto error; } @@ -1676,9 +1681,9 @@ struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid, goto error; } - err = p9pdu_readf(req->rc, clnt->proto_version, "A", ret); + err = p9pdu_readf(&req->rc, clnt->proto_version, "A", ret); if (err) { - trace_9p_protocol_dump(clnt, req->rc); + trace_9p_protocol_dump(clnt, &req->rc); p9_free_req(clnt, req); goto error; } @@ -1828,11 +1833,11 @@ int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb) goto error; } - err = p9pdu_readf(req->rc, clnt->proto_version, "ddqqqqqqd", &sb->type, - &sb->bsize, &sb->blocks, &sb->bfree, &sb->bavail, - &sb->files, &sb->ffree, &sb->fsid, &sb->namelen); + err = p9pdu_readf(&req->rc, clnt->proto_version, "ddqqqqqqd", &sb->type, + &sb->bsize, &sb->blocks, &sb->bfree, &sb->bavail, + &sb->files, &sb->ffree, &sb->fsid, &sb->namelen); if (err) { - trace_9p_protocol_dump(clnt, req->rc); + trace_9p_protocol_dump(clnt, &req->rc); p9_free_req(clnt, req); goto error; } @@ -1936,9 +1941,9 @@ struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid, err = PTR_ERR(req); goto error; } - err = p9pdu_readf(req->rc, clnt->proto_version, "q", attr_size); + err = p9pdu_readf(&req->rc, clnt->proto_version, "q", attr_size); if (err) { - trace_9p_protocol_dump(clnt, req->rc); + trace_9p_protocol_dump(clnt, &req->rc); p9_free_req(clnt, req); goto clunk_fid; } @@ -2024,9 +2029,9 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) goto error; } - err = p9pdu_readf(req->rc, clnt->proto_version, "D", &count, &dataptr); + err = p9pdu_readf(&req->rc, clnt->proto_version, "D", &count, &dataptr); if (err) { - trace_9p_protocol_dump(clnt, req->rc); + trace_9p_protocol_dump(clnt, &req->rc); goto free_and_error; } if (rsize < count) { @@ -2065,9 +2070,9 @@ int p9_client_mknod_dotl(struct p9_fid *fid, const char *name, int mode, if (IS_ERR(req)) return PTR_ERR(req); - err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid); + err = p9pdu_readf(&req->rc, clnt->proto_version, "Q", qid); if (err) { - trace_9p_protocol_dump(clnt, req->rc); + trace_9p_protocol_dump(clnt, &req->rc); goto error; } p9_debug(P9_DEBUG_9P, "<<< RMKNOD qid %x.%llx.%x\n", qid->type, @@ -2096,9 +2101,9 @@ int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode, if (IS_ERR(req)) return PTR_ERR(req); - err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid); + err = p9pdu_readf(&req->rc, clnt->proto_version, "Q", qid); if (err) { - trace_9p_protocol_dump(clnt, req->rc); + trace_9p_protocol_dump(clnt, &req->rc); goto error; } p9_debug(P9_DEBUG_9P, "<<< RMKDIR qid %x.%llx.%x\n", qid->type, @@ -2131,9 +2136,9 @@ int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status) if (IS_ERR(req)) return PTR_ERR(req); - err = p9pdu_readf(req->rc, clnt->proto_version, "b", status); + err = p9pdu_readf(&req->rc, clnt->proto_version, "b", status); if (err) { - trace_9p_protocol_dump(clnt, req->rc); + trace_9p_protocol_dump(clnt, &req->rc); goto error; } p9_debug(P9_DEBUG_9P, "<<< RLOCK status %i\n", *status); @@ -2162,11 +2167,11 @@ int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *glock) if (IS_ERR(req)) return PTR_ERR(req); - err = p9pdu_readf(req->rc, clnt->proto_version, "bqqds", &glock->type, - &glock->start, &glock->length, &glock->proc_id, - &glock->client_id); + err = p9pdu_readf(&req->rc, clnt->proto_version, "bqqds", &glock->type, + &glock->start, &glock->length, &glock->proc_id, + &glock->client_id); if (err) { - trace_9p_protocol_dump(clnt, req->rc); + trace_9p_protocol_dump(clnt, &req->rc); goto error; } p9_debug(P9_DEBUG_9P, "<<< RGETLOCK type %i start %lld length %lld " @@ -2192,9 +2197,9 @@ int p9_client_readlink(struct p9_fid *fid, char **target) if (IS_ERR(req)) return PTR_ERR(req); - err = p9pdu_readf(req->rc, clnt->proto_version, "s", target); + err = p9pdu_readf(&req->rc, clnt->proto_version, "s", target); if (err) { - trace_9p_protocol_dump(clnt, req->rc); + trace_9p_protocol_dump(clnt, &req->rc); goto error; } p9_debug(P9_DEBUG_9P, "<<< RREADLINK target %s\n", *target); diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index e2ef3c782c53..51615c0fb744 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -354,7 +354,7 @@ static void p9_read_work(struct work_struct *work) goto error; } - if (m->req->rc == NULL) { + if (!m->req->rc.sdata) { p9_debug(P9_DEBUG_ERROR, "No recv fcall for tag %d (req %p), disconnecting!\n", m->rc.tag, m->req); @@ -362,7 +362,7 @@ static void p9_read_work(struct work_struct *work) err = -EIO; goto error; } - m->rc.sdata = (char *)m->req->rc + sizeof(struct p9_fcall); + m->rc.sdata = m->req->rc.sdata; memcpy(m->rc.sdata, m->tmp_buf, m->rc.capacity); m->rc.capacity = m->rc.size; } @@ -372,7 +372,7 @@ static void p9_read_work(struct work_struct *work) */ if ((m->req) && (m->rc.offset == m->rc.capacity)) { p9_debug(P9_DEBUG_TRANS, "got new packet\n"); - m->req->rc->size = m->rc.offset; + m->req->rc.size = m->rc.offset; spin_lock(&m->client->lock); if (m->req->status != REQ_STATUS_ERROR) status = REQ_STATUS_RCVD; @@ -469,8 +469,8 @@ static void p9_write_work(struct work_struct *work) p9_debug(P9_DEBUG_TRANS, "move req %p\n", req); list_move_tail(&req->req_list, &m->req_list); - m->wbuf = req->tc->sdata; - m->wsize = req->tc->size; + m->wbuf = req->tc.sdata; + m->wsize = req->tc.size; m->wpos = 0; spin_unlock(&m->client->lock); } @@ -663,7 +663,7 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req) struct p9_conn *m = &ts->conn; p9_debug(P9_DEBUG_TRANS, "mux %p task %p tcall %p id %d\n", - m, current, req->tc, req->tc->id); + m, current, &req->tc, req->tc.id); if (m->err < 0) return m->err; diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index b513cffeeb3c..5b0cda1aaa7a 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -122,7 +122,7 @@ struct p9_rdma_context { dma_addr_t busa; union { struct p9_req_t *req; - struct p9_fcall *rc; + struct p9_fcall rc; }; }; @@ -320,8 +320,8 @@ recv_done(struct ib_cq *cq, struct ib_wc *wc) if (wc->status != IB_WC_SUCCESS) goto err_out; - c->rc->size = wc->byte_len; - err = p9_parse_header(c->rc, NULL, NULL, &tag, 1); + c->rc.size = wc->byte_len; + err = p9_parse_header(&c->rc, NULL, NULL, &tag, 1); if (err) goto err_out; @@ -331,12 +331,13 @@ recv_done(struct ib_cq *cq, struct ib_wc *wc) /* Check that we have not yet received a reply for this request. */ - if (unlikely(req->rc)) { + if (unlikely(req->rc.sdata)) { pr_err("Duplicate reply for request %d", tag); goto err_out; } - req->rc = c->rc; + req->rc.size = c->rc.size; + req->rc.sdata = c->rc.sdata; p9_client_cb(client, req, REQ_STATUS_RCVD); out: @@ -361,7 +362,7 @@ send_done(struct ib_cq *cq, struct ib_wc *wc) container_of(wc->wr_cqe, struct p9_rdma_context, cqe); ib_dma_unmap_single(rdma->cm_id->device, - c->busa, c->req->tc->size, + c->busa, c->req->tc.size, DMA_TO_DEVICE); up(&rdma->sq_sem); kfree(c); @@ -401,7 +402,7 @@ post_recv(struct p9_client *client, struct p9_rdma_context *c) struct ib_sge sge; c->busa = ib_dma_map_single(rdma->cm_id->device, - c->rc->sdata, client->msize, + c->rc.sdata, client->msize, DMA_FROM_DEVICE); if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) goto error; @@ -443,9 +444,9 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req) **/ if (unlikely(atomic_read(&rdma->excess_rc) > 0)) { if ((atomic_sub_return(1, &rdma->excess_rc) >= 0)) { - /* Got one ! */ - kfree(req->rc); - req->rc = NULL; + /* Got one! */ + p9_fcall_fini(&req->rc); + req->rc.sdata = NULL; goto dont_need_post_recv; } else { /* We raced and lost. */ @@ -459,7 +460,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req) err = -ENOMEM; goto recv_error; } - rpl_context->rc = req->rc; + rpl_context->rc.sdata = req->rc.sdata; /* * Post a receive buffer for this request. We need to ensure @@ -479,7 +480,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req) goto recv_error; } /* remove posted receive buffer from request structure */ - req->rc = NULL; + req->rc.sdata = NULL; dont_need_post_recv: /* Post the request */ @@ -491,7 +492,7 @@ dont_need_post_recv: c->req = req; c->busa = ib_dma_map_single(rdma->cm_id->device, - c->req->tc->sdata, c->req->tc->size, + c->req->tc.sdata, c->req->tc.size, DMA_TO_DEVICE); if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) { err = -EIO; @@ -501,7 +502,7 @@ dont_need_post_recv: c->cqe.done = send_done; sge.addr = c->busa; - sge.length = c->req->tc->size; + sge.length = c->req->tc.size; sge.lkey = rdma->pd->local_dma_lkey; wr.next = NULL; diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 7728b0acde09..3dd6ce1c0f2d 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -155,7 +155,7 @@ static void req_done(struct virtqueue *vq) } if (len) { - req->rc->size = len; + req->rc.size = len; p9_client_cb(chan->client, req, REQ_STATUS_RCVD); } } @@ -273,12 +273,12 @@ req_retry: out_sgs = in_sgs = 0; /* Handle out VirtIO ring buffers */ out = pack_sg_list(chan->sg, 0, - VIRTQUEUE_NUM, req->tc->sdata, req->tc->size); + VIRTQUEUE_NUM, req->tc.sdata, req->tc.size); if (out) sgs[out_sgs++] = chan->sg; in = pack_sg_list(chan->sg, out, - VIRTQUEUE_NUM, req->rc->sdata, req->rc->capacity); + VIRTQUEUE_NUM, req->rc.sdata, req->rc.capacity); if (in) sgs[out_sgs + in_sgs++] = chan->sg + out; @@ -416,15 +416,15 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req, out_nr_pages = DIV_ROUND_UP(n + offs, PAGE_SIZE); if (n != outlen) { __le32 v = cpu_to_le32(n); - memcpy(&req->tc->sdata[req->tc->size - 4], &v, 4); + memcpy(&req->tc.sdata[req->tc.size - 4], &v, 4); outlen = n; } /* The size field of the message must include the length of the * header and the length of the data. We didn't actually know * the length of the data until this point so add it in now. */ - sz = cpu_to_le32(req->tc->size + outlen); - memcpy(&req->tc->sdata[0], &sz, sizeof(sz)); + sz = cpu_to_le32(req->tc.size + outlen); + memcpy(&req->tc.sdata[0], &sz, sizeof(sz)); } else if (uidata) { int n = p9_get_mapped_pages(chan, &in_pages, uidata, inlen, &offs, &need_drop); @@ -433,7 +433,7 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req, in_nr_pages = DIV_ROUND_UP(n + offs, PAGE_SIZE); if (n != inlen) { __le32 v = cpu_to_le32(n); - memcpy(&req->tc->sdata[req->tc->size - 4], &v, 4); + memcpy(&req->tc.sdata[req->tc.size - 4], &v, 4); inlen = n; } } @@ -445,7 +445,7 @@ req_retry_pinned: /* out data */ out = pack_sg_list(chan->sg, 0, - VIRTQUEUE_NUM, req->tc->sdata, req->tc->size); + VIRTQUEUE_NUM, req->tc.sdata, req->tc.size); if (out) sgs[out_sgs++] = chan->sg; @@ -464,7 +464,7 @@ req_retry_pinned: * alloced memory and payload onto the user buffer. */ in = pack_sg_list(chan->sg, out, - VIRTQUEUE_NUM, req->rc->sdata, in_hdr_len); + VIRTQUEUE_NUM, req->rc.sdata, in_hdr_len); if (in) sgs[out_sgs + in_sgs++] = chan->sg + out; diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c index 843cb823d9b9..782a07f2ad0c 100644 --- a/net/9p/trans_xen.c +++ b/net/9p/trans_xen.c @@ -141,7 +141,7 @@ static int p9_xen_request(struct p9_client *client, struct p9_req_t *p9_req) struct xen_9pfs_front_priv *priv = NULL; RING_IDX cons, prod, masked_cons, masked_prod; unsigned long flags; - u32 size = p9_req->tc->size; + u32 size = p9_req->tc.size; struct xen_9pfs_dataring *ring; int num; @@ -154,7 +154,7 @@ static int p9_xen_request(struct p9_client *client, struct p9_req_t *p9_req) if (!priv || priv->client != client) return -EINVAL; - num = p9_req->tc->tag % priv->num_rings; + num = p9_req->tc.tag % priv->num_rings; ring = &priv->rings[num]; again: @@ -176,7 +176,7 @@ again: masked_prod = xen_9pfs_mask(prod, XEN_9PFS_RING_SIZE); masked_cons = xen_9pfs_mask(cons, XEN_9PFS_RING_SIZE); - xen_9pfs_write_packet(ring->data.out, p9_req->tc->sdata, size, + xen_9pfs_write_packet(ring->data.out, p9_req->tc.sdata, size, &masked_prod, masked_cons, XEN_9PFS_RING_SIZE); p9_req->status = REQ_STATUS_SENT; @@ -229,12 +229,12 @@ static void p9_xen_response(struct work_struct *work) continue; } - memcpy(req->rc, &h, sizeof(h)); - req->rc->offset = 0; + memcpy(&req->rc, &h, sizeof(h)); + req->rc.offset = 0; masked_cons = xen_9pfs_mask(cons, XEN_9PFS_RING_SIZE); /* Then, read the whole packet (including the header) */ - xen_9pfs_read_packet(req->rc->sdata, ring->data.in, h.size, + xen_9pfs_read_packet(req->rc.sdata, ring->data.in, h.size, masked_prod, &masked_cons, XEN_9PFS_RING_SIZE); -- cgit v1.2.3 From 91a76be37ff89795526c452a6799576b03bec501 Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Mon, 30 Jul 2018 15:14:37 +0900 Subject: 9p: add a per-client fcall kmem_cache Having a specific cache for the fcall allocations helps speed up end-to-end latency. The caches will automatically be merged if there are multiple caches of items with the same size so we do not need to try to share a cache between different clients of the same size. Since the msize is negotiated with the server, only allocate the cache after that negotiation has happened - previous allocations or allocations of different sizes (e.g. zero-copy fcall) are made with kmalloc directly. Some figures on two beefy VMs with Connect-IB (sriov) / trans=rdma, with ior running 32 processes in parallel doing small 32 bytes IOs: - no alloc (4.18-rc7 request cache): 65.4k req/s - non-power of two alloc, no patch: 61.6k req/s - power of two alloc, no patch: 62.2k req/s - non-power of two alloc, with patch: 64.7k req/s - power of two alloc, with patch: 65.1k req/s Link: http://lkml.kernel.org/r/1532943263-24378-2-git-send-email-asmadeus@codewreck.org Signed-off-by: Dominique Martinet Acked-by: Jun Piao Cc: Matthew Wilcox Cc: Greg Kurz --- include/net/9p/9p.h | 4 ++++ include/net/9p/client.h | 1 + net/9p/client.c | 37 ++++++++++++++++++++++++++++++++----- 3 files changed, 37 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h index e23896116d9a..beede1e1a919 100644 --- a/include/net/9p/9p.h +++ b/include/net/9p/9p.h @@ -336,6 +336,9 @@ enum p9_qid_t { #define P9_NOFID (u32)(~0) #define P9_MAXWELEM 16 +/* Minimal header size: size[4] type[1] tag[2] */ +#define P9_HDRSZ 7 + /* ample room for Twrite/Rread header */ #define P9_IOHDRSZ 24 @@ -558,6 +561,7 @@ struct p9_fcall { size_t offset; size_t capacity; + struct kmem_cache *cache; u8 *sdata; }; diff --git a/include/net/9p/client.h b/include/net/9p/client.h index c2671d40bb6b..735f3979d559 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -123,6 +123,7 @@ struct p9_client { struct p9_trans_module *trans_mod; enum p9_trans_status status; void *trans; + struct kmem_cache *fcall_cache; union { struct { diff --git a/net/9p/client.c b/net/9p/client.c index ed78751aee7c..f3dff8758ed7 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -231,9 +231,16 @@ free_and_return: return ret; } -static int p9_fcall_init(struct p9_fcall *fc, int alloc_msize) +static int p9_fcall_init(struct p9_client *c, struct p9_fcall *fc, + int alloc_msize) { - fc->sdata = kmalloc(alloc_msize, GFP_NOFS); + if (likely(c->fcall_cache) && alloc_msize == c->msize) { + fc->sdata = kmem_cache_alloc(c->fcall_cache, GFP_NOFS); + fc->cache = c->fcall_cache; + } else { + fc->sdata = kmalloc(alloc_msize, GFP_NOFS); + fc->cache = NULL; + } if (!fc->sdata) return -ENOMEM; fc->capacity = alloc_msize; @@ -242,7 +249,16 @@ static int p9_fcall_init(struct p9_fcall *fc, int alloc_msize) void p9_fcall_fini(struct p9_fcall *fc) { - kfree(fc->sdata); + /* sdata can be NULL for interrupted requests in trans_rdma, + * and kmem_cache_free does not do NULL-check for us + */ + if (unlikely(!fc->sdata)) + return; + + if (fc->cache) + kmem_cache_free(fc->cache, fc->sdata); + else + kfree(fc->sdata); } EXPORT_SYMBOL(p9_fcall_fini); @@ -267,9 +283,9 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size) if (!req) return NULL; - if (p9_fcall_init(&req->tc, alloc_msize)) + if (p9_fcall_init(c, &req->tc, alloc_msize)) goto free_req; - if (p9_fcall_init(&req->rc, alloc_msize)) + if (p9_fcall_init(c, &req->rc, alloc_msize)) goto free; p9pdu_reset(&req->tc); @@ -951,6 +967,7 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) clnt->trans_mod = NULL; clnt->trans = NULL; + clnt->fcall_cache = NULL; client_id = utsname()->nodename; memcpy(clnt->name, client_id, strlen(client_id) + 1); @@ -987,6 +1004,15 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) if (err) goto close_trans; + /* P9_HDRSZ + 4 is the smallest packet header we can have that is + * followed by data accessed from userspace by read + */ + clnt->fcall_cache = + kmem_cache_create_usercopy("9p-fcall-cache", clnt->msize, + 0, 0, P9_HDRSZ + 4, + clnt->msize - (P9_HDRSZ + 4), + NULL); + return clnt; close_trans: @@ -1018,6 +1044,7 @@ void p9_client_destroy(struct p9_client *clnt) p9_tag_cleanup(clnt); + kmem_cache_destroy(clnt->fcall_cache); kfree(clnt); } EXPORT_SYMBOL(p9_client_destroy); -- cgit v1.2.3 From 43cbcbee9938b17f77cf34f1bc12d302f456810f Mon Sep 17 00:00:00 2001 From: Tomas Bortoli Date: Sat, 11 Aug 2018 16:42:53 +0200 Subject: 9p: rename p9_free_req() function In sight of the next patch to add a refcount in p9_req_t, rename the p9_free_req() function in p9_release_req(). In the next patch the actual kfree will be moved to another function. Link: http://lkml.kernel.org/r/20180811144254.23665-1-tomasbortoli@gmail.com Signed-off-by: Tomas Bortoli Acked-by: Jun Piao Signed-off-by: Dominique Martinet --- net/9p/client.c | 100 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 50 insertions(+), 50 deletions(-) (limited to 'net') diff --git a/net/9p/client.c b/net/9p/client.c index f3dff8758ed7..96c105841075 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -341,13 +341,13 @@ struct p9_req_t *p9_tag_lookup(struct p9_client *c, u16 tag) EXPORT_SYMBOL(p9_tag_lookup); /** - * p9_free_req - Free a request. + * p9_tag_remove - Remove a tag. * @c: Client session. - * @r: Request to free. + * @r: Request of reference. * * Context: Any context. */ -static void p9_free_req(struct p9_client *c, struct p9_req_t *r) +static void p9_tag_remove(struct p9_client *c, struct p9_req_t *r) { unsigned long flags; u16 tag = r->tc.tag; @@ -376,7 +376,7 @@ static void p9_tag_cleanup(struct p9_client *c) rcu_read_lock(); idr_for_each_entry(&c->reqs, req, id) { pr_info("Tag %d still in use\n", id); - p9_free_req(c, req); + p9_tag_remove(c, req); } rcu_read_unlock(); } @@ -644,7 +644,7 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq) if (c->trans_mod->cancelled) c->trans_mod->cancelled(c, oldreq); - p9_free_req(c, req); + p9_tag_remove(c, req); return 0; } @@ -678,7 +678,7 @@ static struct p9_req_t *p9_client_prepare_req(struct p9_client *c, trace_9p_client_req(c, type, req->tc.tag); return req; reterr: - p9_free_req(c, req); + p9_tag_remove(c, req); return ERR_PTR(err); } @@ -688,7 +688,7 @@ reterr: * @type: type of request * @fmt: protocol format string (see protocol.c) * - * Returns request structure (which client must free using p9_free_req) + * Returns request structure (which client must free using p9_tag_remove) */ static struct p9_req_t * @@ -764,7 +764,7 @@ recalc_sigpending: if (!err) return req; reterr: - p9_free_req(c, req); + p9_tag_remove(c, req); return ERR_PTR(safe_errno(err)); } @@ -779,7 +779,7 @@ reterr: * @hdrlen: reader header size, This is the size of response protocol data * @fmt: protocol format string (see protocol.c) * - * Returns request structure (which client must free using p9_free_req) + * Returns request structure (which client must free using p9_tag_remove) */ static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type, struct iov_iter *uidata, @@ -846,7 +846,7 @@ recalc_sigpending: if (!err) return req; reterr: - p9_free_req(c, req); + p9_tag_remove(c, req); return ERR_PTR(safe_errno(err)); } @@ -949,7 +949,7 @@ static int p9_client_version(struct p9_client *c) error: kfree(version); - p9_free_req(c, req); + p9_tag_remove(c, req); return err; } @@ -1091,7 +1091,7 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, err = p9pdu_readf(&req->rc, clnt->proto_version, "Q", &qid); if (err) { trace_9p_protocol_dump(clnt, &req->rc); - p9_free_req(clnt, req); + p9_tag_remove(clnt, req); goto error; } @@ -1100,7 +1100,7 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, memmove(&fid->qid, &qid, sizeof(struct p9_qid)); - p9_free_req(clnt, req); + p9_tag_remove(clnt, req); return fid; error: @@ -1148,10 +1148,10 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname, err = p9pdu_readf(&req->rc, clnt->proto_version, "R", &nwqids, &wqids); if (err) { trace_9p_protocol_dump(clnt, &req->rc); - p9_free_req(clnt, req); + p9_tag_remove(clnt, req); goto clunk_fid; } - p9_free_req(clnt, req); + p9_tag_remove(clnt, req); p9_debug(P9_DEBUG_9P, "<<< RWALK nwqid %d:\n", nwqids); @@ -1226,7 +1226,7 @@ int p9_client_open(struct p9_fid *fid, int mode) fid->iounit = iounit; free_and_error: - p9_free_req(clnt, req); + p9_tag_remove(clnt, req); error: return err; } @@ -1271,7 +1271,7 @@ int p9_client_create_dotl(struct p9_fid *ofid, const char *name, u32 flags, u32 ofid->iounit = iounit; free_and_error: - p9_free_req(clnt, req); + p9_tag_remove(clnt, req); error: return err; } @@ -1316,7 +1316,7 @@ int p9_client_fcreate(struct p9_fid *fid, const char *name, u32 perm, int mode, fid->iounit = iounit; free_and_error: - p9_free_req(clnt, req); + p9_tag_remove(clnt, req); error: return err; } @@ -1350,7 +1350,7 @@ int p9_client_symlink(struct p9_fid *dfid, const char *name, qid->type, (unsigned long long)qid->path, qid->version); free_and_error: - p9_free_req(clnt, req); + p9_tag_remove(clnt, req); error: return err; } @@ -1370,7 +1370,7 @@ int p9_client_link(struct p9_fid *dfid, struct p9_fid *oldfid, const char *newna return PTR_ERR(req); p9_debug(P9_DEBUG_9P, "<<< RLINK\n"); - p9_free_req(clnt, req); + p9_tag_remove(clnt, req); return 0; } EXPORT_SYMBOL(p9_client_link); @@ -1394,7 +1394,7 @@ int p9_client_fsync(struct p9_fid *fid, int datasync) p9_debug(P9_DEBUG_9P, "<<< RFSYNC fid %d\n", fid->fid); - p9_free_req(clnt, req); + p9_tag_remove(clnt, req); error: return err; @@ -1429,7 +1429,7 @@ again: p9_debug(P9_DEBUG_9P, "<<< RCLUNK fid %d\n", fid->fid); - p9_free_req(clnt, req); + p9_tag_remove(clnt, req); error: /* * Fid is not valid even after a failed clunk @@ -1463,7 +1463,7 @@ int p9_client_remove(struct p9_fid *fid) p9_debug(P9_DEBUG_9P, "<<< RREMOVE fid %d\n", fid->fid); - p9_free_req(clnt, req); + p9_tag_remove(clnt, req); error: if (err == -ERESTARTSYS) p9_client_clunk(fid); @@ -1490,7 +1490,7 @@ int p9_client_unlinkat(struct p9_fid *dfid, const char *name, int flags) } p9_debug(P9_DEBUG_9P, "<<< RUNLINKAT fid %d %s\n", dfid->fid, name); - p9_free_req(clnt, req); + p9_tag_remove(clnt, req); error: return err; } @@ -1542,7 +1542,7 @@ p9_client_read(struct p9_fid *fid, u64 offset, struct iov_iter *to, int *err) "D", &count, &dataptr); if (*err) { trace_9p_protocol_dump(clnt, &req->rc); - p9_free_req(clnt, req); + p9_tag_remove(clnt, req); break; } if (rsize < count) { @@ -1552,7 +1552,7 @@ p9_client_read(struct p9_fid *fid, u64 offset, struct iov_iter *to, int *err) p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", count); if (!count) { - p9_free_req(clnt, req); + p9_tag_remove(clnt, req); break; } @@ -1562,7 +1562,7 @@ p9_client_read(struct p9_fid *fid, u64 offset, struct iov_iter *to, int *err) offset += n; if (n != count) { *err = -EFAULT; - p9_free_req(clnt, req); + p9_tag_remove(clnt, req); break; } } else { @@ -1570,7 +1570,7 @@ p9_client_read(struct p9_fid *fid, u64 offset, struct iov_iter *to, int *err) total += count; offset += count; } - p9_free_req(clnt, req); + p9_tag_remove(clnt, req); } return total; } @@ -1614,7 +1614,7 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err) *err = p9pdu_readf(&req->rc, clnt->proto_version, "d", &count); if (*err) { trace_9p_protocol_dump(clnt, &req->rc); - p9_free_req(clnt, req); + p9_tag_remove(clnt, req); break; } if (rsize < count) { @@ -1624,7 +1624,7 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err) p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", count); - p9_free_req(clnt, req); + p9_tag_remove(clnt, req); iov_iter_advance(from, count); total += count; offset += count; @@ -1658,7 +1658,7 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid) err = p9pdu_readf(&req->rc, clnt->proto_version, "wS", &ignored, ret); if (err) { trace_9p_protocol_dump(clnt, &req->rc); - p9_free_req(clnt, req); + p9_tag_remove(clnt, req); goto error; } @@ -1675,7 +1675,7 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid) from_kgid(&init_user_ns, ret->n_gid), from_kuid(&init_user_ns, ret->n_muid)); - p9_free_req(clnt, req); + p9_tag_remove(clnt, req); return ret; error: @@ -1711,7 +1711,7 @@ struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid, err = p9pdu_readf(&req->rc, clnt->proto_version, "A", ret); if (err) { trace_9p_protocol_dump(clnt, &req->rc); - p9_free_req(clnt, req); + p9_tag_remove(clnt, req); goto error; } @@ -1736,7 +1736,7 @@ struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid, ret->st_ctime_nsec, ret->st_btime_sec, ret->st_btime_nsec, ret->st_gen, ret->st_data_version); - p9_free_req(clnt, req); + p9_tag_remove(clnt, req); return ret; error: @@ -1805,7 +1805,7 @@ int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst) p9_debug(P9_DEBUG_9P, "<<< RWSTAT fid %d\n", fid->fid); - p9_free_req(clnt, req); + p9_tag_remove(clnt, req); error: return err; } @@ -1837,7 +1837,7 @@ int p9_client_setattr(struct p9_fid *fid, struct p9_iattr_dotl *p9attr) goto error; } p9_debug(P9_DEBUG_9P, "<<< RSETATTR fid %d\n", fid->fid); - p9_free_req(clnt, req); + p9_tag_remove(clnt, req); error: return err; } @@ -1865,7 +1865,7 @@ int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb) &sb->files, &sb->ffree, &sb->fsid, &sb->namelen); if (err) { trace_9p_protocol_dump(clnt, &req->rc); - p9_free_req(clnt, req); + p9_tag_remove(clnt, req); goto error; } @@ -1876,7 +1876,7 @@ int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb) sb->blocks, sb->bfree, sb->bavail, sb->files, sb->ffree, sb->fsid, (long int)sb->namelen); - p9_free_req(clnt, req); + p9_tag_remove(clnt, req); error: return err; } @@ -1904,7 +1904,7 @@ int p9_client_rename(struct p9_fid *fid, p9_debug(P9_DEBUG_9P, "<<< RRENAME fid %d\n", fid->fid); - p9_free_req(clnt, req); + p9_tag_remove(clnt, req); error: return err; } @@ -1934,7 +1934,7 @@ int p9_client_renameat(struct p9_fid *olddirfid, const char *old_name, p9_debug(P9_DEBUG_9P, "<<< RRENAMEAT newdirfid %d new name %s\n", newdirfid->fid, new_name); - p9_free_req(clnt, req); + p9_tag_remove(clnt, req); error: return err; } @@ -1971,10 +1971,10 @@ struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid, err = p9pdu_readf(&req->rc, clnt->proto_version, "q", attr_size); if (err) { trace_9p_protocol_dump(clnt, &req->rc); - p9_free_req(clnt, req); + p9_tag_remove(clnt, req); goto clunk_fid; } - p9_free_req(clnt, req); + p9_tag_remove(clnt, req); p9_debug(P9_DEBUG_9P, "<<< RXATTRWALK fid %d size %llu\n", attr_fid->fid, *attr_size); return attr_fid; @@ -2008,7 +2008,7 @@ int p9_client_xattrcreate(struct p9_fid *fid, const char *name, goto error; } p9_debug(P9_DEBUG_9P, "<<< RXATTRCREATE fid %d\n", fid->fid); - p9_free_req(clnt, req); + p9_tag_remove(clnt, req); error: return err; } @@ -2071,11 +2071,11 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) if (non_zc) memmove(data, dataptr, count); - p9_free_req(clnt, req); + p9_tag_remove(clnt, req); return count; free_and_error: - p9_free_req(clnt, req); + p9_tag_remove(clnt, req); error: return err; } @@ -2106,7 +2106,7 @@ int p9_client_mknod_dotl(struct p9_fid *fid, const char *name, int mode, (unsigned long long)qid->path, qid->version); error: - p9_free_req(clnt, req); + p9_tag_remove(clnt, req); return err; } @@ -2137,7 +2137,7 @@ int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode, (unsigned long long)qid->path, qid->version); error: - p9_free_req(clnt, req); + p9_tag_remove(clnt, req); return err; } @@ -2170,7 +2170,7 @@ int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status) } p9_debug(P9_DEBUG_9P, "<<< RLOCK status %i\n", *status); error: - p9_free_req(clnt, req); + p9_tag_remove(clnt, req); return err; } @@ -2205,7 +2205,7 @@ int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *glock) "proc_id %d client_id %s\n", glock->type, glock->start, glock->length, glock->proc_id, glock->client_id); error: - p9_free_req(clnt, req); + p9_tag_remove(clnt, req); return err; } EXPORT_SYMBOL(p9_client_getlock_dotl); @@ -2231,7 +2231,7 @@ int p9_client_readlink(struct p9_fid *fid, char **target) } p9_debug(P9_DEBUG_9P, "<<< RREADLINK target %s\n", *target); error: - p9_free_req(clnt, req); + p9_tag_remove(clnt, req); return err; } EXPORT_SYMBOL(p9_client_readlink); -- cgit v1.2.3 From 728356dedeff8ef999cb436c71333ef4ac51a81c Mon Sep 17 00:00:00 2001 From: Tomas Bortoli Date: Tue, 14 Aug 2018 19:43:42 +0200 Subject: 9p: Add refcount to p9_req_t To avoid use-after-free(s), use a refcount to keep track of the usable references to any instantiated struct p9_req_t. This commit adds p9_req_put(), p9_req_get() and p9_req_try_get() as wrappers to kref_put(), kref_get() and kref_get_unless_zero(). These are used by the client and the transports to keep track of valid requests' references. p9_free_req() is added back and used as callback by kref_put(). Add SLAB_TYPESAFE_BY_RCU as it ensures that the memory freed by kmem_cache_free() will not be reused for another type until the rcu synchronisation period is over, so an address gotten under rcu read lock is safe to inc_ref() without corrupting random memory while the lock is held. Link: http://lkml.kernel.org/r/1535626341-20693-1-git-send-email-asmadeus@codewreck.org Co-developed-by: Dominique Martinet Signed-off-by: Tomas Bortoli Reported-by: syzbot+467050c1ce275af2a5b8@syzkaller.appspotmail.com Signed-off-by: Dominique Martinet --- include/net/9p/client.h | 14 ++++++++++++ net/9p/client.c | 57 +++++++++++++++++++++++++++++++++++++++++++------ net/9p/trans_fd.c | 11 +++++++++- net/9p/trans_rdma.c | 1 + net/9p/trans_virtio.c | 26 ++++++++++++++++++---- net/9p/trans_xen.c | 1 + 6 files changed, 98 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/include/net/9p/client.h b/include/net/9p/client.h index 735f3979d559..947a570307a6 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -94,6 +94,7 @@ enum p9_req_status_t { struct p9_req_t { int status; int t_err; + struct kref refcount; wait_queue_head_t wq; struct p9_fcall tc; struct p9_fcall rc; @@ -233,6 +234,19 @@ int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status); int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *fl); void p9_fcall_fini(struct p9_fcall *fc); struct p9_req_t *p9_tag_lookup(struct p9_client *, u16); + +static inline void p9_req_get(struct p9_req_t *r) +{ + kref_get(&r->refcount); +} + +static inline int p9_req_try_get(struct p9_req_t *r) +{ + return kref_get_unless_zero(&r->refcount); +} + +int p9_req_put(struct p9_req_t *r); + void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status); int p9_parse_header(struct p9_fcall *, int32_t *, int8_t *, int16_t *, int); diff --git a/net/9p/client.c b/net/9p/client.c index 96c105841075..47fa6158a75a 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -307,6 +307,18 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size) if (tag < 0) goto free; + /* Init ref to two because in the general case there is one ref + * that is put asynchronously by a writer thread, one ref + * temporarily given by p9_tag_lookup and put by p9_client_cb + * in the recv thread, and one ref put by p9_tag_remove in the + * main thread. The only exception is virtio that does not use + * p9_tag_lookup but does not have a writer thread either + * (the write happens synchronously in the request/zc_request + * callback), so p9_client_cb eats the second ref there + * as the pointer is duplicated directly by virtqueue_add_sgs() + */ + refcount_set(&req->refcount.refcount, 2); + return req; free: @@ -330,10 +342,21 @@ struct p9_req_t *p9_tag_lookup(struct p9_client *c, u16 tag) struct p9_req_t *req; rcu_read_lock(); +again: req = idr_find(&c->reqs, tag); - /* There's no refcount on the req; a malicious server could cause - * us to dereference a NULL pointer - */ + if (req) { + /* We have to be careful with the req found under rcu_read_lock + * Thanks to SLAB_TYPESAFE_BY_RCU we can safely try to get the + * ref again without corrupting other data, then check again + * that the tag matches once we have the ref + */ + if (!p9_req_try_get(req)) + goto again; + if (req->tc.tag != tag) { + p9_req_put(req); + goto again; + } + } rcu_read_unlock(); return req; @@ -347,7 +370,7 @@ EXPORT_SYMBOL(p9_tag_lookup); * * Context: Any context. */ -static void p9_tag_remove(struct p9_client *c, struct p9_req_t *r) +static int p9_tag_remove(struct p9_client *c, struct p9_req_t *r) { unsigned long flags; u16 tag = r->tc.tag; @@ -356,11 +379,23 @@ static void p9_tag_remove(struct p9_client *c, struct p9_req_t *r) spin_lock_irqsave(&c->lock, flags); idr_remove(&c->reqs, tag); spin_unlock_irqrestore(&c->lock, flags); + return p9_req_put(r); +} + +static void p9_req_free(struct kref *ref) +{ + struct p9_req_t *r = container_of(ref, struct p9_req_t, refcount); p9_fcall_fini(&r->tc); p9_fcall_fini(&r->rc); kmem_cache_free(p9_req_cache, r); } +int p9_req_put(struct p9_req_t *r) +{ + return kref_put(&r->refcount, p9_req_free); +} +EXPORT_SYMBOL(p9_req_put); + /** * p9_tag_cleanup - cleans up tags structure and reclaims resources * @c: v9fs client struct @@ -376,7 +411,9 @@ static void p9_tag_cleanup(struct p9_client *c) rcu_read_lock(); idr_for_each_entry(&c->reqs, req, id) { pr_info("Tag %d still in use\n", id); - p9_tag_remove(c, req); + if (p9_tag_remove(c, req) == 0) + pr_warn("Packet with tag %d has still references", + req->tc.tag); } rcu_read_unlock(); } @@ -400,6 +437,7 @@ void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status) wake_up(&req->wq); p9_debug(P9_DEBUG_MUX, "wakeup: %d\n", req->tc.tag); + p9_req_put(req); } EXPORT_SYMBOL(p9_client_cb); @@ -640,9 +678,10 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq) * if we haven't received a response for oldreq, * remove it from the list */ - if (oldreq->status == REQ_STATUS_SENT) + if (oldreq->status == REQ_STATUS_SENT) { if (c->trans_mod->cancelled) c->trans_mod->cancelled(c, oldreq); + } p9_tag_remove(c, req); return 0; @@ -679,6 +718,8 @@ static struct p9_req_t *p9_client_prepare_req(struct p9_client *c, return req; reterr: p9_tag_remove(c, req); + /* We have to put also the 2nd reference as it won't be used */ + p9_req_put(req); return ERR_PTR(err); } @@ -713,6 +754,8 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) err = c->trans_mod->request(c, req); if (err < 0) { + /* write won't happen */ + p9_req_put(req); if (err != -ERESTARTSYS && err != -EFAULT) c->status = Disconnected; goto recalc_sigpending; @@ -2238,7 +2281,7 @@ EXPORT_SYMBOL(p9_client_readlink); int __init p9_client_init(void) { - p9_req_cache = KMEM_CACHE(p9_req_t, 0); + p9_req_cache = KMEM_CACHE(p9_req_t, SLAB_TYPESAFE_BY_RCU); return p9_req_cache ? 0 : -ENOMEM; } diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 51615c0fb744..aca528722183 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -132,6 +132,7 @@ struct p9_conn { struct list_head req_list; struct list_head unsent_req_list; struct p9_req_t *req; + struct p9_req_t *wreq; char tmp_buf[7]; struct p9_fcall rc; int wpos; @@ -383,6 +384,7 @@ static void p9_read_work(struct work_struct *work) m->rc.sdata = NULL; m->rc.offset = 0; m->rc.capacity = 0; + p9_req_put(m->req); m->req = NULL; } @@ -472,6 +474,8 @@ static void p9_write_work(struct work_struct *work) m->wbuf = req->tc.sdata; m->wsize = req->tc.size; m->wpos = 0; + p9_req_get(req); + m->wreq = req; spin_unlock(&m->client->lock); } @@ -492,8 +496,11 @@ static void p9_write_work(struct work_struct *work) } m->wpos += err; - if (m->wpos == m->wsize) + if (m->wpos == m->wsize) { m->wpos = m->wsize = 0; + p9_req_put(m->wreq); + m->wreq = NULL; + } end_clear: clear_bit(Wworksched, &m->wsched); @@ -694,6 +701,7 @@ static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req) if (req->status == REQ_STATUS_UNSENT) { list_del(&req->req_list); req->status = REQ_STATUS_FLSHD; + p9_req_put(req); ret = 0; } spin_unlock(&client->lock); @@ -711,6 +719,7 @@ static int p9_fd_cancelled(struct p9_client *client, struct p9_req_t *req) spin_lock(&client->lock); list_del(&req->req_list); spin_unlock(&client->lock); + p9_req_put(req); return 0; } diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index 5b0cda1aaa7a..9cc9b3a19ee7 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -365,6 +365,7 @@ send_done(struct ib_cq *cq, struct ib_wc *wc) c->busa, c->req->tc.size, DMA_TO_DEVICE); up(&rdma->sq_sem); + p9_req_put(c->req); kfree(c); } diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 3dd6ce1c0f2d..eb596c2ed546 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -207,6 +207,13 @@ static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req) return 1; } +/* Reply won't come, so drop req ref */ +static int p9_virtio_cancelled(struct p9_client *client, struct p9_req_t *req) +{ + p9_req_put(req); + return 0; +} + /** * pack_sg_list_p - Just like pack_sg_list. Instead of taking a buffer, * this takes a list of pages. @@ -404,6 +411,7 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req, struct scatterlist *sgs[4]; size_t offs; int need_drop = 0; + int kicked = 0; p9_debug(P9_DEBUG_TRANS, "virtio request\n"); @@ -411,8 +419,10 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req, __le32 sz; int n = p9_get_mapped_pages(chan, &out_pages, uodata, outlen, &offs, &need_drop); - if (n < 0) - return n; + if (n < 0) { + err = n; + goto err_out; + } out_nr_pages = DIV_ROUND_UP(n + offs, PAGE_SIZE); if (n != outlen) { __le32 v = cpu_to_le32(n); @@ -428,8 +438,10 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req, } else if (uidata) { int n = p9_get_mapped_pages(chan, &in_pages, uidata, inlen, &offs, &need_drop); - if (n < 0) - return n; + if (n < 0) { + err = n; + goto err_out; + } in_nr_pages = DIV_ROUND_UP(n + offs, PAGE_SIZE); if (n != inlen) { __le32 v = cpu_to_le32(n); @@ -498,6 +510,7 @@ req_retry_pinned: } virtqueue_kick(chan->vq); spin_unlock_irqrestore(&chan->lock, flags); + kicked = 1; p9_debug(P9_DEBUG_TRANS, "virtio request kicked\n"); err = wait_event_killable(req->wq, req->status >= REQ_STATUS_RCVD); /* @@ -518,6 +531,10 @@ err_out: } kvfree(in_pages); kvfree(out_pages); + if (!kicked) { + /* reply won't come */ + p9_req_put(req); + } return err; } @@ -750,6 +767,7 @@ static struct p9_trans_module p9_virtio_trans = { .request = p9_virtio_request, .zc_request = p9_virtio_zc_request, .cancel = p9_virtio_cancel, + .cancelled = p9_virtio_cancelled, /* * We leave one entry for input and one entry for response * headers. We also skip one more entry to accomodate, address diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c index 782a07f2ad0c..e2fbf3677b9b 100644 --- a/net/9p/trans_xen.c +++ b/net/9p/trans_xen.c @@ -185,6 +185,7 @@ again: ring->intf->out_prod = prod; spin_unlock_irqrestore(&ring->lock, flags); notify_remote_via_irq(ring->irq); + p9_req_put(p9_req); return 0; } -- cgit v1.2.3 From 8b894adb2b7e1d1e64b8954569c761eaf3d51ab5 Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Thu, 30 Aug 2018 19:29:36 +0900 Subject: 9p/rdma: do not disconnect on down_interruptible EAGAIN 9p/rdma would sometimes drop the connection and display errors in recv_done when the user does ^C. The errors were caused by recv buffers that were posted at the time of disconnect, and we just do not want to disconnect when down_interruptible is... interrupted. Link: http://lkml.kernel.org/r/1535625307-18019-1-git-send-email-asmadeus@codewreck.org Signed-off-by: Dominique Martinet --- net/9p/trans_rdma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index 9cc9b3a19ee7..9719bc4d9424 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -477,7 +477,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req) err = post_recv(client, rpl_context); if (err) { - p9_debug(P9_DEBUG_FCALL, "POST RECV failed\n"); + p9_debug(P9_DEBUG_ERROR, "POST RECV failed: %d\n", err); goto recv_error; } /* remove posted receive buffer from request structure */ @@ -546,7 +546,7 @@ dont_need_post_recv: recv_error: kfree(rpl_context); spin_lock_irqsave(&rdma->req_lock, flags); - if (rdma->state < P9_RDMA_CLOSING) { + if (err != -EINTR && rdma->state < P9_RDMA_CLOSING) { rdma->state = P9_RDMA_CLOSING; spin_unlock_irqrestore(&rdma->req_lock, flags); rdma_disconnect(rdma->cm_id); -- cgit v1.2.3 From 6d35190f395316916c8bb4aabd35a182890bf856 Mon Sep 17 00:00:00 2001 From: Tomas Bortoli Date: Mon, 3 Sep 2018 18:03:21 +0200 Subject: 9p: Rename req to rreq in trans_fd In struct p9_conn, rename req to rreq as it is used by the read routine. Link: http://lkml.kernel.org/r/20180903160321.2181-1-tomasbortoli@gmail.com Signed-off-by: Tomas Bortoli Suggested-by: Jun Piao Signed-off-by: Dominique Martinet --- net/9p/trans_fd.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index aca528722183..12559c474dde 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -131,7 +131,7 @@ struct p9_conn { int err; struct list_head req_list; struct list_head unsent_req_list; - struct p9_req_t *req; + struct p9_req_t *rreq; struct p9_req_t *wreq; char tmp_buf[7]; struct p9_fcall rc; @@ -323,7 +323,7 @@ static void p9_read_work(struct work_struct *work) m->rc.offset += err; /* header read in */ - if ((!m->req) && (m->rc.offset == m->rc.capacity)) { + if ((!m->rreq) && (m->rc.offset == m->rc.capacity)) { p9_debug(P9_DEBUG_TRANS, "got new header\n"); /* Header size */ @@ -347,23 +347,23 @@ static void p9_read_work(struct work_struct *work) "mux %p pkt: size: %d bytes tag: %d\n", m, m->rc.size, m->rc.tag); - m->req = p9_tag_lookup(m->client, m->rc.tag); - if (!m->req || (m->req->status != REQ_STATUS_SENT)) { + m->rreq = p9_tag_lookup(m->client, m->rc.tag); + if (!m->rreq || (m->rreq->status != REQ_STATUS_SENT)) { p9_debug(P9_DEBUG_ERROR, "Unexpected packet tag %d\n", m->rc.tag); err = -EIO; goto error; } - if (!m->req->rc.sdata) { + if (!m->rreq->rc.sdata) { p9_debug(P9_DEBUG_ERROR, "No recv fcall for tag %d (req %p), disconnecting!\n", - m->rc.tag, m->req); - m->req = NULL; + m->rc.tag, m->rreq); + m->rreq = NULL; err = -EIO; goto error; } - m->rc.sdata = m->req->rc.sdata; + m->rc.sdata = m->rreq->rc.sdata; memcpy(m->rc.sdata, m->tmp_buf, m->rc.capacity); m->rc.capacity = m->rc.size; } @@ -371,21 +371,21 @@ static void p9_read_work(struct work_struct *work) /* packet is read in * not an else because some packets (like clunk) have no payload */ - if ((m->req) && (m->rc.offset == m->rc.capacity)) { + if ((m->rreq) && (m->rc.offset == m->rc.capacity)) { p9_debug(P9_DEBUG_TRANS, "got new packet\n"); - m->req->rc.size = m->rc.offset; + m->rreq->rc.size = m->rc.offset; spin_lock(&m->client->lock); - if (m->req->status != REQ_STATUS_ERROR) + if (m->rreq->status != REQ_STATUS_ERROR) status = REQ_STATUS_RCVD; - list_del(&m->req->req_list); + list_del(&m->rreq->req_list); /* update req->status while holding client->lock */ - p9_client_cb(m->client, m->req, status); + p9_client_cb(m->client, m->rreq, status); spin_unlock(&m->client->lock); m->rc.sdata = NULL; m->rc.offset = 0; m->rc.capacity = 0; - p9_req_put(m->req); - m->req = NULL; + p9_req_put(m->rreq); + m->rreq = NULL; } end_clear: -- cgit v1.2.3 From 2803cf4379ed252894f046cb8812a48db35294e3 Mon Sep 17 00:00:00 2001 From: Gertjan Halkes Date: Wed, 5 Sep 2018 15:41:29 +0900 Subject: 9p: do not trust pdu content for stat item size v9fs_dir_readdir() could deadloop if a struct was sent with a size set to -2 Link: http://lkml.kernel.org/r/1536134432-11997-1-git-send-email-asmadeus@codewreck.org Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=88021 Signed-off-by: Gertjan Halkes Signed-off-by: Dominique Martinet --- fs/9p/vfs_dir.c | 8 +++----- net/9p/protocol.c | 3 ++- 2 files changed, 5 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c index 48db9a9f13f9..cb6c4031af55 100644 --- a/fs/9p/vfs_dir.c +++ b/fs/9p/vfs_dir.c @@ -105,7 +105,6 @@ static int v9fs_dir_readdir(struct file *file, struct dir_context *ctx) int err = 0; struct p9_fid *fid; int buflen; - int reclen = 0; struct p9_rdir *rdir; struct kvec kvec; @@ -138,11 +137,10 @@ static int v9fs_dir_readdir(struct file *file, struct dir_context *ctx) while (rdir->head < rdir->tail) { err = p9stat_read(fid->clnt, rdir->buf + rdir->head, rdir->tail - rdir->head, &st); - if (err) { + if (err <= 0) { p9_debug(P9_DEBUG_VFS, "returned %d\n", err); return -EIO; } - reclen = st.size+2; over = !dir_emit(ctx, st.name, strlen(st.name), v9fs_qid2ino(&st.qid), dt_type(&st)); @@ -150,8 +148,8 @@ static int v9fs_dir_readdir(struct file *file, struct dir_context *ctx) if (over) return 0; - rdir->head += reclen; - ctx->pos += reclen; + rdir->head += err; + ctx->pos += err; } } } diff --git a/net/9p/protocol.c b/net/9p/protocol.c index ee32bbf12675..b4d80c533f89 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -571,9 +571,10 @@ int p9stat_read(struct p9_client *clnt, char *buf, int len, struct p9_wstat *st) if (ret) { p9_debug(P9_DEBUG_9P, "<<< p9stat_read failed: %d\n", ret); trace_9p_protocol_dump(clnt, &fake_pdu); + return ret; } - return ret; + return fake_pdu.offset; } EXPORT_SYMBOL(p9stat_read); -- cgit v1.2.3 From 473c7dd1d7b59ff8f88a5154737e3eac78a96e5b Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Sat, 8 Sep 2018 00:26:50 +0900 Subject: 9p/rdma: remove useless check in cm_event_handler the client c is always dereferenced to get the rdma struct, so c has to be a valid pointer at this point. Gcc would optimize that away but let's make coverity happy... Link: http://lkml.kernel.org/r/1536339057-21974-3-git-send-email-asmadeus@codewreck.org Addresses-Coverity-ID: 102778 ("Dereference before null check") Signed-off-by: Dominique Martinet --- net/9p/trans_rdma.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index 9719bc4d9424..119103bfa82e 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -274,8 +274,7 @@ p9_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) case RDMA_CM_EVENT_DISCONNECTED: if (rdma) rdma->state = P9_RDMA_CLOSED; - if (c) - c->status = Disconnected; + c->status = Disconnected; break; case RDMA_CM_EVENT_TIMEWAIT_EXIT: -- cgit v1.2.3 From ef5305f1f72eb1cfcda25c382bb0368509c0385b Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Sat, 8 Sep 2018 00:36:08 +0900 Subject: 9p: p9dirent_read: check network-provided name length strcpy to dirent->d_name could overflow the buffer, use strscpy to check the provided string length and error out if the size was too big. While we are here, make the function return an error when the pdu parsing failed, instead of returning the pdu offset as if it had been a success... Link: http://lkml.kernel.org/r/1536339057-21974-4-git-send-email-asmadeus@codewreck.org Addresses-Coverity-ID: 139133 ("Copy into fixed size buffer") Signed-off-by: Dominique Martinet --- net/9p/protocol.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/9p/protocol.c b/net/9p/protocol.c index b4d80c533f89..462ba144cb39 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -623,13 +623,19 @@ int p9dirent_read(struct p9_client *clnt, char *buf, int len, if (ret) { p9_debug(P9_DEBUG_9P, "<<< p9dirent_read failed: %d\n", ret); trace_9p_protocol_dump(clnt, &fake_pdu); - goto out; + return ret; } - strcpy(dirent->d_name, nameptr); + ret = strscpy(dirent->d_name, nameptr, sizeof(dirent->d_name)); + if (ret < 0) { + p9_debug(P9_DEBUG_ERROR, + "On the wire dirent name too long: %s\n", + nameptr); + kfree(nameptr); + return ret; + } kfree(nameptr); -out: return fake_pdu.offset; } EXPORT_SYMBOL(p9dirent_read); -- cgit v1.2.3 From 30382d6ce593f863292cd7c026a79077c4e7280f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 1 Oct 2018 10:41:43 -0400 Subject: SUNRPC: Remove the server 'authtab_lock' and just use RCU Module removal is RCU safe by design, so we really have no need to lock the 'authtab[]' array. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- net/sunrpc/svcauth.c | 52 ++++++++++++++++++++++++++++++++++------------------ 1 file changed, 34 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c index bb8db3cb8032..f83443856cd1 100644 --- a/net/sunrpc/svcauth.c +++ b/net/sunrpc/svcauth.c @@ -27,12 +27,32 @@ extern struct auth_ops svcauth_null; extern struct auth_ops svcauth_unix; -static DEFINE_SPINLOCK(authtab_lock); -static struct auth_ops *authtab[RPC_AUTH_MAXFLAVOR] = { - [0] = &svcauth_null, - [1] = &svcauth_unix, +static struct auth_ops __rcu *authtab[RPC_AUTH_MAXFLAVOR] = { + [RPC_AUTH_NULL] = (struct auth_ops __force __rcu *)&svcauth_null, + [RPC_AUTH_UNIX] = (struct auth_ops __force __rcu *)&svcauth_unix, }; +static struct auth_ops * +svc_get_auth_ops(rpc_authflavor_t flavor) +{ + struct auth_ops *aops; + + if (flavor >= RPC_AUTH_MAXFLAVOR) + return NULL; + rcu_read_lock(); + aops = rcu_dereference(authtab[flavor]); + if (aops != NULL && !try_module_get(aops->owner)) + aops = NULL; + rcu_read_unlock(); + return aops; +} + +static void +svc_put_auth_ops(struct auth_ops *aops) +{ + module_put(aops->owner); +} + int svc_authenticate(struct svc_rqst *rqstp, __be32 *authp) { @@ -45,14 +65,11 @@ svc_authenticate(struct svc_rqst *rqstp, __be32 *authp) dprintk("svc: svc_authenticate (%d)\n", flavor); - spin_lock(&authtab_lock); - if (flavor >= RPC_AUTH_MAXFLAVOR || !(aops = authtab[flavor]) || - !try_module_get(aops->owner)) { - spin_unlock(&authtab_lock); + aops = svc_get_auth_ops(flavor); + if (aops == NULL) { *authp = rpc_autherr_badcred; return SVC_DENIED; } - spin_unlock(&authtab_lock); rqstp->rq_auth_slack = 0; init_svc_cred(&rqstp->rq_cred); @@ -82,7 +99,7 @@ int svc_authorise(struct svc_rqst *rqstp) if (aops) { rv = aops->release(rqstp); - module_put(aops->owner); + svc_put_auth_ops(aops); } return rv; } @@ -90,13 +107,14 @@ int svc_authorise(struct svc_rqst *rqstp) int svc_auth_register(rpc_authflavor_t flavor, struct auth_ops *aops) { + struct auth_ops *old; int rv = -EINVAL; - spin_lock(&authtab_lock); - if (flavor < RPC_AUTH_MAXFLAVOR && authtab[flavor] == NULL) { - authtab[flavor] = aops; - rv = 0; + + if (flavor < RPC_AUTH_MAXFLAVOR) { + old = cmpxchg((struct auth_ops ** __force)&authtab[flavor], NULL, aops); + if (old == NULL || old == aops) + rv = 0; } - spin_unlock(&authtab_lock); return rv; } EXPORT_SYMBOL_GPL(svc_auth_register); @@ -104,10 +122,8 @@ EXPORT_SYMBOL_GPL(svc_auth_register); void svc_auth_unregister(rpc_authflavor_t flavor) { - spin_lock(&authtab_lock); if (flavor < RPC_AUTH_MAXFLAVOR) - authtab[flavor] = NULL; - spin_unlock(&authtab_lock); + rcu_assign_pointer(authtab[flavor], NULL); } EXPORT_SYMBOL_GPL(svc_auth_unregister); -- cgit v1.2.3 From 608a0ab2f54ab0e301ad76a41aad979ea0d02670 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 1 Oct 2018 10:41:44 -0400 Subject: SUNRPC: Add lockless lookup of the server's auth domain Avoid taking the global auth_domain_lock in most lookups of the auth domain by adding an RCU protected lookup. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svcauth.h | 1 + net/sunrpc/auth_gss/svcauth_gss.c | 9 ++++++++- net/sunrpc/svcauth.c | 22 +++++++++++++++++++--- net/sunrpc/svcauth_unix.c | 10 ++++++++-- 4 files changed, 36 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h index 04e404a07882..3e53a6e2ada7 100644 --- a/include/linux/sunrpc/svcauth.h +++ b/include/linux/sunrpc/svcauth.h @@ -82,6 +82,7 @@ struct auth_domain { struct hlist_node hash; char *name; struct auth_ops *flavour; + struct rcu_head rcu_head; }; /* diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 860f2a1bbb67..87c71fb0f0ea 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -1764,14 +1764,21 @@ out_err: } static void -svcauth_gss_domain_release(struct auth_domain *dom) +svcauth_gss_domain_release_rcu(struct rcu_head *head) { + struct auth_domain *dom = container_of(head, struct auth_domain, rcu_head); struct gss_domain *gd = container_of(dom, struct gss_domain, h); kfree(dom->name); kfree(gd); } +static void +svcauth_gss_domain_release(struct auth_domain *dom) +{ + call_rcu(&dom->rcu_head, svcauth_gss_domain_release_rcu); +} + static struct auth_ops svcauthops_gss = { .name = "rpcsec_gss", .owner = THIS_MODULE, diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c index f83443856cd1..775b8c94265b 100644 --- a/net/sunrpc/svcauth.c +++ b/net/sunrpc/svcauth.c @@ -143,10 +143,11 @@ static struct hlist_head auth_domain_table[DN_HASHMAX]; static DEFINE_SPINLOCK(auth_domain_lock); static void auth_domain_release(struct kref *kref) + __releases(&auth_domain_lock) { struct auth_domain *dom = container_of(kref, struct auth_domain, ref); - hlist_del(&dom->hash); + hlist_del_rcu(&dom->hash); dom->flavour->domain_release(dom); spin_unlock(&auth_domain_lock); } @@ -175,7 +176,7 @@ auth_domain_lookup(char *name, struct auth_domain *new) } } if (new) - hlist_add_head(&new->hash, head); + hlist_add_head_rcu(&new->hash, head); spin_unlock(&auth_domain_lock); return new; } @@ -183,6 +184,21 @@ EXPORT_SYMBOL_GPL(auth_domain_lookup); struct auth_domain *auth_domain_find(char *name) { - return auth_domain_lookup(name, NULL); + struct auth_domain *hp; + struct hlist_head *head; + + head = &auth_domain_table[hash_str(name, DN_HASHBITS)]; + + rcu_read_lock(); + hlist_for_each_entry_rcu(hp, head, hash) { + if (strcmp(hp->name, name)==0) { + if (!kref_get_unless_zero(&hp->ref)) + hp = NULL; + rcu_read_unlock(); + return hp; + } + } + rcu_read_unlock(); + return NULL; } EXPORT_SYMBOL_GPL(auth_domain_find); diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index af7f28fb8102..84cf39021a03 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -37,20 +37,26 @@ struct unix_domain { extern struct auth_ops svcauth_null; extern struct auth_ops svcauth_unix; -static void svcauth_unix_domain_release(struct auth_domain *dom) +static void svcauth_unix_domain_release_rcu(struct rcu_head *head) { + struct auth_domain *dom = container_of(head, struct auth_domain, rcu_head); struct unix_domain *ud = container_of(dom, struct unix_domain, h); kfree(dom->name); kfree(ud); } +static void svcauth_unix_domain_release(struct auth_domain *dom) +{ + call_rcu(&dom->rcu_head, svcauth_unix_domain_release_rcu); +} + struct auth_domain *unix_domain_find(char *name) { struct auth_domain *rv; struct unix_domain *new = NULL; - rv = auth_domain_lookup(name, NULL); + rv = auth_domain_find(name); while(1) { if (rv) { if (new && rv != &new->h) -- cgit v1.2.3 From b92a8fababa9d18910a54b957be55fef7f603531 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 1 Oct 2018 10:41:45 -0400 Subject: SUNRPC: Refactor sunrpc_cache_lookup This is a trivial split into lookup and insert functions, no change in behavior. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- net/sunrpc/cache.c | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 109fbe591e7b..aa8e62d61f4d 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -54,13 +54,11 @@ static void cache_init(struct cache_head *h, struct cache_detail *detail) h->last_refresh = now; } -struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, - struct cache_head *key, int hash) +static struct cache_head *sunrpc_cache_find(struct cache_detail *detail, + struct cache_head *key, int hash) { - struct cache_head *new = NULL, *freeme = NULL, *tmp = NULL; - struct hlist_head *head; - - head = &detail->hash_table[hash]; + struct hlist_head *head = &detail->hash_table[hash]; + struct cache_head *tmp; read_lock(&detail->hash_lock); @@ -75,7 +73,15 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, } } read_unlock(&detail->hash_lock); - /* Didn't find anything, insert an empty entry */ + return NULL; +} + +static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail, + struct cache_head *key, + int hash) +{ + struct cache_head *new, *tmp, *freeme = NULL; + struct hlist_head *head = &detail->hash_table[hash]; new = detail->alloc(); if (!new) @@ -114,8 +120,19 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, cache_put(freeme, detail); return new; } -EXPORT_SYMBOL_GPL(sunrpc_cache_lookup); +struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, + struct cache_head *key, int hash) +{ + struct cache_head *ret; + + ret = sunrpc_cache_find(detail, key, hash); + if (ret) + return ret; + /* Didn't find anything, insert an empty entry */ + return sunrpc_cache_add_entry(detail, key, hash); +} +EXPORT_SYMBOL_GPL(sunrpc_cache_lookup); static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch); -- cgit v1.2.3 From 72ea0321088df2c41eca8cc6160c24bcceb56ac7 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 26 Sep 2018 13:39:34 +0300 Subject: 9p: potential NULL dereference p9_tag_alloc() is supposed to return error pointers, but we accidentally return a NULL here. It would cause a NULL dereference in the caller. Link: http://lkml.kernel.org/m/20180926103934.GA14535@mwanda Fixes: 996d5b4db4b1 ("9p: Use a slab for allocating requests") Signed-off-by: Dan Carpenter Signed-off-by: Dominique Martinet --- net/9p/client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/9p/client.c b/net/9p/client.c index 47fa6158a75a..5f23e18eecc0 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -281,7 +281,7 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size) int tag; if (!req) - return NULL; + return ERR_PTR(-ENOMEM); if (p9_fcall_init(c, &req->tc, alloc_msize)) goto free_req; -- cgit v1.2.3 From e4ca13f7d075e551dc158df6af18fb412a1dba0a Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Tue, 9 Oct 2018 11:18:52 +0900 Subject: 9p/trans_fd: abort p9_read_work if req status changed p9_read_work would try to handle an errored req even if it got put to error state by another thread between the lookup (that worked) and the time it had been fully read. The request itself is safe to use because we hold a ref to it from the lookup (for m->rreq, so it was safe to read into the request data buffer until this point), but the req_list has been deleted at the same time status changed, and client_cb already has been called as well, so we should not do either. Link: http://lkml.kernel.org/r/1539057956-23741-1-git-send-email-asmadeus@codewreck.org Signed-off-by: Dominique Martinet Reported-by: syzbot+2222c34dc40b515f30dc@syzkaller.appspotmail.com Cc: Eric Van Hensbergen Cc: Latchesar Ionkov --- net/9p/trans_fd.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 12559c474dde..a0317d459cde 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -292,7 +292,6 @@ static void p9_read_work(struct work_struct *work) __poll_t n; int err; struct p9_conn *m; - int status = REQ_STATUS_ERROR; m = container_of(work, struct p9_conn, rq); @@ -375,11 +374,17 @@ static void p9_read_work(struct work_struct *work) p9_debug(P9_DEBUG_TRANS, "got new packet\n"); m->rreq->rc.size = m->rc.offset; spin_lock(&m->client->lock); - if (m->rreq->status != REQ_STATUS_ERROR) - status = REQ_STATUS_RCVD; - list_del(&m->rreq->req_list); - /* update req->status while holding client->lock */ - p9_client_cb(m->client, m->rreq, status); + if (m->rreq->status == REQ_STATUS_SENT) { + list_del(&m->rreq->req_list); + p9_client_cb(m->client, m->rreq, REQ_STATUS_RCVD); + } else { + spin_unlock(&m->client->lock); + p9_debug(P9_DEBUG_ERROR, + "Request tag %d errored out while we were reading the reply\n", + m->rc.tag); + err = -EIO; + goto error; + } spin_unlock(&m->client->lock); m->rc.sdata = NULL; m->rc.offset = 0; -- cgit v1.2.3 From fb488fc1f2b4c5128540b032892ddec91edaf8d9 Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Tue, 9 Oct 2018 11:38:00 +0900 Subject: 9p/trans_fd: put worker reqs on destroy p9_read_work/p9_write_work might still hold references to a req after having been cancelled; make sure we put any of these to avoid potential request leak on disconnect. Fixes: 728356dedeff8 ("9p: Add refcount to p9_req_t") Link: http://lkml.kernel.org/r/1539057956-23741-2-git-send-email-asmadeus@codewreck.org Signed-off-by: Dominique Martinet Cc: Eric Van Hensbergen Cc: Latchesar Ionkov Reviewed-by: Tomas Bortoli --- net/9p/trans_fd.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net') diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index a0317d459cde..f868cf6fba79 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -876,7 +876,15 @@ static void p9_conn_destroy(struct p9_conn *m) p9_mux_poll_stop(m); cancel_work_sync(&m->rq); + if (m->rreq) { + p9_req_put(m->rreq); + m->rreq = NULL; + } cancel_work_sync(&m->wq); + if (m->wreq) { + p9_req_put(m->wreq); + m->wreq = NULL; + } p9_conn_cancel(m, -ECONNRESET); -- cgit v1.2.3 From 24639ce56040a8ea890ad8836068c1ad8bd177c7 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 26 Sep 2018 19:12:07 +0200 Subject: libceph: osd_req_op_cls_init() doesn't need to take opcode Signed-off-by: Ilya Dryomov --- drivers/block/rbd.c | 3 +-- include/linux/ceph/osd_client.h | 5 ++--- net/ceph/osd_client.c | 9 ++++----- 3 files changed, 7 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index f2fe692dda40..9cc7ee3b427f 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -2374,8 +2374,7 @@ static int rbd_obj_issue_copyup(struct rbd_obj_request *obj_req, u32 bytes) if (!obj_req->osd_req) return -ENOMEM; - ret = osd_req_op_cls_init(obj_req->osd_req, 0, CEPH_OSD_OP_CALL, "rbd", - "copyup"); + ret = osd_req_op_cls_init(obj_req->osd_req, 0, "rbd", "copyup"); if (ret) return ret; diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 02096da01845..f3e828460c91 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -444,9 +444,8 @@ extern void osd_req_op_cls_response_data_pages(struct ceph_osd_request *, struct page **pages, u64 length, u32 alignment, bool pages_from_pool, bool own_pages); -extern int osd_req_op_cls_init(struct ceph_osd_request *osd_req, - unsigned int which, u16 opcode, - const char *class, const char *method); +int osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which, + const char *class, const char *method); extern int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which, u16 opcode, const char *name, const void *value, size_t size, u8 cmp_op, u8 cmp_mode); diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 60934bd8796c..a871b234cd90 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -767,15 +767,14 @@ void osd_req_op_extent_dup_last(struct ceph_osd_request *osd_req, EXPORT_SYMBOL(osd_req_op_extent_dup_last); int osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which, - u16 opcode, const char *class, const char *method) + const char *class, const char *method) { - struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, - opcode, 0); + struct ceph_osd_req_op *op; struct ceph_pagelist *pagelist; size_t payload_len = 0; size_t size; - BUG_ON(opcode != CEPH_OSD_OP_CALL); + op = _osd_req_op_init(osd_req, which, CEPH_OSD_OP_CALL, 0); pagelist = kmalloc(sizeof (*pagelist), GFP_NOFS); if (!pagelist) @@ -4962,7 +4961,7 @@ int ceph_osdc_call(struct ceph_osd_client *osdc, if (ret) goto out_put_req; - ret = osd_req_op_cls_init(req, 0, CEPH_OSD_OP_CALL, class, method); + ret = osd_req_op_cls_init(req, 0, class, method); if (ret) goto out_put_req; -- cgit v1.2.3 From 33165d472310262d8c79c7e4d1a17dc60cea7e35 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 28 Sep 2018 15:38:34 +0200 Subject: libceph: introduce ceph_pagelist_alloc() struct ceph_pagelist cannot be embedded into anything else because it has its own refcount. Merge allocation and initialization together. Signed-off-by: Ilya Dryomov --- fs/ceph/acl.c | 3 +-- fs/ceph/mds_client.c | 3 +-- fs/ceph/xattr.c | 3 +-- include/linux/ceph/pagelist.h | 11 +---------- net/ceph/osd_client.c | 14 ++++---------- net/ceph/pagelist.c | 20 ++++++++++++++++++++ 6 files changed, 28 insertions(+), 26 deletions(-) (limited to 'net') diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c index 8a9b562ae4ca..5f0103f40079 100644 --- a/fs/ceph/acl.c +++ b/fs/ceph/acl.c @@ -206,10 +206,9 @@ int ceph_pre_init_acls(struct inode *dir, umode_t *mode, tmp_buf = kmalloc(max(val_size1, val_size2), GFP_KERNEL); if (!tmp_buf) goto out_err; - pagelist = kmalloc(sizeof(struct ceph_pagelist), GFP_KERNEL); + pagelist = ceph_pagelist_alloc(GFP_KERNEL); if (!pagelist) goto out_err; - ceph_pagelist_init(pagelist); err = ceph_pagelist_reserve(pagelist, PAGE_SIZE); if (err) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index bc43c822426a..580a79b9a91f 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -3126,10 +3126,9 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, pr_info("mds%d reconnect start\n", mds); - pagelist = kmalloc(sizeof(*pagelist), GFP_NOFS); + pagelist = ceph_pagelist_alloc(GFP_NOFS); if (!pagelist) goto fail_nopagelist; - ceph_pagelist_init(pagelist); reply = ceph_msg_new(CEPH_MSG_CLIENT_RECONNECT, 0, GFP_NOFS, false); if (!reply) diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 5cc8b94f8206..316f6ad10644 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -951,11 +951,10 @@ static int ceph_sync_setxattr(struct inode *inode, const char *name, if (size > 0) { /* copy value into pagelist */ - pagelist = kmalloc(sizeof(*pagelist), GFP_NOFS); + pagelist = ceph_pagelist_alloc(GFP_NOFS); if (!pagelist) return -ENOMEM; - ceph_pagelist_init(pagelist); err = ceph_pagelist_append(pagelist, value, size); if (err) goto out; diff --git a/include/linux/ceph/pagelist.h b/include/linux/ceph/pagelist.h index d0223364349f..5dead8486fd8 100644 --- a/include/linux/ceph/pagelist.h +++ b/include/linux/ceph/pagelist.h @@ -23,16 +23,7 @@ struct ceph_pagelist_cursor { size_t room; /* room remaining to reset to */ }; -static inline void ceph_pagelist_init(struct ceph_pagelist *pl) -{ - INIT_LIST_HEAD(&pl->head); - pl->mapped_tail = NULL; - pl->length = 0; - pl->room = 0; - INIT_LIST_HEAD(&pl->free_list); - pl->num_pages_free = 0; - refcount_set(&pl->refcnt, 1); -} +struct ceph_pagelist *ceph_pagelist_alloc(gfp_t gfp_flags); extern void ceph_pagelist_release(struct ceph_pagelist *pl); diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index a871b234cd90..db2ebc9e5f1f 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -776,12 +776,10 @@ int osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which, op = _osd_req_op_init(osd_req, which, CEPH_OSD_OP_CALL, 0); - pagelist = kmalloc(sizeof (*pagelist), GFP_NOFS); + pagelist = ceph_pagelist_alloc(GFP_NOFS); if (!pagelist) return -ENOMEM; - ceph_pagelist_init(pagelist); - op->cls.class_name = class; size = strlen(class); BUG_ON(size > (size_t) U8_MAX); @@ -814,12 +812,10 @@ int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which, BUG_ON(opcode != CEPH_OSD_OP_SETXATTR && opcode != CEPH_OSD_OP_CMPXATTR); - pagelist = kmalloc(sizeof(*pagelist), GFP_NOFS); + pagelist = ceph_pagelist_alloc(GFP_NOFS); if (!pagelist) return -ENOMEM; - ceph_pagelist_init(pagelist); - payload_len = strlen(name); op->xattr.name_len = payload_len; ceph_pagelist_append(pagelist, name, payload_len); @@ -4598,11 +4594,10 @@ static int osd_req_op_notify_ack_init(struct ceph_osd_request *req, int which, op = _osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY_ACK, 0); - pl = kmalloc(sizeof(*pl), GFP_NOIO); + pl = ceph_pagelist_alloc(GFP_NOIO); if (!pl) return -ENOMEM; - ceph_pagelist_init(pl); ret = ceph_pagelist_encode_64(pl, notify_id); ret |= ceph_pagelist_encode_64(pl, cookie); if (payload) { @@ -4669,11 +4664,10 @@ static int osd_req_op_notify_init(struct ceph_osd_request *req, int which, op = _osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY, 0); op->notify.cookie = cookie; - pl = kmalloc(sizeof(*pl), GFP_NOIO); + pl = ceph_pagelist_alloc(GFP_NOIO); if (!pl) return -ENOMEM; - ceph_pagelist_init(pl); ret = ceph_pagelist_encode_32(pl, 1); /* prot_ver */ ret |= ceph_pagelist_encode_32(pl, timeout); ret |= ceph_pagelist_encode_32(pl, payload_len); diff --git a/net/ceph/pagelist.c b/net/ceph/pagelist.c index 2ea0564771d2..65e34f78b05d 100644 --- a/net/ceph/pagelist.c +++ b/net/ceph/pagelist.c @@ -6,6 +6,26 @@ #include #include +struct ceph_pagelist *ceph_pagelist_alloc(gfp_t gfp_flags) +{ + struct ceph_pagelist *pl; + + pl = kmalloc(sizeof(*pl), gfp_flags); + if (!pl) + return NULL; + + INIT_LIST_HEAD(&pl->head); + pl->mapped_tail = NULL; + pl->length = 0; + pl->room = 0; + INIT_LIST_HEAD(&pl->free_list); + pl->num_pages_free = 0; + refcount_set(&pl->refcnt, 1); + + return pl; +} +EXPORT_SYMBOL(ceph_pagelist_alloc); + static void ceph_pagelist_unmap_tail(struct ceph_pagelist *pl) { if (pl->mapped_tail) { -- cgit v1.2.3 From 894868330a1e038ea4a65dbb81741eef70ad71b1 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 28 Sep 2018 16:02:53 +0200 Subject: libceph: don't consume a ref on pagelist in ceph_msg_data_add_pagelist() Because send_mds_reconnect() wants to send a message with a pagelist and pass the ownership to the messenger, ceph_msg_data_add_pagelist() consumes a ref which is then put in ceph_msg_data_destroy(). This makes managing pagelists in the OSD client (where they are wrapped in ceph_osd_data) unnecessarily hard because the handoff only happens in ceph_osdc_start_request() instead of when the pagelist is passed to ceph_osd_data_pagelist_init(). I counted several memory leaks on various error paths. Fix up ceph_msg_data_add_pagelist() and carry a pagelist ref in ceph_osd_data. Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 2 +- net/ceph/messenger.c | 1 + net/ceph/osd_client.c | 8 ++++++++ 3 files changed, 10 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 580a79b9a91f..97de674ea377 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2136,7 +2136,6 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, if (req->r_pagelist) { struct ceph_pagelist *pagelist = req->r_pagelist; - refcount_inc(&pagelist->refcnt); ceph_msg_data_add_pagelist(msg, pagelist); msg->hdr.data_len = cpu_to_le32(pagelist->length); } else { @@ -3240,6 +3239,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, mutex_unlock(&mdsc->mutex); up_read(&mdsc->snap_rwsem); + ceph_pagelist_release(pagelist); return; fail: diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 0a187196aeed..76684edc43ef 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -3313,6 +3313,7 @@ void ceph_msg_data_add_pagelist(struct ceph_msg *msg, data = ceph_msg_data_create(CEPH_MSG_DATA_PAGELIST); BUG_ON(!data); + refcount_inc(&pagelist->refcnt); data->pagelist = pagelist; list_add_tail(&data->links, &msg->data); diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index db2ebc9e5f1f..ad00495dbd39 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -126,6 +126,9 @@ static void ceph_osd_data_init(struct ceph_osd_data *osd_data) osd_data->type = CEPH_OSD_DATA_TYPE_NONE; } +/* + * Consumes @pages if @own_pages is true. + */ static void ceph_osd_data_pages_init(struct ceph_osd_data *osd_data, struct page **pages, u64 length, u32 alignment, bool pages_from_pool, bool own_pages) @@ -138,6 +141,9 @@ static void ceph_osd_data_pages_init(struct ceph_osd_data *osd_data, osd_data->own_pages = own_pages; } +/* + * Consumes a ref on @pagelist. + */ static void ceph_osd_data_pagelist_init(struct ceph_osd_data *osd_data, struct ceph_pagelist *pagelist) { @@ -362,6 +368,8 @@ static void ceph_osd_data_release(struct ceph_osd_data *osd_data) num_pages = calc_pages_for((u64)osd_data->alignment, (u64)osd_data->length); ceph_release_page_vector(osd_data->pages, num_pages); + } else if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGELIST) { + ceph_pagelist_release(osd_data->pagelist); } ceph_osd_data_init(osd_data); } -- cgit v1.2.3 From 41a264e1b30ccde9ac83a314559ddacb78a85e91 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Sat, 13 Oct 2018 11:36:52 +0200 Subject: libceph: no need to call osd_req_opcode_valid() in osd_req_encode_op() Any uninitialized or unknown ops will be caught by the default clause anyway. Signed-off-by: Ilya Dryomov --- net/ceph/osd_client.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'net') diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index ad00495dbd39..f403a483d51d 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -903,12 +903,6 @@ static void ceph_osdc_msg_data_add(struct ceph_msg *msg, static u32 osd_req_encode_op(struct ceph_osd_op *dst, const struct ceph_osd_req_op *src) { - if (WARN_ON(!osd_req_opcode_valid(src->op))) { - pr_err("unrecognized osd opcode %d\n", src->op); - - return 0; - } - switch (src->op) { case CEPH_OSD_OP_STAT: break; -- cgit v1.2.3 From 3b83f60da6dd1becd865c1e2745123a8ae378c25 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 11 Oct 2018 17:04:33 +0200 Subject: libceph: enable fallback to ceph_msg_new() in ceph_msgpool_get() ceph_msgpool_get() can fall back to ceph_msg_new() when it is asked for a message whose front portion is larger than pool->front_len. However the caller always passes 0, effectively disabling that code path. The allocation goes to the message pool and returns a message with a front that is smaller than requested, setting us up for a crash. One example of this is a directory with a large number of snapshots. If its snap context doesn't fit, we oops in encode_request_partial(). Signed-off-by: Ilya Dryomov --- net/ceph/msgpool.c | 2 +- net/ceph/osd_client.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ceph/msgpool.c b/net/ceph/msgpool.c index 72571535883f..3dddc074f0d7 100644 --- a/net/ceph/msgpool.c +++ b/net/ceph/msgpool.c @@ -61,7 +61,7 @@ struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool, if (front_len > pool->front_len) { dout("msgpool_get %s need front %d, pool size is %d\n", pool->name, front_len, pool->front_len); - WARN_ON(1); + WARN_ON_ONCE(1); /* try to alloc a fresh message */ return ceph_msg_new(pool->type, front_len, GFP_NOFS, false); diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index f403a483d51d..35bc77c8c230 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -641,7 +641,7 @@ int ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp) msg_size += 4 + 8; /* retry_attempt, features */ if (req->r_mempool) - msg = ceph_msgpool_get(&osdc->msgpool_op, 0); + msg = ceph_msgpool_get(&osdc->msgpool_op, msg_size); else msg = ceph_msg_new(CEPH_MSG_OSD_OP, msg_size, gfp, true); if (!msg) @@ -656,7 +656,7 @@ int ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp) msg_size += req->r_num_ops * sizeof(struct ceph_osd_op); if (req->r_mempool) - msg = ceph_msgpool_get(&osdc->msgpool_op_reply, 0); + msg = ceph_msgpool_get(&osdc->msgpool_op_reply, msg_size); else msg = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, msg_size, gfp, true); if (!msg) -- cgit v1.2.3 From 81c65213d73cc51f2f2e4326d602e107c07afb05 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 11 Oct 2018 12:58:33 +0200 Subject: libceph: assign cookies in linger_submit() Register lingers directly in linger_submit(). This avoids allocating memory for notify pagelist while holding osdc->lock and simplifies both callers of linger_submit(). Signed-off-by: Ilya Dryomov --- net/ceph/osd_client.c | 48 +++++++++++++++++++++++++++--------------------- 1 file changed, 27 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 35bc77c8c230..17d1d2a04a7a 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -2998,11 +2998,21 @@ static void linger_submit(struct ceph_osd_linger_request *lreq) struct ceph_osd_client *osdc = lreq->osdc; struct ceph_osd *osd; + down_write(&osdc->lock); + linger_register(lreq); + if (lreq->is_watch) { + lreq->reg_req->r_ops[0].watch.cookie = lreq->linger_id; + lreq->ping_req->r_ops[0].watch.cookie = lreq->linger_id; + } else { + lreq->reg_req->r_ops[0].notify.cookie = lreq->linger_id; + } + calc_target(osdc, &lreq->t, NULL, false); osd = lookup_create_osd(osdc, lreq->t.osd, true); link_linger(osd, lreq); send_linger(lreq); + up_write(&osdc->lock); } static void cancel_linger_map_check(struct ceph_osd_linger_request *lreq) @@ -4523,15 +4533,14 @@ ceph_osdc_watch(struct ceph_osd_client *osdc, goto err_put_lreq; } - down_write(&osdc->lock); - linger_register(lreq); /* before osd_req_op_* */ - osd_req_op_watch_init(lreq->reg_req, 0, lreq->linger_id, - CEPH_OSD_WATCH_OP_WATCH); - osd_req_op_watch_init(lreq->ping_req, 0, lreq->linger_id, - CEPH_OSD_WATCH_OP_PING); - linger_submit(lreq); - up_write(&osdc->lock); + /* + * Pass 0 for cookie because we don't know it yet, it will be + * filled in by linger_submit(). + */ + osd_req_op_watch_init(lreq->reg_req, 0, 0, CEPH_OSD_WATCH_OP_WATCH); + osd_req_op_watch_init(lreq->ping_req, 0, 0, CEPH_OSD_WATCH_OP_PING); + linger_submit(lreq); ret = linger_reg_commit_wait(lreq); if (ret) { linger_cancel(lreq); @@ -4728,29 +4737,26 @@ int ceph_osdc_notify(struct ceph_osd_client *osdc, goto out_put_lreq; } + /* + * Pass 0 for cookie because we don't know it yet, it will be + * filled in by linger_submit(). + */ + ret = osd_req_op_notify_init(lreq->reg_req, 0, 0, 1, timeout, + payload, payload_len); + if (ret) + goto out_put_lreq; + /* for notify_id */ pages = ceph_alloc_page_vector(1, GFP_NOIO); if (IS_ERR(pages)) { ret = PTR_ERR(pages); goto out_put_lreq; } - - down_write(&osdc->lock); - linger_register(lreq); /* before osd_req_op_* */ - ret = osd_req_op_notify_init(lreq->reg_req, 0, lreq->linger_id, 1, - timeout, payload, payload_len); - if (ret) { - linger_unregister(lreq); - up_write(&osdc->lock); - ceph_release_page_vector(pages, 1); - goto out_put_lreq; - } ceph_osd_data_pages_init(osd_req_op_data(lreq->reg_req, 0, notify, response_data), pages, PAGE_SIZE, 0, false, true); - linger_submit(lreq); - up_write(&osdc->lock); + linger_submit(lreq); ret = linger_reg_commit_wait(lreq); if (!ret) ret = linger_notify_finish_wait(lreq); -- cgit v1.2.3 From 39e58c3425b1816c84724e8a77f9f9daa0143263 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 15 Oct 2018 15:26:27 +0200 Subject: libceph: introduce alloc_watch_request() ceph_osdc_alloc_messages() call will be moved out of alloc_linger_request() in the next commit, which means that ceph_osdc_watch() will need to call ceph_osdc_alloc_messages() twice. Add a helper for that. Signed-off-by: Ilya Dryomov --- net/ceph/osd_client.c | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 17d1d2a04a7a..a5fbb38086b6 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -4492,6 +4492,23 @@ alloc_linger_request(struct ceph_osd_linger_request *lreq) return req; } +static struct ceph_osd_request * +alloc_watch_request(struct ceph_osd_linger_request *lreq, u8 watch_opcode) +{ + struct ceph_osd_request *req; + + req = alloc_linger_request(lreq); + if (!req) + return NULL; + + /* + * Pass 0 for cookie because we don't know it yet, it will be + * filled in by linger_submit(). + */ + osd_req_op_watch_init(req, 0, 0, watch_opcode); + return req; +} + /* * Returns a handle, caller owns a ref. */ @@ -4521,25 +4538,18 @@ ceph_osdc_watch(struct ceph_osd_client *osdc, lreq->t.flags = CEPH_OSD_FLAG_WRITE; ktime_get_real_ts64(&lreq->mtime); - lreq->reg_req = alloc_linger_request(lreq); + lreq->reg_req = alloc_watch_request(lreq, CEPH_OSD_WATCH_OP_WATCH); if (!lreq->reg_req) { ret = -ENOMEM; goto err_put_lreq; } - lreq->ping_req = alloc_linger_request(lreq); + lreq->ping_req = alloc_watch_request(lreq, CEPH_OSD_WATCH_OP_PING); if (!lreq->ping_req) { ret = -ENOMEM; goto err_put_lreq; } - /* - * Pass 0 for cookie because we don't know it yet, it will be - * filled in by linger_submit(). - */ - osd_req_op_watch_init(lreq->reg_req, 0, 0, CEPH_OSD_WATCH_OP_WATCH); - osd_req_op_watch_init(lreq->ping_req, 0, 0, CEPH_OSD_WATCH_OP_PING); - linger_submit(lreq); ret = linger_reg_commit_wait(lreq); if (ret) { -- cgit v1.2.3 From 26f887e0a3c43f67b550e2e5d8a76e86ca11d188 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 15 Oct 2018 16:11:37 +0200 Subject: libceph, rbd, ceph: move ceph_osdc_alloc_messages() calls The current requirement is that ceph_osdc_alloc_messages() should be called after oid and oloc are known. In preparation for preallocating message data items, move ceph_osdc_alloc_messages() further down, so that it is called when OSD op codes are known. Signed-off-by: Ilya Dryomov --- drivers/block/rbd.c | 19 ++++++++++++------- fs/ceph/file.c | 10 +++++----- net/ceph/osd_client.c | 38 +++++++++++++++++++++----------------- 3 files changed, 38 insertions(+), 29 deletions(-) (limited to 'net') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 9cc7ee3b427f..8e5140bbf241 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1500,9 +1500,6 @@ rbd_osd_req_create(struct rbd_obj_request *obj_req, unsigned int num_ops) rbd_dev->header.object_prefix, obj_req->ex.oe_objno)) goto err_req; - if (ceph_osdc_alloc_messages(req, GFP_NOIO)) - goto err_req; - return req; err_req: @@ -1945,6 +1942,10 @@ static int __rbd_img_fill_request(struct rbd_img_request *img_req) } if (ret) return ret; + + ret = ceph_osdc_alloc_messages(obj_req->osd_req, GFP_NOIO); + if (ret) + return ret; } return 0; @@ -2404,6 +2405,10 @@ static int rbd_obj_issue_copyup(struct rbd_obj_request *obj_req, u32 bytes) rbd_assert(0); } + ret = ceph_osdc_alloc_messages(obj_req->osd_req, GFP_NOIO); + if (ret) + return ret; + rbd_obj_request_submit(obj_req); return 0; } @@ -3783,10 +3788,6 @@ static int rbd_obj_read_sync(struct rbd_device *rbd_dev, ceph_oloc_copy(&req->r_base_oloc, oloc); req->r_flags = CEPH_OSD_FLAG_READ; - ret = ceph_osdc_alloc_messages(req, GFP_KERNEL); - if (ret) - goto out_req; - pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL); if (IS_ERR(pages)) { ret = PTR_ERR(pages); @@ -3797,6 +3798,10 @@ static int rbd_obj_read_sync(struct rbd_device *rbd_dev, osd_req_op_extent_osd_data_pages(req, 0, pages, buf_len, 0, false, true); + ret = ceph_osdc_alloc_messages(req, GFP_KERNEL); + if (ret) + goto out_req; + ceph_osdc_start_request(osdc, req, false); ret = ceph_osdc_wait_request(osdc, req); if (ret >= 0) diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 0265f9ae0ab9..0fa6b6b6ccbc 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -870,6 +870,11 @@ static void ceph_aio_retry_work(struct work_struct *work) ceph_oloc_copy(&req->r_base_oloc, &orig_req->r_base_oloc); ceph_oid_copy(&req->r_base_oid, &orig_req->r_base_oid); + req->r_ops[0] = orig_req->r_ops[0]; + + req->r_mtime = aio_req->mtime; + req->r_data_offset = req->r_ops[0].extent.offset; + ret = ceph_osdc_alloc_messages(req, GFP_NOFS); if (ret) { ceph_osdc_put_request(req); @@ -877,11 +882,6 @@ static void ceph_aio_retry_work(struct work_struct *work) goto out; } - req->r_ops[0] = orig_req->r_ops[0]; - - req->r_mtime = aio_req->mtime; - req->r_data_offset = req->r_ops[0].extent.offset; - ceph_osdc_put_request(orig_req); req->r_callback = ceph_aio_complete_req; diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index a5fbb38086b6..7ac7f21ff317 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -4483,12 +4483,6 @@ alloc_linger_request(struct ceph_osd_linger_request *lreq) ceph_oid_copy(&req->r_base_oid, &lreq->t.base_oid); ceph_oloc_copy(&req->r_base_oloc, &lreq->t.base_oloc); - - if (ceph_osdc_alloc_messages(req, GFP_NOIO)) { - ceph_osdc_put_request(req); - return NULL; - } - return req; } @@ -4506,6 +4500,12 @@ alloc_watch_request(struct ceph_osd_linger_request *lreq, u8 watch_opcode) * filled in by linger_submit(). */ osd_req_op_watch_init(req, 0, 0, watch_opcode); + + if (ceph_osdc_alloc_messages(req, GFP_NOIO)) { + ceph_osdc_put_request(req); + return NULL; + } + return req; } @@ -4656,12 +4656,12 @@ int ceph_osdc_notify_ack(struct ceph_osd_client *osdc, ceph_oloc_copy(&req->r_base_oloc, oloc); req->r_flags = CEPH_OSD_FLAG_READ; - ret = ceph_osdc_alloc_messages(req, GFP_NOIO); + ret = osd_req_op_notify_ack_init(req, 0, notify_id, cookie, payload, + payload_len); if (ret) goto out_put_req; - ret = osd_req_op_notify_ack_init(req, 0, notify_id, cookie, payload, - payload_len); + ret = ceph_osdc_alloc_messages(req, GFP_NOIO); if (ret) goto out_put_req; @@ -4766,6 +4766,10 @@ int ceph_osdc_notify(struct ceph_osd_client *osdc, response_data), pages, PAGE_SIZE, 0, false, true); + ret = ceph_osdc_alloc_messages(lreq->reg_req, GFP_NOIO); + if (ret) + goto out_put_lreq; + linger_submit(lreq); ret = linger_reg_commit_wait(lreq); if (!ret) @@ -4892,10 +4896,6 @@ int ceph_osdc_list_watchers(struct ceph_osd_client *osdc, ceph_oloc_copy(&req->r_base_oloc, oloc); req->r_flags = CEPH_OSD_FLAG_READ; - ret = ceph_osdc_alloc_messages(req, GFP_NOIO); - if (ret) - goto out_put_req; - pages = ceph_alloc_page_vector(1, GFP_NOIO); if (IS_ERR(pages)) { ret = PTR_ERR(pages); @@ -4907,6 +4907,10 @@ int ceph_osdc_list_watchers(struct ceph_osd_client *osdc, response_data), pages, PAGE_SIZE, 0, false, true); + ret = ceph_osdc_alloc_messages(req, GFP_NOIO); + if (ret) + goto out_put_req; + ceph_osdc_start_request(osdc, req, false); ret = ceph_osdc_wait_request(osdc, req); if (ret >= 0) { @@ -4969,10 +4973,6 @@ int ceph_osdc_call(struct ceph_osd_client *osdc, ceph_oloc_copy(&req->r_base_oloc, oloc); req->r_flags = flags; - ret = ceph_osdc_alloc_messages(req, GFP_NOIO); - if (ret) - goto out_put_req; - ret = osd_req_op_cls_init(req, 0, class, method); if (ret) goto out_put_req; @@ -4984,6 +4984,10 @@ int ceph_osdc_call(struct ceph_osd_client *osdc, osd_req_op_cls_response_data_pages(req, 0, &resp_page, *resp_len, 0, false, false); + ret = ceph_osdc_alloc_messages(req, GFP_NOIO); + if (ret) + goto out_put_req; + ceph_osdc_start_request(osdc, req, false); ret = ceph_osdc_wait_request(osdc, req); if (ret >= 0) { -- cgit v1.2.3 From 0d9c1ab3be4c0187663096a6a084421d0a1e45c6 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 15 Oct 2018 17:38:23 +0200 Subject: libceph: preallocate message data items Currently message data items are allocated with ceph_msg_data_create() in setup_request_data() inside send_request(). send_request() has never been allowed to fail, so each allocation is followed by a BUG_ON: data = ceph_msg_data_create(...); BUG_ON(!data); It's been this way since support for multiple message data items was added in commit 6644ed7b7e04 ("libceph: make message data be a pointer") in 3.10. There is no reason to delay the allocation of message data items until the last possible moment and we certainly don't need a linked list of them as they are only ever appended to the end and never erased. Make ceph_msg_new2() take max_data_items and adapt the rest of the code. Reported-by: Jerry Lee Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 4 +- include/linux/ceph/messenger.h | 24 ++-------- include/linux/ceph/msgpool.h | 11 +++-- net/ceph/messenger.c | 106 +++++++++++++++-------------------------- net/ceph/msgpool.c | 25 ++++++---- net/ceph/osd_client.c | 102 +++++++++++++++++++++++++++++++++------ 6 files changed, 157 insertions(+), 115 deletions(-) (limited to 'net') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 97de674ea377..67a9aeb2f4ec 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2071,7 +2071,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, if (req->r_old_dentry_drop) len += req->r_old_dentry->d_name.len; - msg = ceph_msg_new(CEPH_MSG_CLIENT_REQUEST, len, GFP_NOFS, false); + msg = ceph_msg_new2(CEPH_MSG_CLIENT_REQUEST, len, 1, GFP_NOFS, false); if (!msg) { msg = ERR_PTR(-ENOMEM); goto out_free2; @@ -3129,7 +3129,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, if (!pagelist) goto fail_nopagelist; - reply = ceph_msg_new(CEPH_MSG_CLIENT_RECONNECT, 0, GFP_NOFS, false); + reply = ceph_msg_new2(CEPH_MSG_CLIENT_RECONNECT, 0, 1, GFP_NOFS, false); if (!reply) goto fail_nomsg; diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index fc2b4491ee0a..800a2128d411 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -82,22 +82,6 @@ enum ceph_msg_data_type { CEPH_MSG_DATA_BVECS, /* data source/destination is a bio_vec array */ }; -static __inline__ bool ceph_msg_data_type_valid(enum ceph_msg_data_type type) -{ - switch (type) { - case CEPH_MSG_DATA_NONE: - case CEPH_MSG_DATA_PAGES: - case CEPH_MSG_DATA_PAGELIST: -#ifdef CONFIG_BLOCK - case CEPH_MSG_DATA_BIO: -#endif /* CONFIG_BLOCK */ - case CEPH_MSG_DATA_BVECS: - return true; - default: - return false; - } -} - #ifdef CONFIG_BLOCK struct ceph_bio_iter { @@ -181,7 +165,6 @@ struct ceph_bvec_iter { } while (0) struct ceph_msg_data { - struct list_head links; /* ceph_msg->data */ enum ceph_msg_data_type type; union { #ifdef CONFIG_BLOCK @@ -202,7 +185,6 @@ struct ceph_msg_data { struct ceph_msg_data_cursor { size_t total_resid; /* across all data items */ - struct list_head *data_head; /* = &ceph_msg->data */ struct ceph_msg_data *data; /* current data item */ size_t resid; /* bytes not yet consumed */ @@ -240,7 +222,9 @@ struct ceph_msg { struct ceph_buffer *middle; size_t data_length; - struct list_head data; + struct ceph_msg_data *data; + int num_data_items; + int max_data_items; struct ceph_msg_data_cursor cursor; struct ceph_connection *con; @@ -381,6 +365,8 @@ void ceph_msg_data_add_bio(struct ceph_msg *msg, struct ceph_bio_iter *bio_pos, void ceph_msg_data_add_bvecs(struct ceph_msg *msg, struct ceph_bvec_iter *bvec_pos); +struct ceph_msg *ceph_msg_new2(int type, int front_len, int max_data_items, + gfp_t flags, bool can_fail); extern struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags, bool can_fail); diff --git a/include/linux/ceph/msgpool.h b/include/linux/ceph/msgpool.h index 76c98a512758..729cdf700eae 100644 --- a/include/linux/ceph/msgpool.h +++ b/include/linux/ceph/msgpool.h @@ -13,14 +13,15 @@ struct ceph_msgpool { mempool_t *pool; int type; /* preallocated message type */ int front_len; /* preallocated payload size */ + int max_data_items; }; -extern int ceph_msgpool_init(struct ceph_msgpool *pool, int type, - int front_len, int size, bool blocking, - const char *name); +int ceph_msgpool_init(struct ceph_msgpool *pool, int type, + int front_len, int max_data_items, int size, + const char *name); extern void ceph_msgpool_destroy(struct ceph_msgpool *pool); -extern struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *, - int front_len); +struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool, int front_len, + int max_data_items); extern void ceph_msgpool_put(struct ceph_msgpool *, struct ceph_msg *); #endif diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 76684edc43ef..88e35830198c 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -156,7 +156,6 @@ static bool con_flag_test_and_set(struct ceph_connection *con, /* Slab caches for frequently-allocated structures */ static struct kmem_cache *ceph_msg_cache; -static struct kmem_cache *ceph_msg_data_cache; /* static tag bytes (protocol control messages) */ static char tag_msg = CEPH_MSGR_TAG_MSG; @@ -235,23 +234,11 @@ static int ceph_msgr_slab_init(void) if (!ceph_msg_cache) return -ENOMEM; - BUG_ON(ceph_msg_data_cache); - ceph_msg_data_cache = KMEM_CACHE(ceph_msg_data, 0); - if (ceph_msg_data_cache) - return 0; - - kmem_cache_destroy(ceph_msg_cache); - ceph_msg_cache = NULL; - - return -ENOMEM; + return 0; } static void ceph_msgr_slab_exit(void) { - BUG_ON(!ceph_msg_data_cache); - kmem_cache_destroy(ceph_msg_data_cache); - ceph_msg_data_cache = NULL; - BUG_ON(!ceph_msg_cache); kmem_cache_destroy(ceph_msg_cache); ceph_msg_cache = NULL; @@ -1141,16 +1128,13 @@ static void __ceph_msg_data_cursor_init(struct ceph_msg_data_cursor *cursor) static void ceph_msg_data_cursor_init(struct ceph_msg *msg, size_t length) { struct ceph_msg_data_cursor *cursor = &msg->cursor; - struct ceph_msg_data *data; BUG_ON(!length); BUG_ON(length > msg->data_length); - BUG_ON(list_empty(&msg->data)); + BUG_ON(!msg->num_data_items); - cursor->data_head = &msg->data; cursor->total_resid = length; - data = list_first_entry(&msg->data, struct ceph_msg_data, links); - cursor->data = data; + cursor->data = msg->data; __ceph_msg_data_cursor_init(cursor); } @@ -1231,8 +1215,7 @@ static void ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor, if (!cursor->resid && cursor->total_resid) { WARN_ON(!cursor->last_piece); - BUG_ON(list_is_last(&cursor->data->links, cursor->data_head)); - cursor->data = list_next_entry(cursor->data, links); + cursor->data++; __ceph_msg_data_cursor_init(cursor); new_piece = true; } @@ -1248,9 +1231,6 @@ static size_t sizeof_footer(struct ceph_connection *con) static void prepare_message_data(struct ceph_msg *msg, u32 data_len) { - BUG_ON(!msg); - BUG_ON(!data_len); - /* Initialize data cursor */ ceph_msg_data_cursor_init(msg, (size_t)data_len); @@ -1590,7 +1570,7 @@ static int write_partial_message_data(struct ceph_connection *con) dout("%s %p msg %p\n", __func__, con, msg); - if (list_empty(&msg->data)) + if (!msg->num_data_items) return -EINVAL; /* @@ -2347,8 +2327,7 @@ static int read_partial_msg_data(struct ceph_connection *con) u32 crc = 0; int ret; - BUG_ON(!msg); - if (list_empty(&msg->data)) + if (!msg->num_data_items) return -EIO; if (do_datacrc) @@ -3256,32 +3235,16 @@ bool ceph_con_keepalive_expired(struct ceph_connection *con, return false; } -static struct ceph_msg_data *ceph_msg_data_create(enum ceph_msg_data_type type) +static struct ceph_msg_data *ceph_msg_data_add(struct ceph_msg *msg) { - struct ceph_msg_data *data; - - if (WARN_ON(!ceph_msg_data_type_valid(type))) - return NULL; - - data = kmem_cache_zalloc(ceph_msg_data_cache, GFP_NOFS); - if (!data) - return NULL; - - data->type = type; - INIT_LIST_HEAD(&data->links); - - return data; + BUG_ON(msg->num_data_items >= msg->max_data_items); + return &msg->data[msg->num_data_items++]; } static void ceph_msg_data_destroy(struct ceph_msg_data *data) { - if (!data) - return; - - WARN_ON(!list_empty(&data->links)); if (data->type == CEPH_MSG_DATA_PAGELIST) ceph_pagelist_release(data->pagelist); - kmem_cache_free(ceph_msg_data_cache, data); } void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages, @@ -3292,13 +3255,12 @@ void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages, BUG_ON(!pages); BUG_ON(!length); - data = ceph_msg_data_create(CEPH_MSG_DATA_PAGES); - BUG_ON(!data); + data = ceph_msg_data_add(msg); + data->type = CEPH_MSG_DATA_PAGES; data->pages = pages; data->length = length; data->alignment = alignment & ~PAGE_MASK; - list_add_tail(&data->links, &msg->data); msg->data_length += length; } EXPORT_SYMBOL(ceph_msg_data_add_pages); @@ -3311,12 +3273,11 @@ void ceph_msg_data_add_pagelist(struct ceph_msg *msg, BUG_ON(!pagelist); BUG_ON(!pagelist->length); - data = ceph_msg_data_create(CEPH_MSG_DATA_PAGELIST); - BUG_ON(!data); + data = ceph_msg_data_add(msg); + data->type = CEPH_MSG_DATA_PAGELIST; refcount_inc(&pagelist->refcnt); data->pagelist = pagelist; - list_add_tail(&data->links, &msg->data); msg->data_length += pagelist->length; } EXPORT_SYMBOL(ceph_msg_data_add_pagelist); @@ -3327,12 +3288,11 @@ void ceph_msg_data_add_bio(struct ceph_msg *msg, struct ceph_bio_iter *bio_pos, { struct ceph_msg_data *data; - data = ceph_msg_data_create(CEPH_MSG_DATA_BIO); - BUG_ON(!data); + data = ceph_msg_data_add(msg); + data->type = CEPH_MSG_DATA_BIO; data->bio_pos = *bio_pos; data->bio_length = length; - list_add_tail(&data->links, &msg->data); msg->data_length += length; } EXPORT_SYMBOL(ceph_msg_data_add_bio); @@ -3343,11 +3303,10 @@ void ceph_msg_data_add_bvecs(struct ceph_msg *msg, { struct ceph_msg_data *data; - data = ceph_msg_data_create(CEPH_MSG_DATA_BVECS); - BUG_ON(!data); + data = ceph_msg_data_add(msg); + data->type = CEPH_MSG_DATA_BVECS; data->bvec_pos = *bvec_pos; - list_add_tail(&data->links, &msg->data); msg->data_length += bvec_pos->iter.bi_size; } EXPORT_SYMBOL(ceph_msg_data_add_bvecs); @@ -3356,8 +3315,8 @@ EXPORT_SYMBOL(ceph_msg_data_add_bvecs); * construct a new message with given type, size * the new msg has a ref count of 1. */ -struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags, - bool can_fail) +struct ceph_msg *ceph_msg_new2(int type, int front_len, int max_data_items, + gfp_t flags, bool can_fail) { struct ceph_msg *m; @@ -3371,7 +3330,6 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags, INIT_LIST_HEAD(&m->list_head); kref_init(&m->kref); - INIT_LIST_HEAD(&m->data); /* front */ if (front_len) { @@ -3386,6 +3344,15 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags, } m->front_alloc_len = m->front.iov_len = front_len; + if (max_data_items) { + m->data = kmalloc_array(max_data_items, sizeof(*m->data), + flags); + if (!m->data) + goto out2; + + m->max_data_items = max_data_items; + } + dout("ceph_msg_new %p front %d\n", m, front_len); return m; @@ -3402,6 +3369,13 @@ out: } return NULL; } +EXPORT_SYMBOL(ceph_msg_new2); + +struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags, + bool can_fail) +{ + return ceph_msg_new2(type, front_len, 0, flags, can_fail); +} EXPORT_SYMBOL(ceph_msg_new); /* @@ -3497,13 +3471,14 @@ static void ceph_msg_free(struct ceph_msg *m) { dout("%s %p\n", __func__, m); kvfree(m->front.iov_base); + kfree(m->data); kmem_cache_free(ceph_msg_cache, m); } static void ceph_msg_release(struct kref *kref) { struct ceph_msg *m = container_of(kref, struct ceph_msg, kref); - struct ceph_msg_data *data, *next; + int i; dout("%s %p\n", __func__, m); WARN_ON(!list_empty(&m->list_head)); @@ -3516,11 +3491,8 @@ static void ceph_msg_release(struct kref *kref) m->middle = NULL; } - list_for_each_entry_safe(data, next, &m->data, links) { - list_del_init(&data->links); - ceph_msg_data_destroy(data); - } - m->data_length = 0; + for (i = 0; i < m->num_data_items; i++) + ceph_msg_data_destroy(&m->data[i]); if (m->pool) ceph_msgpool_put(m->pool, m); diff --git a/net/ceph/msgpool.c b/net/ceph/msgpool.c index 3dddc074f0d7..e3ecb80cd182 100644 --- a/net/ceph/msgpool.c +++ b/net/ceph/msgpool.c @@ -14,7 +14,8 @@ static void *msgpool_alloc(gfp_t gfp_mask, void *arg) struct ceph_msgpool *pool = arg; struct ceph_msg *msg; - msg = ceph_msg_new(pool->type, pool->front_len, gfp_mask, true); + msg = ceph_msg_new2(pool->type, pool->front_len, pool->max_data_items, + gfp_mask, true); if (!msg) { dout("msgpool_alloc %s failed\n", pool->name); } else { @@ -35,11 +36,13 @@ static void msgpool_free(void *element, void *arg) } int ceph_msgpool_init(struct ceph_msgpool *pool, int type, - int front_len, int size, bool blocking, const char *name) + int front_len, int max_data_items, int size, + const char *name) { dout("msgpool %s init\n", name); pool->type = type; pool->front_len = front_len; + pool->max_data_items = max_data_items; pool->pool = mempool_create(size, msgpool_alloc, msgpool_free, pool); if (!pool->pool) return -ENOMEM; @@ -53,18 +56,21 @@ void ceph_msgpool_destroy(struct ceph_msgpool *pool) mempool_destroy(pool->pool); } -struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool, - int front_len) +struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool, int front_len, + int max_data_items) { struct ceph_msg *msg; - if (front_len > pool->front_len) { - dout("msgpool_get %s need front %d, pool size is %d\n", - pool->name, front_len, pool->front_len); + if (front_len > pool->front_len || + max_data_items > pool->max_data_items) { + pr_warn_ratelimited("%s need %d/%d, pool %s has %d/%d\n", + __func__, front_len, max_data_items, pool->name, + pool->front_len, pool->max_data_items); WARN_ON_ONCE(1); /* try to alloc a fresh message */ - return ceph_msg_new(pool->type, front_len, GFP_NOFS, false); + return ceph_msg_new2(pool->type, front_len, max_data_items, + GFP_NOFS, false); } msg = mempool_alloc(pool->pool, GFP_NOFS); @@ -80,6 +86,9 @@ void ceph_msgpool_put(struct ceph_msgpool *pool, struct ceph_msg *msg) msg->front.iov_len = pool->front_len; msg->hdr.front_len = cpu_to_le32(pool->front_len); + msg->data_length = 0; + msg->num_data_items = 0; + kref_init(&msg->kref); /* retake single ref */ mempool_free(msg, pool->pool); } diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 7ac7f21ff317..cf0bd2cce848 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -614,12 +614,15 @@ static int ceph_oloc_encoding_size(const struct ceph_object_locator *oloc) return 8 + 4 + 4 + 4 + (oloc->pool_ns ? oloc->pool_ns->len : 0); } -int ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp) +static int __ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp, + int num_request_data_items, + int num_reply_data_items) { struct ceph_osd_client *osdc = req->r_osdc; struct ceph_msg *msg; int msg_size; + WARN_ON(req->r_request || req->r_reply); WARN_ON(ceph_oid_empty(&req->r_base_oid)); WARN_ON(ceph_oloc_empty(&req->r_base_oloc)); @@ -641,9 +644,11 @@ int ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp) msg_size += 4 + 8; /* retry_attempt, features */ if (req->r_mempool) - msg = ceph_msgpool_get(&osdc->msgpool_op, msg_size); + msg = ceph_msgpool_get(&osdc->msgpool_op, msg_size, + num_request_data_items); else - msg = ceph_msg_new(CEPH_MSG_OSD_OP, msg_size, gfp, true); + msg = ceph_msg_new2(CEPH_MSG_OSD_OP, msg_size, + num_request_data_items, gfp, true); if (!msg) return -ENOMEM; @@ -656,9 +661,11 @@ int ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp) msg_size += req->r_num_ops * sizeof(struct ceph_osd_op); if (req->r_mempool) - msg = ceph_msgpool_get(&osdc->msgpool_op_reply, msg_size); + msg = ceph_msgpool_get(&osdc->msgpool_op_reply, msg_size, + num_reply_data_items); else - msg = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, msg_size, gfp, true); + msg = ceph_msg_new2(CEPH_MSG_OSD_OPREPLY, msg_size, + num_reply_data_items, gfp, true); if (!msg) return -ENOMEM; @@ -666,7 +673,6 @@ int ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp) return 0; } -EXPORT_SYMBOL(ceph_osdc_alloc_messages); static bool osd_req_opcode_valid(u16 opcode) { @@ -679,6 +685,64 @@ __CEPH_FORALL_OSD_OPS(GENERATE_CASE) } } +static void get_num_data_items(struct ceph_osd_request *req, + int *num_request_data_items, + int *num_reply_data_items) +{ + struct ceph_osd_req_op *op; + + *num_request_data_items = 0; + *num_reply_data_items = 0; + + for (op = req->r_ops; op != &req->r_ops[req->r_num_ops]; op++) { + switch (op->op) { + /* request */ + case CEPH_OSD_OP_WRITE: + case CEPH_OSD_OP_WRITEFULL: + case CEPH_OSD_OP_SETXATTR: + case CEPH_OSD_OP_CMPXATTR: + case CEPH_OSD_OP_NOTIFY_ACK: + *num_request_data_items += 1; + break; + + /* reply */ + case CEPH_OSD_OP_STAT: + case CEPH_OSD_OP_READ: + case CEPH_OSD_OP_LIST_WATCHERS: + *num_reply_data_items += 1; + break; + + /* both */ + case CEPH_OSD_OP_NOTIFY: + *num_request_data_items += 1; + *num_reply_data_items += 1; + break; + case CEPH_OSD_OP_CALL: + *num_request_data_items += 2; + *num_reply_data_items += 1; + break; + + default: + WARN_ON(!osd_req_opcode_valid(op->op)); + break; + } + } +} + +/* + * oid, oloc and OSD op opcode(s) must be filled in before this function + * is called. + */ +int ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp) +{ + int num_request_data_items, num_reply_data_items; + + get_num_data_items(req, &num_request_data_items, &num_reply_data_items); + return __ceph_osdc_alloc_messages(req, gfp, num_request_data_items, + num_reply_data_items); +} +EXPORT_SYMBOL(ceph_osdc_alloc_messages); + /* * This is an osd op init function for opcodes that have no data or * other information associated with them. It also serves as a @@ -1035,7 +1099,15 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, if (flags & CEPH_OSD_FLAG_WRITE) req->r_data_offset = off; - r = ceph_osdc_alloc_messages(req, GFP_NOFS); + if (num_ops > 1) + /* + * This is a special case for ceph_writepages_start(), but it + * also covers ceph_uninline_data(). If more multi-op request + * use cases emerge, we will need a separate helper. + */ + r = __ceph_osdc_alloc_messages(req, GFP_NOFS, num_ops, 0); + else + r = ceph_osdc_alloc_messages(req, GFP_NOFS); if (r) goto fail; @@ -1842,13 +1914,16 @@ static bool should_plug_request(struct ceph_osd_request *req) return true; } +/* + * Keep get_num_data_items() in sync with this function. + */ static void setup_request_data(struct ceph_osd_request *req, struct ceph_msg *msg) { u32 data_len = 0; int i; - if (!list_empty(&msg->data)) + if (msg->num_data_items) return; WARN_ON(msg->data_length); @@ -4325,9 +4400,7 @@ static void handle_watch_notify(struct ceph_osd_client *osdc, lreq->notify_id, notify_id); } else if (!completion_done(&lreq->notify_finish_wait)) { struct ceph_msg_data *data = - list_first_entry_or_null(&msg->data, - struct ceph_msg_data, - links); + msg->num_data_items ? &msg->data[0] : NULL; if (data) { if (lreq->preply_pages) { @@ -5036,11 +5109,12 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client) goto out_map; err = ceph_msgpool_init(&osdc->msgpool_op, CEPH_MSG_OSD_OP, - PAGE_SIZE, 10, true, "osd_op"); + PAGE_SIZE, CEPH_OSD_SLAB_OPS, 10, "osd_op"); if (err < 0) goto out_mempool; err = ceph_msgpool_init(&osdc->msgpool_op_reply, CEPH_MSG_OSD_OPREPLY, - PAGE_SIZE, 10, true, "osd_op_reply"); + PAGE_SIZE, CEPH_OSD_SLAB_OPS, 10, + "osd_op_reply"); if (err < 0) goto out_msgpool; @@ -5310,7 +5384,7 @@ static struct ceph_msg *alloc_msg_with_page_vector(struct ceph_msg_header *hdr) u32 front_len = le32_to_cpu(hdr->front_len); u32 data_len = le32_to_cpu(hdr->data_len); - m = ceph_msg_new(type, front_len, GFP_NOIO, false); + m = ceph_msg_new2(type, front_len, 1, GFP_NOIO, false); if (!m) return NULL; -- cgit v1.2.3 From 98c4bfe9d89b22d7bfddf6469241658920b6fafe Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 17 Oct 2018 14:23:04 +0200 Subject: libceph: check reply num_data_items in setup_request_data() setup_request_data() adds message data items to both request and reply messages, but only checks request num_data_items before proceeding with the loop. This is wrong because if an op doesn't have any request data items but has a reply data item (e.g. read), a duplicate data item gets added to the message on every resend attempt. This went unnoticed for years but now that message data items are preallocated, it promptly crashes in ceph_msg_data_add(). Amend the signature to make it clear that setup_request_data() operates on both request and reply messages. Also, remove data_len assert -- we have another one in prepare_write_message(). Signed-off-by: Ilya Dryomov --- net/ceph/osd_client.c | 48 +++++++++++++++++++++++------------------------- 1 file changed, 23 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index cf0bd2cce848..a0148de51cc6 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1917,48 +1917,48 @@ static bool should_plug_request(struct ceph_osd_request *req) /* * Keep get_num_data_items() in sync with this function. */ -static void setup_request_data(struct ceph_osd_request *req, - struct ceph_msg *msg) +static void setup_request_data(struct ceph_osd_request *req) { - u32 data_len = 0; - int i; + struct ceph_msg *request_msg = req->r_request; + struct ceph_msg *reply_msg = req->r_reply; + struct ceph_osd_req_op *op; - if (msg->num_data_items) + if (req->r_request->num_data_items || req->r_reply->num_data_items) return; - WARN_ON(msg->data_length); - for (i = 0; i < req->r_num_ops; i++) { - struct ceph_osd_req_op *op = &req->r_ops[i]; - + WARN_ON(request_msg->data_length || reply_msg->data_length); + for (op = req->r_ops; op != &req->r_ops[req->r_num_ops]; op++) { switch (op->op) { /* request */ case CEPH_OSD_OP_WRITE: case CEPH_OSD_OP_WRITEFULL: WARN_ON(op->indata_len != op->extent.length); - ceph_osdc_msg_data_add(msg, &op->extent.osd_data); + ceph_osdc_msg_data_add(request_msg, + &op->extent.osd_data); break; case CEPH_OSD_OP_SETXATTR: case CEPH_OSD_OP_CMPXATTR: WARN_ON(op->indata_len != op->xattr.name_len + op->xattr.value_len); - ceph_osdc_msg_data_add(msg, &op->xattr.osd_data); + ceph_osdc_msg_data_add(request_msg, + &op->xattr.osd_data); break; case CEPH_OSD_OP_NOTIFY_ACK: - ceph_osdc_msg_data_add(msg, + ceph_osdc_msg_data_add(request_msg, &op->notify_ack.request_data); break; /* reply */ case CEPH_OSD_OP_STAT: - ceph_osdc_msg_data_add(req->r_reply, + ceph_osdc_msg_data_add(reply_msg, &op->raw_data_in); break; case CEPH_OSD_OP_READ: - ceph_osdc_msg_data_add(req->r_reply, + ceph_osdc_msg_data_add(reply_msg, &op->extent.osd_data); break; case CEPH_OSD_OP_LIST_WATCHERS: - ceph_osdc_msg_data_add(req->r_reply, + ceph_osdc_msg_data_add(reply_msg, &op->list_watchers.response_data); break; @@ -1967,25 +1967,23 @@ static void setup_request_data(struct ceph_osd_request *req, WARN_ON(op->indata_len != op->cls.class_len + op->cls.method_len + op->cls.indata_len); - ceph_osdc_msg_data_add(msg, &op->cls.request_info); + ceph_osdc_msg_data_add(request_msg, + &op->cls.request_info); /* optional, can be NONE */ - ceph_osdc_msg_data_add(msg, &op->cls.request_data); + ceph_osdc_msg_data_add(request_msg, + &op->cls.request_data); /* optional, can be NONE */ - ceph_osdc_msg_data_add(req->r_reply, + ceph_osdc_msg_data_add(reply_msg, &op->cls.response_data); break; case CEPH_OSD_OP_NOTIFY: - ceph_osdc_msg_data_add(msg, + ceph_osdc_msg_data_add(request_msg, &op->notify.request_data); - ceph_osdc_msg_data_add(req->r_reply, + ceph_osdc_msg_data_add(reply_msg, &op->notify.response_data); break; } - - data_len += op->indata_len; } - - WARN_ON(data_len != msg->data_length); } static void encode_pgid(void **p, const struct ceph_pg *pgid) @@ -2033,7 +2031,7 @@ static void encode_request_partial(struct ceph_osd_request *req, req->r_data_offset || req->r_snapc); } - setup_request_data(req, msg); + setup_request_data(req); encode_spgid(&p, &req->r_t.spgid); /* actual spg */ ceph_encode_32(&p, req->r_t.pgid.seed); /* raw hash */ -- cgit v1.2.3 From 23ddf9bea900b4ce39e5707e1ddf66062cfe6d91 Mon Sep 17 00:00:00 2001 From: Luis Henriques Date: Mon, 15 Oct 2018 16:45:58 +0100 Subject: libceph: support the RADOS copy-from operation Add support for performing remote object copies using the 'copy-from' operation. [ Add COPY_FROM to get_num_data_items(). ] Signed-off-by: Luis Henriques Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- include/linux/ceph/osd_client.h | 17 ++++++++ include/linux/ceph/rados.h | 28 +++++++++++++ net/ceph/osd_client.c | 90 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 135 insertions(+) (limited to 'net') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index f3e828460c91..7a2af5034278 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -136,6 +136,13 @@ struct ceph_osd_req_op { u64 expected_object_size; u64 expected_write_size; } alloc_hint; + struct { + u64 snapid; + u64 src_version; + u8 flags; + u32 src_fadvise_flags; + struct ceph_osd_data osd_data; + } copy_from; }; }; @@ -510,6 +517,16 @@ extern int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct timespec64 *mtime, struct page **pages, int nr_pages); +int ceph_osdc_copy_from(struct ceph_osd_client *osdc, + u64 src_snapid, u64 src_version, + struct ceph_object_id *src_oid, + struct ceph_object_locator *src_oloc, + u32 src_fadvise_flags, + struct ceph_object_id *dst_oid, + struct ceph_object_locator *dst_oloc, + u32 dst_fadvise_flags, + u8 copy_from_flags); + /* watch/notify */ struct ceph_osd_linger_request * ceph_osdc_watch(struct ceph_osd_client *osdc, diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index f1988387c5ad..3eb0e55665b4 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -410,6 +410,14 @@ enum { enum { CEPH_OSD_OP_FLAG_EXCL = 1, /* EXCL object create */ CEPH_OSD_OP_FLAG_FAILOK = 2, /* continue despite failure */ + CEPH_OSD_OP_FLAG_FADVISE_RANDOM = 0x4, /* the op is random */ + CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL = 0x8, /* the op is sequential */ + CEPH_OSD_OP_FLAG_FADVISE_WILLNEED = 0x10,/* data will be accessed in + the near future */ + CEPH_OSD_OP_FLAG_FADVISE_DONTNEED = 0x20,/* data will not be accessed + in the near future */ + CEPH_OSD_OP_FLAG_FADVISE_NOCACHE = 0x40,/* data will be accessed only + once by this client */ }; #define EOLDSNAPC ERESTART /* ORDERSNAP flag set; writer has old snapc*/ @@ -431,6 +439,15 @@ enum { CEPH_OSD_CMPXATTR_MODE_U64 = 2 }; +enum { + CEPH_OSD_COPY_FROM_FLAG_FLUSH = 1, /* part of a flush operation */ + CEPH_OSD_COPY_FROM_FLAG_IGNORE_OVERLAY = 2, /* ignore pool overlay */ + CEPH_OSD_COPY_FROM_FLAG_IGNORE_CACHE = 4, /* ignore osd cache logic */ + CEPH_OSD_COPY_FROM_FLAG_MAP_SNAP_CLONE = 8, /* map snap direct to + * cloneid */ + CEPH_OSD_COPY_FROM_FLAG_RWORDERED = 16, /* order with write */ +}; + enum { CEPH_OSD_WATCH_OP_UNWATCH = 0, CEPH_OSD_WATCH_OP_LEGACY_WATCH = 1, @@ -497,6 +514,17 @@ struct ceph_osd_op { __le64 expected_object_size; __le64 expected_write_size; } __attribute__ ((packed)) alloc_hint; + struct { + __le64 snapid; + __le64 src_version; + __u8 flags; /* CEPH_OSD_COPY_FROM_FLAG_* */ + /* + * CEPH_OSD_OP_FLAG_FADVISE_*: fadvise flags + * for src object, flags for dest object are in + * ceph_osd_op::flags. + */ + __le32 src_fadvise_flags; + } __attribute__ ((packed)) copy_from; }; __le32 payload_len; } __attribute__ ((packed)); diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index a0148de51cc6..d23a9f81f3d7 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -410,6 +410,9 @@ static void osd_req_op_data_release(struct ceph_osd_request *osd_req, case CEPH_OSD_OP_LIST_WATCHERS: ceph_osd_data_release(&op->list_watchers.response_data); break; + case CEPH_OSD_OP_COPY_FROM: + ceph_osd_data_release(&op->copy_from.osd_data); + break; default: break; } @@ -702,6 +705,7 @@ static void get_num_data_items(struct ceph_osd_request *req, case CEPH_OSD_OP_SETXATTR: case CEPH_OSD_OP_CMPXATTR: case CEPH_OSD_OP_NOTIFY_ACK: + case CEPH_OSD_OP_COPY_FROM: *num_request_data_items += 1; break; @@ -1016,6 +1020,14 @@ static u32 osd_req_encode_op(struct ceph_osd_op *dst, case CEPH_OSD_OP_CREATE: case CEPH_OSD_OP_DELETE: break; + case CEPH_OSD_OP_COPY_FROM: + dst->copy_from.snapid = cpu_to_le64(src->copy_from.snapid); + dst->copy_from.src_version = + cpu_to_le64(src->copy_from.src_version); + dst->copy_from.flags = src->copy_from.flags; + dst->copy_from.src_fadvise_flags = + cpu_to_le32(src->copy_from.src_fadvise_flags); + break; default: pr_err("unsupported osd opcode %s\n", ceph_osd_op_name(src->op)); @@ -1947,6 +1959,10 @@ static void setup_request_data(struct ceph_osd_request *req) ceph_osdc_msg_data_add(request_msg, &op->notify_ack.request_data); break; + case CEPH_OSD_OP_COPY_FROM: + ceph_osdc_msg_data_add(request_msg, + &op->copy_from.osd_data); + break; /* reply */ case CEPH_OSD_OP_STAT: @@ -5255,6 +5271,80 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, } EXPORT_SYMBOL(ceph_osdc_writepages); +static int osd_req_op_copy_from_init(struct ceph_osd_request *req, + u64 src_snapid, u64 src_version, + struct ceph_object_id *src_oid, + struct ceph_object_locator *src_oloc, + u32 src_fadvise_flags, + u32 dst_fadvise_flags, + u8 copy_from_flags) +{ + struct ceph_osd_req_op *op; + struct page **pages; + void *p, *end; + + pages = ceph_alloc_page_vector(1, GFP_KERNEL); + if (IS_ERR(pages)) + return PTR_ERR(pages); + + op = _osd_req_op_init(req, 0, CEPH_OSD_OP_COPY_FROM, dst_fadvise_flags); + op->copy_from.snapid = src_snapid; + op->copy_from.src_version = src_version; + op->copy_from.flags = copy_from_flags; + op->copy_from.src_fadvise_flags = src_fadvise_flags; + + p = page_address(pages[0]); + end = p + PAGE_SIZE; + ceph_encode_string(&p, end, src_oid->name, src_oid->name_len); + encode_oloc(&p, end, src_oloc); + op->indata_len = PAGE_SIZE - (end - p); + + ceph_osd_data_pages_init(&op->copy_from.osd_data, pages, + op->indata_len, 0, false, true); + return 0; +} + +int ceph_osdc_copy_from(struct ceph_osd_client *osdc, + u64 src_snapid, u64 src_version, + struct ceph_object_id *src_oid, + struct ceph_object_locator *src_oloc, + u32 src_fadvise_flags, + struct ceph_object_id *dst_oid, + struct ceph_object_locator *dst_oloc, + u32 dst_fadvise_flags, + u8 copy_from_flags) +{ + struct ceph_osd_request *req; + int ret; + + req = ceph_osdc_alloc_request(osdc, NULL, 1, false, GFP_KERNEL); + if (!req) + return -ENOMEM; + + req->r_flags = CEPH_OSD_FLAG_WRITE; + + ceph_oloc_copy(&req->r_t.base_oloc, dst_oloc); + ceph_oid_copy(&req->r_t.base_oid, dst_oid); + + ret = osd_req_op_copy_from_init(req, src_snapid, src_version, src_oid, + src_oloc, src_fadvise_flags, + dst_fadvise_flags, copy_from_flags); + if (ret) + goto out; + + ret = ceph_osdc_alloc_messages(req, GFP_KERNEL); + if (ret) + goto out; + + ceph_osdc_start_request(osdc, req, false); + ret = ceph_osdc_wait_request(osdc, req); + +out: + ceph_osdc_put_request(req); + return ret; +} +EXPORT_SYMBOL(ceph_osdc_copy_from); + int __init ceph_osdc_setup(void) { size_t size = sizeof(struct ceph_osd_request) + -- cgit v1.2.3 From ab21c1b5f799395232b838e98981cfed6d647905 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 24 Oct 2018 22:05:44 +0200 Subject: bpf: disallow direct packet access for unpriv in cg_skb Commit b39b5f411dcf ("bpf: add cg_skb_is_valid_access for BPF_PROG_TYPE_CGROUP_SKB") added support for returning pkt pointers for direct packet access. Given this program type is allowed for both unprivileged and privileged users, we shouldn't allow unprivileged ones to use it, e.g. besides others one reason would be to avoid any potential speculation on the packet test itself, thus guard this for root only. Fixes: b39b5f411dcf ("bpf: add cg_skb_is_valid_access for BPF_PROG_TYPE_CGROUP_SKB") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Cc: Song Liu Signed-off-by: Alexei Starovoitov --- net/core/filter.c | 6 ++++++ tools/testing/selftests/bpf/test_verifier.c | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/filter.c b/net/core/filter.c index 35c6933c2622..3fdddfa9a0fd 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5496,7 +5496,13 @@ static bool cg_skb_is_valid_access(int off, int size, case bpf_ctx_range(struct __sk_buff, data_meta): case bpf_ctx_range(struct __sk_buff, flow_keys): return false; + case bpf_ctx_range(struct __sk_buff, data): + case bpf_ctx_range(struct __sk_buff, data_end): + if (!capable(CAP_SYS_ADMIN)) + return false; + break; } + if (type == BPF_WRITE) { switch (off) { case bpf_ctx_range(struct __sk_buff, mark): diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 8e1a79d2792c..36f3d3009d1a 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -4892,7 +4892,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .result_unpriv = REJECT, - .errstr_unpriv = "R3 pointer comparison prohibited", + .errstr_unpriv = "invalid bpf_context access off=76 size=4", .prog_type = BPF_PROG_TYPE_CGROUP_SKB, }, { -- cgit v1.2.3 From b09928b976280d64060d7bee146d7df5c5a29bef Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 24 Oct 2018 22:05:49 +0200 Subject: bpf: make direct packet write unclone more robust Given this seems to be quite fragile and can easily slip through the cracks, lets make direct packet write more robust by requiring that future program types which allow for such write must provide a prologue callback. In case of XDP and sk_msg it's noop, thus add a generic noop handler there. The latter starts out with NULL data/data_end unconditionally when sg pages are shared. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: Song Liu Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 6 +++++- net/core/filter.c | 11 +++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 5fc9a658af0e..171a2c88e77d 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5709,7 +5709,11 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) bool is_narrower_load; u32 target_size; - if (ops->gen_prologue) { + if (ops->gen_prologue || env->seen_direct_write) { + if (!ops->gen_prologue) { + verbose(env, "bpf verifier is misconfigured\n"); + return -EINVAL; + } cnt = ops->gen_prologue(insn_buf, env->seen_direct_write, env->prog); if (cnt >= ARRAY_SIZE(insn_buf)) { diff --git a/net/core/filter.c b/net/core/filter.c index 3fdddfa9a0fd..cd648d09a8e5 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5644,6 +5644,15 @@ static bool sock_filter_is_valid_access(int off, int size, prog->expected_attach_type); } +static int bpf_noop_prologue(struct bpf_insn *insn_buf, bool direct_write, + const struct bpf_prog *prog) +{ + /* Neither direct read nor direct write requires any preliminary + * action. + */ + return 0; +} + static int bpf_unclone_prologue(struct bpf_insn *insn_buf, bool direct_write, const struct bpf_prog *prog, int drop_verdict) { @@ -7210,6 +7219,7 @@ const struct bpf_verifier_ops xdp_verifier_ops = { .get_func_proto = xdp_func_proto, .is_valid_access = xdp_is_valid_access, .convert_ctx_access = xdp_convert_ctx_access, + .gen_prologue = bpf_noop_prologue, }; const struct bpf_prog_ops xdp_prog_ops = { @@ -7308,6 +7318,7 @@ const struct bpf_verifier_ops sk_msg_verifier_ops = { .get_func_proto = sk_msg_func_proto, .is_valid_access = sk_msg_is_valid_access, .convert_ctx_access = sk_msg_convert_ctx_access, + .gen_prologue = bpf_noop_prologue, }; const struct bpf_prog_ops sk_msg_prog_ops = { -- cgit v1.2.3 From ede95a63b5e84ddeea6b0c473b36ab8bfd8c6ce3 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 23 Oct 2018 01:11:04 +0200 Subject: bpf: add bpf_jit_limit knob to restrict unpriv allocations Rick reported that the BPF JIT could potentially fill the entire module space with BPF programs from unprivileged users which would prevent later attempts to load normal kernel modules or privileged BPF programs, for example. If JIT was enabled but unsuccessful to generate the image, then before commit 290af86629b2 ("bpf: introduce BPF_JIT_ALWAYS_ON config") we would always fall back to the BPF interpreter. Nowadays in the case where the CONFIG_BPF_JIT_ALWAYS_ON could be set, then the load will abort with a failure since the BPF interpreter was compiled out. Add a global limit and enforce it for unprivileged users such that in case of BPF interpreter compiled out we fail once the limit has been reached or we fall back to BPF interpreter earlier w/o using module mem if latter was compiled in. In a next step, fair share among unprivileged users can be resolved in particular for the case where we would fail hard once limit is reached. Fixes: 290af86629b2 ("bpf: introduce BPF_JIT_ALWAYS_ON config") Fixes: 0a14842f5a3c ("net: filter: Just In Time compiler for x86-64") Co-Developed-by: Rick Edgecombe Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Cc: Eric Dumazet Cc: Jann Horn Cc: Kees Cook Cc: LKML Signed-off-by: Alexei Starovoitov --- Documentation/sysctl/net.txt | 8 ++++++++ include/linux/filter.h | 1 + kernel/bpf/core.c | 49 +++++++++++++++++++++++++++++++++++++++++--- net/core/sysctl_net_core.c | 10 +++++++-- 4 files changed, 63 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt index 9ecde517728c..2793d4eac55f 100644 --- a/Documentation/sysctl/net.txt +++ b/Documentation/sysctl/net.txt @@ -92,6 +92,14 @@ Values : 0 - disable JIT kallsyms export (default value) 1 - enable JIT kallsyms export for privileged users only +bpf_jit_limit +------------- + +This enforces a global limit for memory allocations to the BPF JIT +compiler in order to reject unprivileged JIT requests once it has +been surpassed. bpf_jit_limit contains the value of the global limit +in bytes. + dev_weight -------------- diff --git a/include/linux/filter.h b/include/linux/filter.h index 91b4c934f02e..de629b706d1d 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -854,6 +854,7 @@ bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk, extern int bpf_jit_enable; extern int bpf_jit_harden; extern int bpf_jit_kallsyms; +extern int bpf_jit_limit; typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 7c7eeea8cffc..6377225b2082 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -365,10 +365,13 @@ void bpf_prog_kallsyms_del_all(struct bpf_prog *fp) } #ifdef CONFIG_BPF_JIT +# define BPF_JIT_LIMIT_DEFAULT (PAGE_SIZE * 40000) + /* All BPF JIT sysctl knobs here. */ int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_ALWAYS_ON); int bpf_jit_harden __read_mostly; int bpf_jit_kallsyms __read_mostly; +int bpf_jit_limit __read_mostly = BPF_JIT_LIMIT_DEFAULT; static __always_inline void bpf_get_prog_addr_region(const struct bpf_prog *prog, @@ -577,27 +580,64 @@ int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type, return ret; } +static atomic_long_t bpf_jit_current; + +#if defined(MODULES_VADDR) +static int __init bpf_jit_charge_init(void) +{ + /* Only used as heuristic here to derive limit. */ + bpf_jit_limit = min_t(u64, round_up((MODULES_END - MODULES_VADDR) >> 2, + PAGE_SIZE), INT_MAX); + return 0; +} +pure_initcall(bpf_jit_charge_init); +#endif + +static int bpf_jit_charge_modmem(u32 pages) +{ + if (atomic_long_add_return(pages, &bpf_jit_current) > + (bpf_jit_limit >> PAGE_SHIFT)) { + if (!capable(CAP_SYS_ADMIN)) { + atomic_long_sub(pages, &bpf_jit_current); + return -EPERM; + } + } + + return 0; +} + +static void bpf_jit_uncharge_modmem(u32 pages) +{ + atomic_long_sub(pages, &bpf_jit_current); +} + struct bpf_binary_header * bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, unsigned int alignment, bpf_jit_fill_hole_t bpf_fill_ill_insns) { struct bpf_binary_header *hdr; - unsigned int size, hole, start; + u32 size, hole, start, pages; /* Most of BPF filters are really small, but if some of them * fill a page, allow at least 128 extra bytes to insert a * random section of illegal instructions. */ size = round_up(proglen + sizeof(*hdr) + 128, PAGE_SIZE); + pages = size / PAGE_SIZE; + + if (bpf_jit_charge_modmem(pages)) + return NULL; hdr = module_alloc(size); - if (hdr == NULL) + if (!hdr) { + bpf_jit_uncharge_modmem(pages); return NULL; + } /* Fill space with illegal/arch-dep instructions. */ bpf_fill_ill_insns(hdr, size); - hdr->pages = size / PAGE_SIZE; + hdr->pages = pages; hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)), PAGE_SIZE - sizeof(*hdr)); start = (get_random_int() % hole) & ~(alignment - 1); @@ -610,7 +650,10 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, void bpf_jit_binary_free(struct bpf_binary_header *hdr) { + u32 pages = hdr->pages; + module_memfree(hdr); + bpf_jit_uncharge_modmem(pages); } /* This symbol is only overridden by archs that have different diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index b1a2c5e38530..37b4667128a3 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -279,7 +279,6 @@ static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write, return ret; } -# ifdef CONFIG_HAVE_EBPF_JIT static int proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, @@ -290,7 +289,6 @@ proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write, return proc_dointvec_minmax(table, write, buffer, lenp, ppos); } -# endif #endif static struct ctl_table net_core_table[] = { @@ -397,6 +395,14 @@ static struct ctl_table net_core_table[] = { .extra2 = &one, }, # endif + { + .procname = "bpf_jit_limit", + .data = &bpf_jit_limit, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = proc_dointvec_minmax_bpf_restricted, + .extra1 = &one, + }, #endif { .procname = "netdev_tstamp_prequeue", -- cgit v1.2.3 From d8fd9e106fbc291167ebb675ad69234597d0fd98 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 27 Oct 2018 00:49:02 +0200 Subject: bpf: fix wrong helper enablement in cgroup local storage Commit cd3394317653 ("bpf: introduce the bpf_get_local_storage() helper function") enabled the bpf_get_local_storage() helper also for BPF program types where it does not make sense to use them. They have been added both in sk_skb_func_proto() and sk_msg_func_proto() even though both program types are not invoked in combination with cgroups, and neither through BPF_PROG_RUN_ARRAY(). In the latter the bpf_cgroup_storage_set() is set shortly before BPF program invocation. Later, the helper bpf_get_local_storage() retrieves this prior set up per-cpu pointer and hands the buffer to the BPF program. The map argument in there solely retrieves the enum bpf_cgroup_storage_type from a local storage map associated with the program and based on the type returns either the global or per-cpu storage. However, there is no specific association between the program's map and the actual content in bpf_cgroup_storage[]. Meaning, any BPF program that would have been properly run from the cgroup side through BPF_PROG_RUN_ARRAY() where bpf_cgroup_storage_set() was performed, and that is later unloaded such that prog / maps are teared down will cause a use after free if that pointer is retrieved from programs that are not run through BPF_PROG_RUN_ARRAY() but have the cgroup local storage helper enabled in their func proto. Lets just remove it from the two sock_map program types to fix it. Auditing through the types where this helper is enabled, it appears that these are the only ones where it was mistakenly allowed. Fixes: cd3394317653 ("bpf: introduce the bpf_get_local_storage() helper function") Signed-off-by: Daniel Borkmann Cc: Roman Gushchin Acked-by: John Fastabend Acked-by: Roman Gushchin Signed-off-by: Alexei Starovoitov --- net/core/filter.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'net') diff --git a/net/core/filter.c b/net/core/filter.c index cd648d09a8e5..e521c5ebc7d1 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5264,8 +5264,6 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_msg_pull_data_proto; case BPF_FUNC_msg_push_data: return &bpf_msg_push_data_proto; - case BPF_FUNC_get_local_storage: - return &bpf_get_local_storage_proto; default: return bpf_base_func_proto(func_id); } @@ -5296,8 +5294,6 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sk_redirect_map_proto; case BPF_FUNC_sk_redirect_hash: return &bpf_sk_redirect_hash_proto; - case BPF_FUNC_get_local_storage: - return &bpf_get_local_storage_proto; #ifdef CONFIG_INET case BPF_FUNC_sk_lookup_tcp: return &bpf_sk_lookup_tcp_proto; -- cgit v1.2.3 From ece23711dd956cd5053c9cb03e9fe0668f9c8894 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 28 Oct 2018 10:35:12 -0700 Subject: net: Properly unlink GRO packets on overflow. Just like with normal GRO processing, we have to initialize skb->next to NULL when we unlink overflow packets from the GRO hash lists. Fixes: d4546c2509b1 ("net: Convert GRO SKB handling to list_head.") Reported-by: Oleksandr Natalenko Tested-by: Oleksandr Natalenko Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 022ad73d6253..77d43ae2a7bb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5457,7 +5457,7 @@ static void gro_flush_oldest(struct list_head *head) /* Do not adjust napi->gro_hash[].count, caller is adding a new * SKB to the chain. */ - list_del(&oldest->list); + skb_list_del_init(oldest); napi_gro_complete(oldest); } -- cgit v1.2.3 From 0fe5119e267f3e3d8ac206895f5922195ec55a8a Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Sat, 27 Oct 2018 12:07:47 +0300 Subject: net: bridge: remove ipv6 zero address check in mcast queries Recently a check was added which prevents marking of routers with zero source address, but for IPv6 that cannot happen as the relevant RFCs actually forbid such packets: RFC 2710 (MLDv1): "To be valid, the Query message MUST come from a link-local IPv6 Source Address, be at least 24 octets long, and have a correct MLD checksum." Same goes for RFC 3810. And also it can be seen as a requirement in ipv6_mc_check_mld_query() which is used by the bridge to validate the message before processing it. Thus any queries with :: source address won't be processed anyway. So just remove the check for zero IPv6 source address from the query processing function. Fixes: 5a2de63fd1a5 ("bridge: do not add port to router list when receives query with source 0.0.0.0") Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 41cdafbf2ebe..6bac0d6b7b94 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1428,8 +1428,7 @@ static void br_multicast_query_received(struct net_bridge *br, * is 0.0.0.0 should not be added to router port list. */ if ((saddr->proto == htons(ETH_P_IP) && saddr->u.ip4) || - (saddr->proto == htons(ETH_P_IPV6) && - !ipv6_addr_any(&saddr->u.ip6))) + saddr->proto == htons(ETH_P_IPV6)) br_multicast_mark_router(br, port); } -- cgit v1.2.3 From 38b4f18d56372e1e21771ab7b0357b853330186c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 26 Oct 2018 15:51:06 -0700 Subject: net: sched: gred: pass the right attribute to gred_change_table_def() gred_change_table_def() takes a pointer to TCA_GRED_DPS attribute, and expects it will be able to interpret its contents as struct tc_gred_sopt. Pass the correct gred attribute, instead of TCA_OPTIONS. This bug meant the table definition could never be changed after Qdisc was initialized (unless whatever TCA_OPTIONS contained both passed netlink validation and was a valid struct tc_gred_sopt...). Old behaviour: $ ip link add type dummy $ tc qdisc replace dev dummy0 parent root handle 7: \ gred setup vqs 4 default 0 $ tc qdisc replace dev dummy0 parent root handle 7: \ gred setup vqs 4 default 0 RTNETLINK answers: Invalid argument Now: $ ip link add type dummy $ tc qdisc replace dev dummy0 parent root handle 7: \ gred setup vqs 4 default 0 $ tc qdisc replace dev dummy0 parent root handle 7: \ gred setup vqs 4 default 0 $ tc qdisc replace dev dummy0 parent root handle 7: \ gred setup vqs 4 default 0 Fixes: f62d6b936df5 ("[PKT_SCHED]: GRED: Use central VQ change procedure") Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/sched/sch_gred.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index cbe4831f46f4..4a042abf844c 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -413,7 +413,7 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt, if (tb[TCA_GRED_PARMS] == NULL && tb[TCA_GRED_STAB] == NULL) { if (tb[TCA_GRED_LIMIT] != NULL) sch->limit = nla_get_u32(tb[TCA_GRED_LIMIT]); - return gred_change_table_def(sch, opt); + return gred_change_table_def(sch, tb[TCA_GRED_DPS]); } if (tb[TCA_GRED_PARMS] == NULL || -- cgit v1.2.3 From 747569b0a7c537d680bc94a988be6caad9960488 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Mon, 29 Oct 2018 09:15:22 +0900 Subject: net: diag: document swapped src/dst in udp_dump_one. Since its inception, udp_dump_one has had a bug where userspace needs to swap src and dst addresses and ports in order to find the socket it wants. This is because it passes the socket source address to __udp[46]_lib_lookup's saddr argument, but those functions are intended to find local sockets matching received packets, so saddr is the remote address, not the local address. This can no longer be fixed for backwards compatibility reasons, so add a brief comment explaining that this is the case. This will avoid confusion and help ensure SOCK_DIAG implementations of new protocols don't have the same problem. Fixes: a925aa00a55 ("udp_diag: Implement the get_exact dumping functionality") Signed-off-by: Lorenzo Colitti Signed-off-by: David S. Miller --- net/ipv4/udp_diag.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index d9ad986c7b2c..5cbb9be05295 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -42,6 +42,7 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, rcu_read_lock(); if (req->sdiag_family == AF_INET) + /* src and dst are swapped for historical reasons */ sk = __udp4_lib_lookup(net, req->id.idiag_src[0], req->id.idiag_sport, req->id.idiag_dst[0], req->id.idiag_dport, -- cgit v1.2.3 From ae74136b4bb64440a55117e12065b8c282ab6c1a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 3 Oct 2018 12:01:22 -0400 Subject: SUNRPC: Allow cache lookups to use RCU protection rather than the r/w spinlock Instead of the reader/writer spinlock, allow cache lookups to use RCU for looking up entries. This is more efficient since modifications can occur while other entries are being looked up. Note that for now, we keep the reader/writer spinlock until all users have been converted to use RCU-safe freeing of their cache entries. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/cache.h | 12 ++++++ net/sunrpc/cache.c | 93 +++++++++++++++++++++++++++++++++++++------- 2 files changed, 91 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 40d2822f0e2f..cf3e17ee2786 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -167,6 +167,9 @@ extern const struct file_operations cache_file_operations_pipefs; extern const struct file_operations content_file_operations_pipefs; extern const struct file_operations cache_flush_operations_pipefs; +extern struct cache_head * +sunrpc_cache_lookup_rcu(struct cache_detail *detail, + struct cache_head *key, int hash); extern struct cache_head * sunrpc_cache_lookup(struct cache_detail *detail, struct cache_head *key, int hash); @@ -186,6 +189,12 @@ static inline struct cache_head *cache_get(struct cache_head *h) return h; } +static inline struct cache_head *cache_get_rcu(struct cache_head *h) +{ + if (kref_get_unless_zero(&h->ref)) + return h; + return NULL; +} static inline void cache_put(struct cache_head *h, struct cache_detail *cd) { @@ -227,6 +236,9 @@ extern void sunrpc_cache_unhash(struct cache_detail *, struct cache_head *); extern void *cache_seq_start(struct seq_file *file, loff_t *pos); extern void *cache_seq_next(struct seq_file *file, void *p, loff_t *pos); extern void cache_seq_stop(struct seq_file *file, void *p); +extern void *cache_seq_start_rcu(struct seq_file *file, loff_t *pos); +extern void *cache_seq_next_rcu(struct seq_file *file, void *p, loff_t *pos); +extern void cache_seq_stop_rcu(struct seq_file *file, void *p); extern void qword_add(char **bpp, int *lp, char *str); extern void qword_addhex(char **bpp, int *lp, char *buf, int blen); diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index aa8e62d61f4d..7593afed9036 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -54,6 +54,27 @@ static void cache_init(struct cache_head *h, struct cache_detail *detail) h->last_refresh = now; } +static struct cache_head *sunrpc_cache_find_rcu(struct cache_detail *detail, + struct cache_head *key, + int hash) +{ + struct hlist_head *head = &detail->hash_table[hash]; + struct cache_head *tmp; + + rcu_read_lock(); + hlist_for_each_entry_rcu(tmp, head, cache_list) { + if (detail->match(tmp, key)) { + if (cache_is_expired(detail, tmp)) + continue; + tmp = cache_get_rcu(tmp); + rcu_read_unlock(); + return tmp; + } + } + rcu_read_unlock(); + return NULL; +} + static struct cache_head *sunrpc_cache_find(struct cache_detail *detail, struct cache_head *key, int hash) { @@ -61,7 +82,6 @@ static struct cache_head *sunrpc_cache_find(struct cache_detail *detail, struct cache_head *tmp; read_lock(&detail->hash_lock); - hlist_for_each_entry(tmp, head, cache_list) { if (detail->match(tmp, key)) { if (cache_is_expired(detail, tmp)) @@ -96,10 +116,10 @@ static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail, write_lock(&detail->hash_lock); /* check if entry appeared while we slept */ - hlist_for_each_entry(tmp, head, cache_list) { + hlist_for_each_entry_rcu(tmp, head, cache_list) { if (detail->match(tmp, key)) { if (cache_is_expired(detail, tmp)) { - hlist_del_init(&tmp->cache_list); + hlist_del_init_rcu(&tmp->cache_list); detail->entries --; freeme = tmp; break; @@ -111,7 +131,7 @@ static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail, } } - hlist_add_head(&new->cache_list, head); + hlist_add_head_rcu(&new->cache_list, head); detail->entries++; cache_get(new); write_unlock(&detail->hash_lock); @@ -121,6 +141,19 @@ static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail, return new; } +struct cache_head *sunrpc_cache_lookup_rcu(struct cache_detail *detail, + struct cache_head *key, int hash) +{ + struct cache_head *ret; + + ret = sunrpc_cache_find_rcu(detail, key, hash); + if (ret) + return ret; + /* Didn't find anything, insert an empty entry */ + return sunrpc_cache_add_entry(detail, key, hash); +} +EXPORT_SYMBOL_GPL(sunrpc_cache_lookup_rcu); + struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, struct cache_head *key, int hash) { @@ -134,6 +167,7 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, } EXPORT_SYMBOL_GPL(sunrpc_cache_lookup); + static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch); static void cache_fresh_locked(struct cache_head *head, time_t expiry, @@ -450,7 +484,7 @@ static int cache_clean(void) if (!cache_is_expired(current_detail, ch)) continue; - hlist_del_init(&ch->cache_list); + hlist_del_init_rcu(&ch->cache_list); current_detail->entries--; rv = 1; break; @@ -521,7 +555,7 @@ void cache_purge(struct cache_detail *detail) for (i = 0; i < detail->hash_size; i++) { head = &detail->hash_table[i]; hlist_for_each_entry_safe(ch, tmp, head, cache_list) { - hlist_del_init(&ch->cache_list); + hlist_del_init_rcu(&ch->cache_list); detail->entries--; set_bit(CACHE_CLEANED, &ch->flags); @@ -1306,21 +1340,19 @@ EXPORT_SYMBOL_GPL(qword_get); * get a header, then pass each real item in the cache */ -void *cache_seq_start(struct seq_file *m, loff_t *pos) - __acquires(cd->hash_lock) +static void *__cache_seq_start(struct seq_file *m, loff_t *pos) { loff_t n = *pos; unsigned int hash, entry; struct cache_head *ch; struct cache_detail *cd = m->private; - read_lock(&cd->hash_lock); if (!n--) return SEQ_START_TOKEN; hash = n >> 32; entry = n & ((1LL<<32) - 1); - hlist_for_each_entry(ch, &cd->hash_table[hash], cache_list) + hlist_for_each_entry_rcu(ch, &cd->hash_table[hash], cache_list) if (!entry--) return ch; n &= ~((1LL<<32) - 1); @@ -1332,9 +1364,19 @@ void *cache_seq_start(struct seq_file *m, loff_t *pos) if (hash >= cd->hash_size) return NULL; *pos = n+1; - return hlist_entry_safe(cd->hash_table[hash].first, + return hlist_entry_safe(rcu_dereference_raw( + hlist_first_rcu(&cd->hash_table[hash])), struct cache_head, cache_list); } + +void *cache_seq_start(struct seq_file *m, loff_t *pos) + __acquires(cd->hash_lock) +{ + struct cache_detail *cd = m->private; + + read_lock(&cd->hash_lock); + return __cache_seq_start(m, pos); +} EXPORT_SYMBOL_GPL(cache_seq_start); void *cache_seq_next(struct seq_file *m, void *p, loff_t *pos) @@ -1350,7 +1392,8 @@ void *cache_seq_next(struct seq_file *m, void *p, loff_t *pos) *pos += 1LL<<32; } else { ++*pos; - return hlist_entry_safe(ch->cache_list.next, + return hlist_entry_safe(rcu_dereference_raw( + hlist_next_rcu(&ch->cache_list)), struct cache_head, cache_list); } *pos &= ~((1LL<<32) - 1); @@ -1362,7 +1405,8 @@ void *cache_seq_next(struct seq_file *m, void *p, loff_t *pos) if (hash >= cd->hash_size) return NULL; ++*pos; - return hlist_entry_safe(cd->hash_table[hash].first, + return hlist_entry_safe(rcu_dereference_raw( + hlist_first_rcu(&cd->hash_table[hash])), struct cache_head, cache_list); } EXPORT_SYMBOL_GPL(cache_seq_next); @@ -1375,6 +1419,27 @@ void cache_seq_stop(struct seq_file *m, void *p) } EXPORT_SYMBOL_GPL(cache_seq_stop); +void *cache_seq_start_rcu(struct seq_file *m, loff_t *pos) + __acquires(RCU) +{ + rcu_read_lock(); + return __cache_seq_start(m, pos); +} +EXPORT_SYMBOL_GPL(cache_seq_start_rcu); + +void *cache_seq_next_rcu(struct seq_file *file, void *p, loff_t *pos) +{ + return cache_seq_next(file, p, pos); +} +EXPORT_SYMBOL_GPL(cache_seq_next_rcu); + +void cache_seq_stop_rcu(struct seq_file *m, void *p) + __releases(RCU) +{ + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(cache_seq_stop_rcu); + static int c_show(struct seq_file *m, void *p) { struct cache_head *cp = p; @@ -1863,7 +1928,7 @@ void sunrpc_cache_unhash(struct cache_detail *cd, struct cache_head *h) { write_lock(&cd->hash_lock); if (!hlist_unhashed(&h->cache_list)){ - hlist_del_init(&h->cache_list); + hlist_del_init_rcu(&h->cache_list); cd->entries--; write_unlock(&cd->hash_lock); cache_put(h, cd); -- cgit v1.2.3 From fd5d2f78261bedd3a40feb1474323fddb88398b8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 1 Oct 2018 10:41:46 -0400 Subject: SUNRPC: Make server side AUTH_UNIX use lockless lookups Convert structs ip_map and unix_gid to use RCU protected lookups. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- net/sunrpc/svcauth_unix.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 84cf39021a03..fb9041b92f72 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -97,6 +97,7 @@ struct ip_map { char m_class[8]; /* e.g. "nfsd" */ struct in6_addr m_addr; struct unix_domain *m_client; + struct rcu_head m_rcu; }; static void ip_map_put(struct kref *kref) @@ -107,7 +108,7 @@ static void ip_map_put(struct kref *kref) if (test_bit(CACHE_VALID, &item->flags) && !test_bit(CACHE_NEGATIVE, &item->flags)) auth_domain_put(&im->m_client->h); - kfree(im); + kfree_rcu(im, m_rcu); } static inline int hash_ip6(const struct in6_addr *ip) @@ -286,9 +287,9 @@ static struct ip_map *__ip_map_lookup(struct cache_detail *cd, char *class, strcpy(ip.m_class, class); ip.m_addr = *addr; - ch = sunrpc_cache_lookup(cd, &ip.h, - hash_str(class, IP_HASHBITS) ^ - hash_ip6(addr)); + ch = sunrpc_cache_lookup_rcu(cd, &ip.h, + hash_str(class, IP_HASHBITS) ^ + hash_ip6(addr)); if (ch) return container_of(ch, struct ip_map, h); @@ -418,6 +419,7 @@ struct unix_gid { struct cache_head h; kuid_t uid; struct group_info *gi; + struct rcu_head rcu; }; static int unix_gid_hash(kuid_t uid) @@ -432,7 +434,7 @@ static void unix_gid_put(struct kref *kref) if (test_bit(CACHE_VALID, &item->flags) && !test_bit(CACHE_NEGATIVE, &item->flags)) put_group_info(ug->gi); - kfree(ug); + kfree_rcu(ug, rcu); } static int unix_gid_match(struct cache_head *corig, struct cache_head *cnew) @@ -625,7 +627,7 @@ static struct unix_gid *unix_gid_lookup(struct cache_detail *cd, kuid_t uid) struct cache_head *ch; ug.uid = uid; - ch = sunrpc_cache_lookup(cd, &ug.h, unix_gid_hash(uid)); + ch = sunrpc_cache_lookup_rcu(cd, &ug.h, unix_gid_hash(uid)); if (ch) return container_of(ch, struct unix_gid, h); else -- cgit v1.2.3 From 6d1616b26cd91f2502111d098cd9c288dbafe5c8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 1 Oct 2018 10:41:48 -0400 Subject: SUNRPC: Lockless server RPCSEC_GSS context lookup Use RCU protection for looking up the RPCSEC_GSS context. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- net/sunrpc/auth_gss/svcauth_gss.c | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 87c71fb0f0ea..1ece4bc3eb8d 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -76,6 +76,7 @@ struct rsi { struct xdr_netobj in_handle, in_token; struct xdr_netobj out_handle, out_token; int major_status, minor_status; + struct rcu_head rcu_head; }; static struct rsi *rsi_update(struct cache_detail *cd, struct rsi *new, struct rsi *old); @@ -89,13 +90,21 @@ static void rsi_free(struct rsi *rsii) kfree(rsii->out_token.data); } -static void rsi_put(struct kref *ref) +static void rsi_free_rcu(struct rcu_head *head) { - struct rsi *rsii = container_of(ref, struct rsi, h.ref); + struct rsi *rsii = container_of(head, struct rsi, rcu_head); + rsi_free(rsii); kfree(rsii); } +static void rsi_put(struct kref *ref) +{ + struct rsi *rsii = container_of(ref, struct rsi, h.ref); + + call_rcu(&rsii->rcu_head, rsi_free_rcu); +} + static inline int rsi_hash(struct rsi *item) { return hash_mem(item->in_handle.data, item->in_handle.len, RSI_HASHBITS) @@ -282,7 +291,7 @@ static struct rsi *rsi_lookup(struct cache_detail *cd, struct rsi *item) struct cache_head *ch; int hash = rsi_hash(item); - ch = sunrpc_cache_lookup(cd, &item->h, hash); + ch = sunrpc_cache_lookup_rcu(cd, &item->h, hash); if (ch) return container_of(ch, struct rsi, h); else @@ -330,6 +339,7 @@ struct rsc { struct svc_cred cred; struct gss_svc_seq_data seqdata; struct gss_ctx *mechctx; + struct rcu_head rcu_head; }; static struct rsc *rsc_update(struct cache_detail *cd, struct rsc *new, struct rsc *old); @@ -343,12 +353,22 @@ static void rsc_free(struct rsc *rsci) free_svc_cred(&rsci->cred); } +static void rsc_free_rcu(struct rcu_head *head) +{ + struct rsc *rsci = container_of(head, struct rsc, rcu_head); + + kfree(rsci->handle.data); + kfree(rsci); +} + static void rsc_put(struct kref *ref) { struct rsc *rsci = container_of(ref, struct rsc, h.ref); - rsc_free(rsci); - kfree(rsci); + if (rsci->mechctx) + gss_delete_sec_context(&rsci->mechctx); + free_svc_cred(&rsci->cred); + call_rcu(&rsci->rcu_head, rsc_free_rcu); } static inline int @@ -542,7 +562,7 @@ static struct rsc *rsc_lookup(struct cache_detail *cd, struct rsc *item) struct cache_head *ch; int hash = rsc_hash(item); - ch = sunrpc_cache_lookup(cd, &item->h, hash); + ch = sunrpc_cache_lookup_rcu(cd, &item->h, hash); if (ch) return container_of(ch, struct rsc, h); else -- cgit v1.2.3 From d48cf356a13073853f19be6ca5ebbecfc2762ebe Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 1 Oct 2018 10:41:51 -0400 Subject: SUNRPC: Remove non-RCU protected lookup Clean up the cache code by removing the non-RCU protected lookup. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- Documentation/filesystems/nfs/rpc-cache.txt | 6 +-- include/linux/sunrpc/cache.h | 6 --- net/sunrpc/cache.c | 61 ++--------------------------- 3 files changed, 7 insertions(+), 66 deletions(-) (limited to 'net') diff --git a/Documentation/filesystems/nfs/rpc-cache.txt b/Documentation/filesystems/nfs/rpc-cache.txt index ebcaaee21616..c4dac829db0f 100644 --- a/Documentation/filesystems/nfs/rpc-cache.txt +++ b/Documentation/filesystems/nfs/rpc-cache.txt @@ -84,7 +84,7 @@ Creating a Cache A message from user space has arrived to fill out a cache entry. It is in 'buf' of length 'len'. cache_parse should parse this, find the item in the - cache with sunrpc_cache_lookup, and update the item + cache with sunrpc_cache_lookup_rcu, and update the item with sunrpc_cache_update. @@ -95,7 +95,7 @@ Creating a Cache Using a cache ------------- -To find a value in a cache, call sunrpc_cache_lookup passing a pointer +To find a value in a cache, call sunrpc_cache_lookup_rcu passing a pointer to the cache_head in a sample item with the 'key' fields filled in. This will be passed to ->match to identify the target entry. If no entry is found, a new entry will be create, added to the cache, and @@ -116,7 +116,7 @@ item does become valid, the deferred copy of the request will be revisited (->revisit). It is expected that this method will reschedule the request for processing. -The value returned by sunrpc_cache_lookup can also be passed to +The value returned by sunrpc_cache_lookup_rcu can also be passed to sunrpc_cache_update to set the content for the item. A second item is passed which should hold the content. If the item found by _lookup has valid data, then it is discarded and a new item is created. This diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index cf3e17ee2786..c3d67e893430 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -171,9 +171,6 @@ extern struct cache_head * sunrpc_cache_lookup_rcu(struct cache_detail *detail, struct cache_head *key, int hash); extern struct cache_head * -sunrpc_cache_lookup(struct cache_detail *detail, - struct cache_head *key, int hash); -extern struct cache_head * sunrpc_cache_update(struct cache_detail *detail, struct cache_head *new, struct cache_head *old, int hash); @@ -233,9 +230,6 @@ extern void sunrpc_cache_unregister_pipefs(struct cache_detail *); extern void sunrpc_cache_unhash(struct cache_detail *, struct cache_head *); /* Must store cache_detail in seq_file->private if using next three functions */ -extern void *cache_seq_start(struct seq_file *file, loff_t *pos); -extern void *cache_seq_next(struct seq_file *file, void *p, loff_t *pos); -extern void cache_seq_stop(struct seq_file *file, void *p); extern void *cache_seq_start_rcu(struct seq_file *file, loff_t *pos); extern void *cache_seq_next_rcu(struct seq_file *file, void *p, loff_t *pos); extern void cache_seq_stop_rcu(struct seq_file *file, void *p); diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 7593afed9036..593cf8607414 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -75,27 +75,6 @@ static struct cache_head *sunrpc_cache_find_rcu(struct cache_detail *detail, return NULL; } -static struct cache_head *sunrpc_cache_find(struct cache_detail *detail, - struct cache_head *key, int hash) -{ - struct hlist_head *head = &detail->hash_table[hash]; - struct cache_head *tmp; - - read_lock(&detail->hash_lock); - hlist_for_each_entry(tmp, head, cache_list) { - if (detail->match(tmp, key)) { - if (cache_is_expired(detail, tmp)) - /* This entry is expired, we will discard it. */ - break; - cache_get(tmp); - read_unlock(&detail->hash_lock); - return tmp; - } - } - read_unlock(&detail->hash_lock); - return NULL; -} - static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail, struct cache_head *key, int hash) @@ -154,20 +133,6 @@ struct cache_head *sunrpc_cache_lookup_rcu(struct cache_detail *detail, } EXPORT_SYMBOL_GPL(sunrpc_cache_lookup_rcu); -struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, - struct cache_head *key, int hash) -{ - struct cache_head *ret; - - ret = sunrpc_cache_find(detail, key, hash); - if (ret) - return ret; - /* Didn't find anything, insert an empty entry */ - return sunrpc_cache_add_entry(detail, key, hash); -} -EXPORT_SYMBOL_GPL(sunrpc_cache_lookup); - - static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch); static void cache_fresh_locked(struct cache_head *head, time_t expiry, @@ -1369,17 +1334,7 @@ static void *__cache_seq_start(struct seq_file *m, loff_t *pos) struct cache_head, cache_list); } -void *cache_seq_start(struct seq_file *m, loff_t *pos) - __acquires(cd->hash_lock) -{ - struct cache_detail *cd = m->private; - - read_lock(&cd->hash_lock); - return __cache_seq_start(m, pos); -} -EXPORT_SYMBOL_GPL(cache_seq_start); - -void *cache_seq_next(struct seq_file *m, void *p, loff_t *pos) +static void *cache_seq_next(struct seq_file *m, void *p, loff_t *pos) { struct cache_head *ch = p; int hash = (*pos >> 32); @@ -1411,14 +1366,6 @@ void *cache_seq_next(struct seq_file *m, void *p, loff_t *pos) } EXPORT_SYMBOL_GPL(cache_seq_next); -void cache_seq_stop(struct seq_file *m, void *p) - __releases(cd->hash_lock) -{ - struct cache_detail *cd = m->private; - read_unlock(&cd->hash_lock); -} -EXPORT_SYMBOL_GPL(cache_seq_stop); - void *cache_seq_start_rcu(struct seq_file *m, loff_t *pos) __acquires(RCU) { @@ -1466,9 +1413,9 @@ static int c_show(struct seq_file *m, void *p) } static const struct seq_operations cache_content_op = { - .start = cache_seq_start, - .next = cache_seq_next, - .stop = cache_seq_stop, + .start = cache_seq_start_rcu, + .next = cache_seq_next_rcu, + .stop = cache_seq_stop_rcu, .show = c_show, }; -- cgit v1.2.3 From 1863d77f15da0addcd293a1719fa5d3ef8cde3ca Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 1 Oct 2018 10:41:52 -0400 Subject: SUNRPC: Replace the cache_detail->hash_lock with a regular spinlock Now that the reader functions are all RCU protected, use a regular spinlock rather than a reader/writer lock. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/cache.h | 2 +- net/sunrpc/cache.c | 46 ++++++++++++++++++++++---------------------- 2 files changed, 24 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index c3d67e893430..5a3e95017fc6 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -67,7 +67,7 @@ struct cache_detail { struct module * owner; int hash_size; struct hlist_head * hash_table; - rwlock_t hash_lock; + spinlock_t hash_lock; char *name; void (*cache_put)(struct kref *); diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 593cf8607414..f96345b1180e 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -92,7 +92,7 @@ static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail, cache_init(new, detail); detail->init(new, key); - write_lock(&detail->hash_lock); + spin_lock(&detail->hash_lock); /* check if entry appeared while we slept */ hlist_for_each_entry_rcu(tmp, head, cache_list) { @@ -104,7 +104,7 @@ static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail, break; } cache_get(tmp); - write_unlock(&detail->hash_lock); + spin_unlock(&detail->hash_lock); cache_put(new, detail); return tmp; } @@ -113,7 +113,7 @@ static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail, hlist_add_head_rcu(&new->cache_list, head); detail->entries++; cache_get(new); - write_unlock(&detail->hash_lock); + spin_unlock(&detail->hash_lock); if (freeme) cache_put(freeme, detail); @@ -167,18 +167,18 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail, struct cache_head *tmp; if (!test_bit(CACHE_VALID, &old->flags)) { - write_lock(&detail->hash_lock); + spin_lock(&detail->hash_lock); if (!test_bit(CACHE_VALID, &old->flags)) { if (test_bit(CACHE_NEGATIVE, &new->flags)) set_bit(CACHE_NEGATIVE, &old->flags); else detail->update(old, new); cache_fresh_locked(old, new->expiry_time, detail); - write_unlock(&detail->hash_lock); + spin_unlock(&detail->hash_lock); cache_fresh_unlocked(old, detail); return old; } - write_unlock(&detail->hash_lock); + spin_unlock(&detail->hash_lock); } /* We need to insert a new entry */ tmp = detail->alloc(); @@ -189,7 +189,7 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail, cache_init(tmp, detail); detail->init(tmp, old); - write_lock(&detail->hash_lock); + spin_lock(&detail->hash_lock); if (test_bit(CACHE_NEGATIVE, &new->flags)) set_bit(CACHE_NEGATIVE, &tmp->flags); else @@ -199,7 +199,7 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail, cache_get(tmp); cache_fresh_locked(tmp, new->expiry_time, detail); cache_fresh_locked(old, 0, detail); - write_unlock(&detail->hash_lock); + spin_unlock(&detail->hash_lock); cache_fresh_unlocked(tmp, detail); cache_fresh_unlocked(old, detail); cache_put(old, detail); @@ -239,7 +239,7 @@ static int try_to_negate_entry(struct cache_detail *detail, struct cache_head *h { int rv; - write_lock(&detail->hash_lock); + spin_lock(&detail->hash_lock); rv = cache_is_valid(h); if (rv == -EAGAIN) { set_bit(CACHE_NEGATIVE, &h->flags); @@ -247,7 +247,7 @@ static int try_to_negate_entry(struct cache_detail *detail, struct cache_head *h detail); rv = -ENOENT; } - write_unlock(&detail->hash_lock); + spin_unlock(&detail->hash_lock); cache_fresh_unlocked(h, detail); return rv; } @@ -357,7 +357,7 @@ static struct delayed_work cache_cleaner; void sunrpc_init_cache_detail(struct cache_detail *cd) { - rwlock_init(&cd->hash_lock); + spin_lock_init(&cd->hash_lock); INIT_LIST_HEAD(&cd->queue); spin_lock(&cache_list_lock); cd->nextcheck = 0; @@ -377,11 +377,11 @@ void sunrpc_destroy_cache_detail(struct cache_detail *cd) { cache_purge(cd); spin_lock(&cache_list_lock); - write_lock(&cd->hash_lock); + spin_lock(&cd->hash_lock); if (current_detail == cd) current_detail = NULL; list_del_init(&cd->others); - write_unlock(&cd->hash_lock); + spin_unlock(&cd->hash_lock); spin_unlock(&cache_list_lock); if (list_empty(&cache_list)) { /* module must be being unloaded so its safe to kill the worker */ @@ -438,7 +438,7 @@ static int cache_clean(void) struct hlist_head *head; struct hlist_node *tmp; - write_lock(¤t_detail->hash_lock); + spin_lock(¤t_detail->hash_lock); /* Ok, now to clean this strand */ @@ -455,7 +455,7 @@ static int cache_clean(void) break; } - write_unlock(¤t_detail->hash_lock); + spin_unlock(¤t_detail->hash_lock); d = current_detail; if (!ch) current_index ++; @@ -510,9 +510,9 @@ void cache_purge(struct cache_detail *detail) struct hlist_node *tmp = NULL; int i = 0; - write_lock(&detail->hash_lock); + spin_lock(&detail->hash_lock); if (!detail->entries) { - write_unlock(&detail->hash_lock); + spin_unlock(&detail->hash_lock); return; } @@ -524,13 +524,13 @@ void cache_purge(struct cache_detail *detail) detail->entries--; set_bit(CACHE_CLEANED, &ch->flags); - write_unlock(&detail->hash_lock); + spin_unlock(&detail->hash_lock); cache_fresh_unlocked(ch, detail); cache_put(ch, detail); - write_lock(&detail->hash_lock); + spin_lock(&detail->hash_lock); } } - write_unlock(&detail->hash_lock); + spin_unlock(&detail->hash_lock); } EXPORT_SYMBOL_GPL(cache_purge); @@ -1873,13 +1873,13 @@ EXPORT_SYMBOL_GPL(sunrpc_cache_unregister_pipefs); void sunrpc_cache_unhash(struct cache_detail *cd, struct cache_head *h) { - write_lock(&cd->hash_lock); + spin_lock(&cd->hash_lock); if (!hlist_unhashed(&h->cache_list)){ hlist_del_init_rcu(&h->cache_list); cd->entries--; - write_unlock(&cd->hash_lock); + spin_unlock(&cd->hash_lock); cache_put(h, cd); } else - write_unlock(&cd->hash_lock); + spin_unlock(&cd->hash_lock); } EXPORT_SYMBOL_GPL(sunrpc_cache_unhash); -- cgit v1.2.3 From 4c8e5537bb28f0e4b2ca60ecb0ca1ea12b82adf6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 1 Oct 2018 10:41:53 -0400 Subject: SUNRPC: Simplify TCP receive code Use the fact that the iov iterators already have functionality for skipping a base offset. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- net/sunrpc/svcsock.c | 53 ++++++++++++++-------------------------------------- 1 file changed, 14 insertions(+), 39 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 5445145e639c..64765f08ae07 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -325,59 +325,34 @@ static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining) /* * Generic recvfrom routine. */ -static int svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, - int buflen) +static ssize_t svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, + unsigned int nr, size_t buflen, unsigned int base) { struct svc_sock *svsk = container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); - struct msghdr msg = { - .msg_flags = MSG_DONTWAIT, - }; - int len; + struct msghdr msg = { NULL }; + ssize_t len; rqstp->rq_xprt_hlen = 0; clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, iov, nr, buflen); - len = sock_recvmsg(svsk->sk_sock, &msg, msg.msg_flags); + if (base != 0) { + iov_iter_advance(&msg.msg_iter, base); + buflen -= base; + } + len = sock_recvmsg(svsk->sk_sock, &msg, MSG_DONTWAIT); /* If we read a full record, then assume there may be more * data to read (stream based sockets only!) */ if (len == buflen) set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); - dprintk("svc: socket %p recvfrom(%p, %zu) = %d\n", + dprintk("svc: socket %p recvfrom(%p, %zu) = %zd\n", svsk, iov[0].iov_base, iov[0].iov_len, len); return len; } -static int svc_partial_recvfrom(struct svc_rqst *rqstp, - struct kvec *iov, int nr, - int buflen, unsigned int base) -{ - size_t save_iovlen; - void *save_iovbase; - unsigned int i; - int ret; - - if (base == 0) - return svc_recvfrom(rqstp, iov, nr, buflen); - - for (i = 0; i < nr; i++) { - if (iov[i].iov_len > base) - break; - base -= iov[i].iov_len; - } - save_iovlen = iov[i].iov_len; - save_iovbase = iov[i].iov_base; - iov[i].iov_len -= base; - iov[i].iov_base += base; - ret = svc_recvfrom(rqstp, &iov[i], nr - i, buflen); - iov[i].iov_len = save_iovlen; - iov[i].iov_base = save_iovbase; - return ret; -} - /* * Set socket snd and rcv buffer lengths */ @@ -962,7 +937,8 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) want = sizeof(rpc_fraghdr) - svsk->sk_tcplen; iov.iov_base = ((char *) &svsk->sk_reclen) + svsk->sk_tcplen; iov.iov_len = want; - if ((len = svc_recvfrom(rqstp, &iov, 1, want)) < 0) + len = svc_recvfrom(rqstp, &iov, 1, want, 0); + if (len < 0) goto error; svsk->sk_tcplen += len; @@ -1088,14 +1064,13 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) vec = rqstp->rq_vec; - pnum = copy_pages_to_kvecs(&vec[0], &rqstp->rq_pages[0], - svsk->sk_datalen + want); + pnum = copy_pages_to_kvecs(&vec[0], &rqstp->rq_pages[0], base + want); rqstp->rq_respages = &rqstp->rq_pages[pnum]; rqstp->rq_next_page = rqstp->rq_respages + 1; /* Now receive data */ - len = svc_partial_recvfrom(rqstp, vec, pnum, want, base); + len = svc_recvfrom(rqstp, vec, pnum, base + want, base); if (len >= 0) { svsk->sk_tcplen += len; svsk->sk_datalen += len; -- cgit v1.2.3 From f3c1fd0ee294abd4367dfa72d89f016c682202f0 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 1 Oct 2018 14:15:56 -0400 Subject: svcrdma: Reduce max_send_sges There's no need to request a large number of send SGEs because the inline threshold already constrains the number of SGEs per Send. Signed-off-by: Chuck Lever Reviewed-by: Sagi Grimberg Signed-off-by: J. Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma_transport.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 2848cafd4a17..2f7ec8912f49 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -475,10 +475,12 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) /* Qualify the transport resource defaults with the * capabilities of this particular device */ - newxprt->sc_max_send_sges = dev->attrs.max_send_sge; - /* transport hdr, head iovec, one page list entry, tail iovec */ - if (newxprt->sc_max_send_sges < 4) { - pr_err("svcrdma: too few Send SGEs available (%d)\n", + /* Transport header, head iovec, tail iovec */ + newxprt->sc_max_send_sges = 3; + /* Add one SGE per page list entry */ + newxprt->sc_max_send_sges += svcrdma_max_req_size / PAGE_SIZE; + if (newxprt->sc_max_send_sges > dev->attrs.max_send_sge) { + pr_err("svcrdma: too few Send SGEs available (%d needed)\n", newxprt->sc_max_send_sges); goto errout; } -- cgit v1.2.3 From 596f2a1950437095fee3062b75935c3bfdda0289 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 1 Oct 2018 14:16:01 -0400 Subject: svcrdma: Remove ->release_rqst call in bc reply handler Similar to a change made in the client's forward channel reply handler: The xprt_release_rqst_cong() call is not necessary. Also, release xprt->recv_lock when taking xprt->transport_lock to avoid disabling and enabling BH's while holding another spin lock. Signed-off-by: Chuck Lever Reviewed-by: Sagi Grimberg Signed-off-by: J. Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index a68180090554..642f0310cc16 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -32,7 +32,6 @@ int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, __be32 *rdma_resp, struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); struct kvec *dst, *src = &rcvbuf->head[0]; struct rpc_rqst *req; - unsigned long cwnd; u32 credits; size_t len; __be32 xid; @@ -66,6 +65,8 @@ int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, __be32 *rdma_resp, if (dst->iov_len < len) goto out_unlock; memcpy(dst->iov_base, p, len); + xprt_pin_rqst(req); + spin_unlock(&xprt->recv_lock); credits = be32_to_cpup(rdma_resp + 2); if (credits == 0) @@ -74,15 +75,13 @@ int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, __be32 *rdma_resp, credits = r_xprt->rx_buf.rb_bc_max_requests; spin_lock_bh(&xprt->transport_lock); - cwnd = xprt->cwnd; xprt->cwnd = credits << RPC_CWNDSHIFT; - if (xprt->cwnd > cwnd) - xprt_release_rqst_cong(req->rq_task); spin_unlock_bh(&xprt->transport_lock); - + spin_lock(&xprt->recv_lock); ret = 0; xprt_complete_rqst(req->rq_task, rcvbuf->len); + xprt_unpin_rqst(req); rcvbuf->len = 0; out_unlock: -- cgit v1.2.3 From 07880fa4968b07db9df0f126bb7fbde245ed12ff Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 1 Oct 2018 14:16:06 -0400 Subject: svcrdma: Remove try_module_get from backchannel Since commit ffe1f0df5862 ("rpcrdma: Merge svcrdma and xprtrdma modules into one"), the forward and backchannel components are part of the same kernel module. A separate try_module_get() call in the backchannel code is no longer necessary. Signed-off-by: Chuck Lever Reviewed-by: Sagi Grimberg Signed-off-by: J. Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index 642f0310cc16..7e188bc793a2 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -5,8 +5,6 @@ * Support for backward direction RPCs on RPC/RDMA (server-side). */ -#include - #include #include "xprt_rdma.h" @@ -255,7 +253,6 @@ xprt_rdma_bc_put(struct rpc_xprt *xprt) dprintk("svcrdma: %s: xprt %p\n", __func__, xprt); xprt_free(xprt); - module_put(THIS_MODULE); } static const struct rpc_xprt_ops xprt_rdma_bc_procs = { @@ -327,20 +324,9 @@ xprt_setup_rdma_bc(struct xprt_create *args) args->bc_xprt->xpt_bc_xprt = xprt; xprt->bc_xprt = args->bc_xprt; - if (!try_module_get(THIS_MODULE)) - goto out_fail; - /* Final put for backchannel xprt is in __svc_rdma_free */ xprt_get(xprt); return xprt; - -out_fail: - xprt_rdma_free_addresses(xprt); - args->bc_xprt->xpt_bc_xprt = NULL; - args->bc_xprt->xpt_bc_xps = NULL; - xprt_put(xprt); - xprt_free(xprt); - return ERR_PTR(-EINVAL); } struct xprt_class xprt_rdma_bc = { -- cgit v1.2.3 From bb6ad5572c0022e17e846b382d7413cdcf8055be Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 9 Oct 2018 15:54:15 -0400 Subject: nfsd: Fix an Oops in free_session() In call_xpt_users(), we delete the entry from the list, but we do not reinitialise it. This triggers the list poisoning when we later call unregister_xpt_user() in nfsd4_del_conns(). Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields --- net/sunrpc/svc_xprt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 5185efb9027b..83ccd0221c98 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -989,7 +989,7 @@ static void call_xpt_users(struct svc_xprt *xprt) spin_lock(&xprt->xpt_lock); while (!list_empty(&xprt->xpt_users)) { u = list_first_entry(&xprt->xpt_users, struct svc_xpt_user, list); - list_del(&u->list); + list_del_init(&u->list); u->callback(u); } spin_unlock(&xprt->xpt_lock); -- cgit v1.2.3 From 57c8a661d95dff48dd9c2f2496139082bbaf241a Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Tue, 30 Oct 2018 15:09:49 -0700 Subject: mm: remove include/linux/bootmem.h Move remaining definitions and declarations from include/linux/bootmem.h into include/linux/memblock.h and remove the redundant header. The includes were replaced with the semantic patch below and then semi-automated removal of duplicated '#include @@ @@ - #include + #include [sfr@canb.auug.org.au: dma-direct: fix up for the removal of linux/bootmem.h] Link: http://lkml.kernel.org/r/20181002185342.133d1680@canb.auug.org.au [sfr@canb.auug.org.au: powerpc: fix up for removal of linux/bootmem.h] Link: http://lkml.kernel.org/r/20181005161406.73ef8727@canb.auug.org.au [sfr@canb.auug.org.au: x86/kaslr, ACPI/NUMA: fix for linux/bootmem.h removal] Link: http://lkml.kernel.org/r/20181008190341.5e396491@canb.auug.org.au Link: http://lkml.kernel.org/r/1536927045-23536-30-git-send-email-rppt@linux.vnet.ibm.com Signed-off-by: Mike Rapoport Signed-off-by: Stephen Rothwell Acked-by: Michal Hocko Cc: Catalin Marinas Cc: Chris Zankel Cc: "David S. Miller" Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Greg Kroah-Hartman Cc: Guan Xuetao Cc: Ingo Molnar Cc: "James E.J. Bottomley" Cc: Jonas Bonn Cc: Jonathan Corbet Cc: Ley Foon Tan Cc: Mark Salter Cc: Martin Schwidefsky Cc: Matt Turner Cc: Michael Ellerman Cc: Michal Simek Cc: Palmer Dabbelt Cc: Paul Burton Cc: Richard Kuo Cc: Richard Weinberger Cc: Rich Felker Cc: Russell King Cc: Serge Semin Cc: Thomas Gleixner Cc: Tony Luck Cc: Vineet Gupta Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/kernel/core_cia.c | 2 +- arch/alpha/kernel/core_irongate.c | 1 - arch/alpha/kernel/core_marvel.c | 2 +- arch/alpha/kernel/core_titan.c | 2 +- arch/alpha/kernel/core_tsunami.c | 2 +- arch/alpha/kernel/pci-noop.c | 2 +- arch/alpha/kernel/pci.c | 2 +- arch/alpha/kernel/pci_iommu.c | 2 +- arch/alpha/kernel/setup.c | 1 - arch/alpha/kernel/sys_nautilus.c | 2 +- arch/alpha/mm/init.c | 2 +- arch/alpha/mm/numa.c | 1 - arch/arc/kernel/unwind.c | 2 +- arch/arc/mm/highmem.c | 2 +- arch/arc/mm/init.c | 1 - arch/arm/kernel/devtree.c | 1 - arch/arm/kernel/setup.c | 1 - arch/arm/mach-omap2/omap_hwmod.c | 2 +- arch/arm/mm/dma-mapping.c | 1 - arch/arm/mm/init.c | 1 - arch/arm/xen/mm.c | 1 - arch/arm/xen/p2m.c | 2 +- arch/arm64/kernel/acpi.c | 1 - arch/arm64/kernel/acpi_numa.c | 1 - arch/arm64/kernel/setup.c | 1 - arch/arm64/mm/dma-mapping.c | 2 +- arch/arm64/mm/init.c | 1 - arch/arm64/mm/kasan_init.c | 1 - arch/arm64/mm/numa.c | 1 - arch/c6x/kernel/setup.c | 1 - arch/c6x/mm/init.c | 2 +- arch/h8300/kernel/setup.c | 1 - arch/h8300/mm/init.c | 2 +- arch/hexagon/kernel/dma.c | 2 +- arch/hexagon/kernel/setup.c | 2 +- arch/hexagon/mm/init.c | 1 - arch/ia64/kernel/crash.c | 2 +- arch/ia64/kernel/efi.c | 2 +- arch/ia64/kernel/ia64_ksyms.c | 2 +- arch/ia64/kernel/iosapic.c | 2 +- arch/ia64/kernel/mca.c | 2 +- arch/ia64/kernel/mca_drv.c | 2 +- arch/ia64/kernel/setup.c | 1 - arch/ia64/kernel/smpboot.c | 2 +- arch/ia64/kernel/topology.c | 2 +- arch/ia64/kernel/unwind.c | 2 +- arch/ia64/mm/contig.c | 1 - arch/ia64/mm/discontig.c | 1 - arch/ia64/mm/init.c | 1 - arch/ia64/mm/numa.c | 2 +- arch/ia64/mm/tlb.c | 2 +- arch/ia64/pci/pci.c | 2 +- arch/ia64/sn/kernel/bte.c | 2 +- arch/ia64/sn/kernel/io_common.c | 2 +- arch/ia64/sn/kernel/setup.c | 2 +- arch/m68k/atari/stram.c | 2 +- arch/m68k/coldfire/m54xx.c | 2 +- arch/m68k/kernel/setup_mm.c | 1 - arch/m68k/kernel/setup_no.c | 1 - arch/m68k/kernel/uboot.c | 2 +- arch/m68k/mm/init.c | 2 +- arch/m68k/mm/mcfmmu.c | 1 - arch/m68k/mm/motorola.c | 1 - arch/m68k/mm/sun3mmu.c | 2 +- arch/m68k/sun3/config.c | 2 +- arch/m68k/sun3/dvma.c | 2 +- arch/m68k/sun3/mmu_emu.c | 2 +- arch/m68k/sun3/sun3dvma.c | 2 +- arch/m68k/sun3x/dvma.c | 2 +- arch/microblaze/mm/consistent.c | 2 +- arch/microblaze/mm/init.c | 3 +- arch/microblaze/pci/pci-common.c | 2 +- arch/mips/ar7/memory.c | 2 +- arch/mips/ath79/setup.c | 2 +- arch/mips/bcm63xx/prom.c | 2 +- arch/mips/bcm63xx/setup.c | 2 +- arch/mips/bmips/setup.c | 2 +- arch/mips/cavium-octeon/dma-octeon.c | 2 +- arch/mips/dec/prom/memory.c | 2 +- arch/mips/emma/common/prom.c | 2 +- arch/mips/fw/arc/memory.c | 2 +- arch/mips/jazz/jazzdma.c | 2 +- arch/mips/kernel/crash.c | 2 +- arch/mips/kernel/crash_dump.c | 2 +- arch/mips/kernel/prom.c | 2 +- arch/mips/kernel/setup.c | 1 - arch/mips/kernel/traps.c | 1 - arch/mips/kernel/vpe.c | 2 +- arch/mips/kvm/commpage.c | 2 +- arch/mips/kvm/dyntrans.c | 2 +- arch/mips/kvm/emulate.c | 2 +- arch/mips/kvm/interrupt.c | 2 +- arch/mips/kvm/mips.c | 2 +- arch/mips/lantiq/prom.c | 2 +- arch/mips/lasat/prom.c | 2 +- arch/mips/loongson64/common/init.c | 2 +- arch/mips/loongson64/loongson-3/numa.c | 1 - arch/mips/mm/init.c | 2 +- arch/mips/mm/pgtable-32.c | 2 +- arch/mips/mti-malta/malta-memory.c | 2 +- arch/mips/netlogic/xlp/dt.c | 2 +- arch/mips/pci/pci-legacy.c | 2 +- arch/mips/pci/pci.c | 2 +- arch/mips/ralink/of.c | 2 +- arch/mips/rb532/prom.c | 2 +- arch/mips/sgi-ip27/ip27-memory.c | 1 - arch/mips/sibyte/common/cfe.c | 2 +- arch/mips/sibyte/swarm/setup.c | 2 +- arch/mips/txx9/rbtx4938/prom.c | 2 +- arch/nds32/kernel/setup.c | 3 +- arch/nds32/mm/highmem.c | 2 +- arch/nds32/mm/init.c | 3 +- arch/nios2/kernel/prom.c | 2 +- arch/nios2/kernel/setup.c | 1 - arch/nios2/mm/init.c | 2 +- arch/openrisc/kernel/setup.c | 3 +- arch/openrisc/mm/init.c | 3 +- arch/parisc/mm/init.c | 1 - arch/powerpc/kernel/pci_32.c | 2 +- arch/powerpc/kernel/setup-common.c | 1 - arch/powerpc/kernel/setup_64.c | 3 +- arch/powerpc/lib/alloc.c | 2 +- arch/powerpc/mm/hugetlbpage.c | 1 - arch/powerpc/mm/mem.c | 3 +- arch/powerpc/mm/mmu_context_nohash.c | 2 +- arch/powerpc/mm/numa.c | 3 +- arch/powerpc/platforms/powermac/nvram.c | 2 +- arch/powerpc/platforms/powernv/pci-ioda.c | 3 +- arch/powerpc/platforms/ps3/setup.c | 2 +- arch/powerpc/sysdev/msi_bitmap.c | 2 +- arch/riscv/mm/init.c | 3 +- arch/s390/kernel/crash_dump.c | 3 +- arch/s390/kernel/setup.c | 1 - arch/s390/kernel/smp.c | 3 +- arch/s390/kernel/topology.c | 2 +- arch/s390/kernel/vdso.c | 2 +- arch/s390/mm/extmem.c | 2 +- arch/s390/mm/init.c | 3 +- arch/s390/mm/vmem.c | 3 +- arch/s390/numa/mode_emu.c | 1 - arch/s390/numa/numa.c | 1 - arch/s390/numa/toptree.c | 2 +- arch/sh/mm/init.c | 3 +- arch/sh/mm/ioremap_fixed.c | 2 +- arch/sparc/kernel/mdesc.c | 2 - arch/sparc/kernel/prom_32.c | 2 +- arch/sparc/kernel/setup_64.c | 2 +- arch/sparc/kernel/smp_64.c | 2 +- arch/sparc/mm/init_32.c | 1 - arch/sparc/mm/init_64.c | 3 +- arch/sparc/mm/srmmu.c | 2 +- arch/um/drivers/net_kern.c | 2 +- arch/um/drivers/vector_kern.c | 2 +- arch/um/kernel/initrd.c | 2 +- arch/um/kernel/mem.c | 1 - arch/um/kernel/physmem.c | 1 - arch/unicore32/kernel/hibernate.c | 2 +- arch/unicore32/kernel/setup.c | 3 +- arch/unicore32/mm/init.c | 3 +- arch/unicore32/mm/mmu.c | 1 - arch/x86/kernel/acpi/boot.c | 2 +- arch/x86/kernel/acpi/sleep.c | 1 - arch/x86/kernel/apic/apic.c | 2 +- arch/x86/kernel/apic/io_apic.c | 2 +- arch/x86/kernel/cpu/common.c | 2 +- arch/x86/kernel/e820.c | 3 +- arch/x86/kernel/mpparse.c | 1 - arch/x86/kernel/pci-dma.c | 2 +- arch/x86/kernel/pci-swiotlb.c | 2 +- arch/x86/kernel/pvclock.c | 2 +- arch/x86/kernel/setup.c | 1 - arch/x86/kernel/setup_percpu.c | 1 - arch/x86/kernel/smpboot.c | 2 +- arch/x86/kernel/tce_64.c | 1 - arch/x86/mm/amdtopology.c | 1 - arch/x86/mm/fault.c | 2 +- arch/x86/mm/highmem_32.c | 2 +- arch/x86/mm/init.c | 1 - arch/x86/mm/init_32.c | 1 - arch/x86/mm/init_64.c | 1 - arch/x86/mm/ioremap.c | 2 +- arch/x86/mm/kasan_init_64.c | 3 +- arch/x86/mm/kaslr.c | 1 + arch/x86/mm/numa.c | 1 - arch/x86/mm/numa_32.c | 1 - arch/x86/mm/numa_64.c | 2 +- arch/x86/mm/numa_emulation.c | 1 - arch/x86/mm/pageattr-test.c | 2 +- arch/x86/mm/pageattr.c | 2 +- arch/x86/mm/pat.c | 2 +- arch/x86/mm/physaddr.c | 2 +- arch/x86/pci/i386.c | 2 +- arch/x86/platform/efi/efi.c | 3 +- arch/x86/platform/efi/efi_64.c | 2 +- arch/x86/platform/efi/quirks.c | 1 - arch/x86/platform/olpc/olpc_dt.c | 2 +- arch/x86/power/hibernate_32.c | 2 +- arch/x86/xen/enlighten.c | 2 +- arch/x86/xen/enlighten_pv.c | 3 +- arch/x86/xen/p2m.c | 1 - arch/xtensa/kernel/pci.c | 2 +- arch/xtensa/mm/cache.c | 2 +- arch/xtensa/mm/init.c | 2 +- arch/xtensa/mm/kasan_init.c | 3 +- arch/xtensa/mm/mmu.c | 2 +- arch/xtensa/platforms/iss/network.c | 2 +- arch/xtensa/platforms/iss/setup.c | 2 +- block/blk-settings.c | 2 +- block/bounce.c | 2 +- drivers/acpi/numa.c | 1 - drivers/acpi/tables.c | 3 +- drivers/base/platform.c | 2 +- drivers/clk/ti/clk.c | 2 +- drivers/firmware/dmi_scan.c | 2 +- drivers/firmware/efi/apple-properties.c | 2 +- drivers/firmware/iscsi_ibft_find.c | 2 +- drivers/firmware/memmap.c | 2 +- drivers/iommu/mtk_iommu.c | 2 +- drivers/iommu/mtk_iommu_v1.c | 2 +- drivers/macintosh/smu.c | 3 +- drivers/mtd/ar7part.c | 2 +- drivers/net/arcnet/arc-rimi.c | 2 +- drivers/net/arcnet/com20020-isa.c | 2 +- drivers/net/arcnet/com90io.c | 2 +- drivers/of/fdt.c | 1 - drivers/of/unittest.c | 2 +- drivers/s390/char/fs3270.c | 2 +- drivers/s390/char/tty3270.c | 2 +- drivers/s390/cio/cmf.c | 2 +- drivers/s390/virtio/virtio_ccw.c | 2 +- drivers/sfi/sfi_core.c | 2 +- drivers/tty/serial/cpm_uart/cpm_uart_core.c | 2 +- drivers/tty/serial/cpm_uart/cpm_uart_cpm1.c | 2 +- drivers/tty/serial/cpm_uart/cpm_uart_cpm2.c | 2 +- drivers/usb/early/xhci-dbc.c | 1 - drivers/xen/balloon.c | 2 +- drivers/xen/events/events_base.c | 2 +- drivers/xen/grant-table.c | 2 +- drivers/xen/swiotlb-xen.c | 1 - drivers/xen/xen-selfballoon.c | 2 +- fs/dcache.c | 2 +- fs/inode.c | 2 +- fs/namespace.c | 2 +- fs/proc/kcore.c | 2 +- fs/proc/page.c | 2 +- fs/proc/vmcore.c | 2 +- include/linux/bootmem.h | 173 ---------------------------- include/linux/memblock.h | 151 +++++++++++++++++++++++- init/main.c | 2 +- kernel/dma/direct.c | 2 +- kernel/dma/swiotlb.c | 2 +- kernel/futex.c | 2 +- kernel/locking/qspinlock_paravirt.h | 2 +- kernel/pid.c | 2 +- kernel/power/snapshot.c | 2 +- kernel/printk/printk.c | 1 - kernel/profile.c | 2 +- lib/cpumask.c | 2 +- mm/hugetlb.c | 1 - mm/kasan/kasan_init.c | 3 +- mm/kmemleak.c | 2 +- mm/memblock.c | 1 - mm/memory_hotplug.c | 1 - mm/page_alloc.c | 1 - mm/page_ext.c | 2 +- mm/page_idle.c | 2 +- mm/page_owner.c | 2 +- mm/percpu.c | 2 +- mm/sparse-vmemmap.c | 1 - mm/sparse.c | 1 - net/ipv4/inet_hashtables.c | 2 +- net/ipv4/tcp.c | 2 +- net/ipv4/udp.c | 2 +- net/sctp/protocol.c | 2 +- net/xfrm/xfrm_hash.c | 2 +- 275 files changed, 353 insertions(+), 476 deletions(-) delete mode 100644 include/linux/bootmem.h (limited to 'net') diff --git a/arch/alpha/kernel/core_cia.c b/arch/alpha/kernel/core_cia.c index 026ee955fd10..867e8730b0c5 100644 --- a/arch/alpha/kernel/core_cia.c +++ b/arch/alpha/kernel/core_cia.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/alpha/kernel/core_irongate.c b/arch/alpha/kernel/core_irongate.c index 35572be9deb5..a9fd133a7fb2 100644 --- a/arch/alpha/kernel/core_irongate.c +++ b/arch/alpha/kernel/core_irongate.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include diff --git a/arch/alpha/kernel/core_marvel.c b/arch/alpha/kernel/core_marvel.c index 1f00c9433b10..8a568c4d8e81 100644 --- a/arch/alpha/kernel/core_marvel.c +++ b/arch/alpha/kernel/core_marvel.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/alpha/kernel/core_titan.c b/arch/alpha/kernel/core_titan.c index 132b06bdf903..97551597581b 100644 --- a/arch/alpha/kernel/core_titan.c +++ b/arch/alpha/kernel/core_titan.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/alpha/kernel/core_tsunami.c b/arch/alpha/kernel/core_tsunami.c index e7c956ea46b6..f334b8928d72 100644 --- a/arch/alpha/kernel/core_tsunami.c +++ b/arch/alpha/kernel/core_tsunami.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/alpha/kernel/pci-noop.c b/arch/alpha/kernel/pci-noop.c index 59cbfc2bf2c5..a9378ee0c2f1 100644 --- a/arch/alpha/kernel/pci-noop.c +++ b/arch/alpha/kernel/pci-noop.c @@ -7,7 +7,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/alpha/kernel/pci.c b/arch/alpha/kernel/pci.c index 4cc3eb92f55b..13937e72d875 100644 --- a/arch/alpha/kernel/pci.c +++ b/arch/alpha/kernel/pci.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c index 5d178c7ba5b2..82cf950bda2a 100644 --- a/arch/alpha/kernel/pci_iommu.c +++ b/arch/alpha/kernel/pci_iommu.c @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c index 64c06a0adf3d..a37fd990bd55 100644 --- a/arch/alpha/kernel/setup.c +++ b/arch/alpha/kernel/setup.c @@ -29,7 +29,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/alpha/kernel/sys_nautilus.c b/arch/alpha/kernel/sys_nautilus.c index ff4f54b86c7f..cd9a112d67ff 100644 --- a/arch/alpha/kernel/sys_nautilus.c +++ b/arch/alpha/kernel/sys_nautilus.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c index 853d15344934..a42fc5c4db89 100644 --- a/arch/alpha/mm/init.c +++ b/arch/alpha/mm/init.c @@ -19,7 +19,7 @@ #include #include #include -#include /* max_low_pfn */ +#include /* max_low_pfn */ #include #include diff --git a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c index 26cd925d19b1..74846553e3f1 100644 --- a/arch/alpha/mm/numa.c +++ b/arch/alpha/mm/numa.c @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c index 2a01dd1005f4..d34f69eb1a95 100644 --- a/arch/arc/kernel/unwind.c +++ b/arch/arc/kernel/unwind.c @@ -15,7 +15,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/arc/mm/highmem.c b/arch/arc/mm/highmem.c index f582dc8944c9..48e700151810 100644 --- a/arch/arc/mm/highmem.c +++ b/arch/arc/mm/highmem.c @@ -7,7 +7,7 @@ * */ -#include +#include #include #include #include diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index 0f29c6548779..f8fe5668b30f 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -8,7 +8,6 @@ #include #include -#include #include #ifdef CONFIG_BLK_DEV_INITRD #include diff --git a/arch/arm/kernel/devtree.c b/arch/arm/kernel/devtree.c index 13bcd3b867cb..e3057c1b55b9 100644 --- a/arch/arm/kernel/devtree.c +++ b/arch/arm/kernel/devtree.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 39e6090d23ac..840a4adc69fc 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index 1f9b34a7eccd..cd5732ab0cdf 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -141,7 +141,7 @@ #include #include #include -#include +#include #include diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 66566472c153..661fe48ab78d 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -9,7 +9,6 @@ * * DMA uncached mapping support. */ -#include #include #include #include diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index d421a10c93a8..32e4845af2b6 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c index 785d2a562a23..cb44aa290e73 100644 --- a/arch/arm/xen/mm.c +++ b/arch/arm/xen/mm.c @@ -1,6 +1,5 @@ #include #include -#include #include #include #include diff --git a/arch/arm/xen/p2m.c b/arch/arm/xen/p2m.c index 0641ba54ab62..e70a49fc8dcd 100644 --- a/arch/arm/xen/p2m.c +++ b/arch/arm/xen/p2m.c @@ -1,4 +1,4 @@ -#include +#include #include #include #include diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index ed46dc188b22..44e3c351e1ea 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -16,7 +16,6 @@ #define pr_fmt(fmt) "ACPI: " fmt #include -#include #include #include #include diff --git a/arch/arm64/kernel/acpi_numa.c b/arch/arm64/kernel/acpi_numa.c index 4f4f1815e047..eac1d0cc595c 100644 --- a/arch/arm64/kernel/acpi_numa.c +++ b/arch/arm64/kernel/acpi_numa.c @@ -18,7 +18,6 @@ #include #include -#include #include #include #include diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 3428427f6c93..7ce7306f1d75 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index d190612b8f33..3a703e5d4e32 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -19,7 +19,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index d8d73073835f..9d9582cac6c4 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index 6a65a2912d36..63527e585aac 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -11,7 +11,6 @@ */ #define pr_fmt(fmt) "kasan: " fmt -#include #include #include #include diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index 0bff116c07a8..27a31efd9e8e 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -20,7 +20,6 @@ #define pr_fmt(fmt) "NUMA: " fmt #include -#include #include #include #include diff --git a/arch/c6x/kernel/setup.c b/arch/c6x/kernel/setup.c index 05d96a9541b5..2e1c0ea22eb0 100644 --- a/arch/c6x/kernel/setup.c +++ b/arch/c6x/kernel/setup.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/c6x/mm/init.c b/arch/c6x/mm/init.c index 3383df8b3508..af5ada0520be 100644 --- a/arch/c6x/mm/init.c +++ b/arch/c6x/mm/init.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #ifdef CONFIG_BLK_DEV_RAM #include #endif diff --git a/arch/h8300/kernel/setup.c b/arch/h8300/kernel/setup.c index 34e2df5c0d6d..b32bfa1fe99e 100644 --- a/arch/h8300/kernel/setup.c +++ b/arch/h8300/kernel/setup.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/h8300/mm/init.c b/arch/h8300/mm/init.c index f2bf4487aabd..6519252ac4db 100644 --- a/arch/h8300/mm/init.c +++ b/arch/h8300/mm/init.c @@ -30,7 +30,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/hexagon/kernel/dma.c b/arch/hexagon/kernel/dma.c index 706699374444..38eaa7b703e7 100644 --- a/arch/hexagon/kernel/dma.c +++ b/arch/hexagon/kernel/dma.c @@ -19,7 +19,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/hexagon/kernel/setup.c b/arch/hexagon/kernel/setup.c index dc8c7e75b5d1..b3c3e04d4e57 100644 --- a/arch/hexagon/kernel/setup.c +++ b/arch/hexagon/kernel/setup.c @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/hexagon/mm/init.c b/arch/hexagon/mm/init.c index 88643faf3981..1719ede9e9bd 100644 --- a/arch/hexagon/mm/init.c +++ b/arch/hexagon/mm/init.c @@ -20,7 +20,6 @@ #include #include -#include #include #include #include diff --git a/arch/ia64/kernel/crash.c b/arch/ia64/kernel/crash.c index 39f4433a6f0e..bec762a9b418 100644 --- a/arch/ia64/kernel/crash.c +++ b/arch/ia64/kernel/crash.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c index f77d80edddfe..8f106638913c 100644 --- a/arch/ia64/kernel/efi.c +++ b/arch/ia64/kernel/efi.c @@ -23,7 +23,7 @@ * Skip non-WB memory and ignore empty memory ranges. */ #include -#include +#include #include #include #include diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c index 6b51c88e3578..b49fe6f618ed 100644 --- a/arch/ia64/kernel/ia64_ksyms.c +++ b/arch/ia64/kernel/ia64_ksyms.c @@ -6,7 +6,7 @@ #ifdef CONFIG_VIRTUAL_MEM_MAP #include #include -#include +#include EXPORT_SYMBOL(min_low_pfn); /* defined by bootmem.c, but not exported by generic code */ EXPORT_SYMBOL(max_low_pfn); /* defined by bootmem.c, but not exported by generic code */ #endif diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c index 550243a94b5d..fe6e4946672e 100644 --- a/arch/ia64/kernel/iosapic.c +++ b/arch/ia64/kernel/iosapic.c @@ -90,7 +90,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index 71209766c47f..9a6603f8e409 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -77,7 +77,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c index dfe40cbdf3b3..45f956ad715a 100644 --- a/arch/ia64/kernel/mca_drv.c +++ b/arch/ia64/kernel/mca_drv.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index 0e6c2d9fb498..583a3746d70b 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -27,7 +27,6 @@ #include #include -#include #include #include #include diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index 74fe317477e6..51ec944b036c 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -24,7 +24,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c index 9b820f7a6a98..e311ee13e61d 100644 --- a/arch/ia64/kernel/topology.c +++ b/arch/ia64/kernel/topology.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/ia64/kernel/unwind.c b/arch/ia64/kernel/unwind.c index e04efa088902..7601fe0622d2 100644 --- a/arch/ia64/kernel/unwind.c +++ b/arch/ia64/kernel/unwind.c @@ -28,7 +28,7 @@ * acquired, then the read-write lock must be acquired first. */ #include -#include +#include #include #include #include diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c index 9e5c23a6b8b4..6e447234205c 100644 --- a/arch/ia64/mm/contig.c +++ b/arch/ia64/mm/contig.c @@ -14,7 +14,6 @@ * Routines used by ia64 machines with contiguous (or virtually contiguous) * memory. */ -#include #include #include #include diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index 70609f823960..8a965784340c 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 43ea4a47163d..d5e12ff1d73c 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -8,7 +8,6 @@ #include #include -#include #include #include #include diff --git a/arch/ia64/mm/numa.c b/arch/ia64/mm/numa.c index aa19b7ac8222..3861d6e32d5f 100644 --- a/arch/ia64/mm/numa.c +++ b/arch/ia64/mm/numa.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c index 5554863b4c9b..ab545daff7c3 100644 --- a/arch/ia64/mm/tlb.c +++ b/arch/ia64/mm/tlb.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c index 5d71800df431..196a0dd7ff97 100644 --- a/arch/ia64/pci/pci.c +++ b/arch/ia64/pci/pci.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/ia64/sn/kernel/bte.c b/arch/ia64/sn/kernel/bte.c index 9146192b86f5..9900e6d4add6 100644 --- a/arch/ia64/sn/kernel/bte.c +++ b/arch/ia64/sn/kernel/bte.c @@ -16,7 +16,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/ia64/sn/kernel/io_common.c b/arch/ia64/sn/kernel/io_common.c index 8b05d5581615..98f55220c67d 100644 --- a/arch/ia64/sn/kernel/io_common.c +++ b/arch/ia64/sn/kernel/io_common.c @@ -6,7 +6,7 @@ * Copyright (C) 2006 Silicon Graphics, Inc. All rights reserved. */ -#include +#include #include #include #include diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c index ab2564f95199..71ad6b0ccab4 100644 --- a/arch/ia64/sn/kernel/setup.c +++ b/arch/ia64/sn/kernel/setup.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/m68k/atari/stram.c b/arch/m68k/atari/stram.c index 1089d67df315..6ffc204eb07d 100644 --- a/arch/m68k/atari/stram.c +++ b/arch/m68k/atari/stram.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/m68k/coldfire/m54xx.c b/arch/m68k/coldfire/m54xx.c index adad03ca6e11..360c723c0ae6 100644 --- a/arch/m68k/coldfire/m54xx.c +++ b/arch/m68k/coldfire/m54xx.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/m68k/kernel/setup_mm.c b/arch/m68k/kernel/setup_mm.c index 5d3596c180f9..a1a3eaeaf58c 100644 --- a/arch/m68k/kernel/setup_mm.c +++ b/arch/m68k/kernel/setup_mm.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/m68k/kernel/setup_no.c b/arch/m68k/kernel/setup_no.c index cfd5475bfc31..3c5def10d486 100644 --- a/arch/m68k/kernel/setup_no.c +++ b/arch/m68k/kernel/setup_no.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/m68k/kernel/uboot.c b/arch/m68k/kernel/uboot.c index 107082877064..1b4c562753da 100644 --- a/arch/m68k/kernel/uboot.c +++ b/arch/m68k/kernel/uboot.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/m68k/mm/init.c b/arch/m68k/mm/init.c index ae49ae4d3049..933c33e76a48 100644 --- a/arch/m68k/mm/init.c +++ b/arch/m68k/mm/init.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/m68k/mm/mcfmmu.c b/arch/m68k/mm/mcfmmu.c index 38a1d92dd555..0de4999a3810 100644 --- a/arch/m68k/mm/mcfmmu.c +++ b/arch/m68k/mm/mcfmmu.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include diff --git a/arch/m68k/mm/motorola.c b/arch/m68k/mm/motorola.c index 2113eec8dbf9..7497cf30bf1c 100644 --- a/arch/m68k/mm/motorola.c +++ b/arch/m68k/mm/motorola.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include diff --git a/arch/m68k/mm/sun3mmu.c b/arch/m68k/mm/sun3mmu.c index 19c05ab9824d..f736db48a2e1 100644 --- a/arch/m68k/mm/sun3mmu.c +++ b/arch/m68k/mm/sun3mmu.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/m68k/sun3/config.c b/arch/m68k/sun3/config.c index 79a2bb857906..542c4404861c 100644 --- a/arch/m68k/sun3/config.c +++ b/arch/m68k/sun3/config.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/m68k/sun3/dvma.c b/arch/m68k/sun3/dvma.c index 5f92c72b05c3..a2c1c9304895 100644 --- a/arch/m68k/sun3/dvma.c +++ b/arch/m68k/sun3/dvma.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/m68k/sun3/mmu_emu.c b/arch/m68k/sun3/mmu_emu.c index d30da12a1702..582a1284059a 100644 --- a/arch/m68k/sun3/mmu_emu.c +++ b/arch/m68k/sun3/mmu_emu.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/m68k/sun3/sun3dvma.c b/arch/m68k/sun3/sun3dvma.c index 72d94585b52e..8be8b750c629 100644 --- a/arch/m68k/sun3/sun3dvma.c +++ b/arch/m68k/sun3/sun3dvma.c @@ -7,7 +7,7 @@ * Contains common routines for sun3/sun3x DVMA management. */ -#include +#include #include #include #include diff --git a/arch/m68k/sun3x/dvma.c b/arch/m68k/sun3x/dvma.c index b2acbc862f60..89e630e66555 100644 --- a/arch/m68k/sun3x/dvma.c +++ b/arch/m68k/sun3x/dvma.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/microblaze/mm/consistent.c b/arch/microblaze/mm/consistent.c index d801cc5f5b95..45e0a1aa9357 100644 --- a/arch/microblaze/mm/consistent.c +++ b/arch/microblaze/mm/consistent.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c index 9989740d397a..8c14988f52f2 100644 --- a/arch/microblaze/mm/init.c +++ b/arch/microblaze/mm/init.c @@ -7,10 +7,9 @@ * for more details. */ -#include +#include #include #include -#include #include /* mem_init */ #include #include diff --git a/arch/microblaze/pci/pci-common.c b/arch/microblaze/pci/pci-common.c index 2ffd171af8b6..6b89a66ec1a5 100644 --- a/arch/microblaze/pci/pci-common.c +++ b/arch/microblaze/pci/pci-common.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mips/ar7/memory.c b/arch/mips/ar7/memory.c index 0332f0514d05..80390a9ec264 100644 --- a/arch/mips/ar7/memory.c +++ b/arch/mips/ar7/memory.c @@ -16,7 +16,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ -#include +#include #include #include #include diff --git a/arch/mips/ath79/setup.c b/arch/mips/ath79/setup.c index 4c7a93f4039a..9728abcb18fa 100644 --- a/arch/mips/ath79/setup.c +++ b/arch/mips/ath79/setup.c @@ -14,7 +14,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/mips/bcm63xx/prom.c b/arch/mips/bcm63xx/prom.c index 7019e2967009..77a836e661c9 100644 --- a/arch/mips/bcm63xx/prom.c +++ b/arch/mips/bcm63xx/prom.c @@ -7,7 +7,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/mips/bcm63xx/setup.c b/arch/mips/bcm63xx/setup.c index 2be9caaa2085..e28ee9a7cc7e 100644 --- a/arch/mips/bcm63xx/setup.c +++ b/arch/mips/bcm63xx/setup.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mips/bmips/setup.c b/arch/mips/bmips/setup.c index 6329c5f780d6..1738a06396f9 100644 --- a/arch/mips/bmips/setup.c +++ b/arch/mips/bmips/setup.c @@ -9,7 +9,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/mips/cavium-octeon/dma-octeon.c b/arch/mips/cavium-octeon/dma-octeon.c index c44c1a654471..e8eb60ed99f2 100644 --- a/arch/mips/cavium-octeon/dma-octeon.c +++ b/arch/mips/cavium-octeon/dma-octeon.c @@ -11,7 +11,7 @@ * Copyright (C) 2010 Cavium Networks, Inc. */ #include -#include +#include #include #include #include diff --git a/arch/mips/dec/prom/memory.c b/arch/mips/dec/prom/memory.c index a2acc6454cf3..5073d2ed78bb 100644 --- a/arch/mips/dec/prom/memory.c +++ b/arch/mips/dec/prom/memory.c @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/mips/emma/common/prom.c b/arch/mips/emma/common/prom.c index cae42259d6da..675337b8a4a0 100644 --- a/arch/mips/emma/common/prom.c +++ b/arch/mips/emma/common/prom.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/mips/fw/arc/memory.c b/arch/mips/fw/arc/memory.c index dd9496f26e6a..429b7f8d2aeb 100644 --- a/arch/mips/fw/arc/memory.c +++ b/arch/mips/fw/arc/memory.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/mips/jazz/jazzdma.c b/arch/mips/jazz/jazzdma.c index 0a0aaf39fd16..4c41ed0a637e 100644 --- a/arch/mips/jazz/jazzdma.c +++ b/arch/mips/jazz/jazzdma.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mips/kernel/crash.c b/arch/mips/kernel/crash.c index 2c7288041a99..81845ba04835 100644 --- a/arch/mips/kernel/crash.c +++ b/arch/mips/kernel/crash.c @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mips/kernel/crash_dump.c b/arch/mips/kernel/crash_dump.c index a8657d29c62e..01b2bd95ba1f 100644 --- a/arch/mips/kernel/crash_dump.c +++ b/arch/mips/kernel/crash_dump.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include -#include +#include #include #include #include diff --git a/arch/mips/kernel/prom.c b/arch/mips/kernel/prom.c index 89950b7bf536..93b8e0b4332f 100644 --- a/arch/mips/kernel/prom.c +++ b/arch/mips/kernel/prom.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index 31522d3bc8bf..41c1683761bb 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 623dc18f7f2f..0f852e1b5891 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/mips/kernel/vpe.c b/arch/mips/kernel/vpe.c index 0bef238d2c0c..6176b9acba95 100644 --- a/arch/mips/kernel/vpe.c +++ b/arch/mips/kernel/vpe.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mips/kvm/commpage.c b/arch/mips/kvm/commpage.c index f43629979a0e..5812e6145801 100644 --- a/arch/mips/kvm/commpage.c +++ b/arch/mips/kvm/commpage.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mips/kvm/dyntrans.c b/arch/mips/kvm/dyntrans.c index f8e772564d74..d77b61b3d6ee 100644 --- a/arch/mips/kvm/dyntrans.c +++ b/arch/mips/kvm/dyntrans.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include "commpage.h" diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c index 4144bfaef137..ec9ed23bca7f 100644 --- a/arch/mips/kvm/emulate.c +++ b/arch/mips/kvm/emulate.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mips/kvm/interrupt.c b/arch/mips/kvm/interrupt.c index aa0a1a00faf6..7257e8b6f5a9 100644 --- a/arch/mips/kvm/interrupt.c +++ b/arch/mips/kvm/interrupt.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index f7ea8e21656b..1fcc4d149054 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/mips/lantiq/prom.c b/arch/mips/lantiq/prom.c index d984bd5c2ec5..14d4c5e2b42f 100644 --- a/arch/mips/lantiq/prom.c +++ b/arch/mips/lantiq/prom.c @@ -8,7 +8,7 @@ #include #include -#include +#include #include #include diff --git a/arch/mips/lasat/prom.c b/arch/mips/lasat/prom.c index 37b8fc5b9ac9..5ce1407de2d5 100644 --- a/arch/mips/lasat/prom.c +++ b/arch/mips/lasat/prom.c @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mips/loongson64/common/init.c b/arch/mips/loongson64/common/init.c index 6ef17120722f..c073fbcb9805 100644 --- a/arch/mips/loongson64/common/init.c +++ b/arch/mips/loongson64/common/init.c @@ -8,7 +8,7 @@ * option) any later version. */ -#include +#include #include #include #include diff --git a/arch/mips/loongson64/loongson-3/numa.c b/arch/mips/loongson64/loongson-3/numa.c index 703ad4536fe0..622761878cd1 100644 --- a/arch/mips/loongson64/loongson-3/numa.c +++ b/arch/mips/loongson64/loongson-3/numa.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 0893b6136498..b521d8e2d359 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mips/mm/pgtable-32.c b/arch/mips/mm/pgtable-32.c index b19a3c506b1e..e2a33adc0f29 100644 --- a/arch/mips/mm/pgtable-32.c +++ b/arch/mips/mm/pgtable-32.c @@ -7,7 +7,7 @@ */ #include #include -#include +#include #include #include #include diff --git a/arch/mips/mti-malta/malta-memory.c b/arch/mips/mti-malta/malta-memory.c index a47556723b85..868921adef1d 100644 --- a/arch/mips/mti-malta/malta-memory.c +++ b/arch/mips/mti-malta/malta-memory.c @@ -12,7 +12,7 @@ * Steven J. Hill */ #include -#include +#include #include #include diff --git a/arch/mips/netlogic/xlp/dt.c b/arch/mips/netlogic/xlp/dt.c index b5ba83f4c646..c856f2a3ea42 100644 --- a/arch/mips/netlogic/xlp/dt.c +++ b/arch/mips/netlogic/xlp/dt.c @@ -33,7 +33,7 @@ */ #include -#include +#include #include #include diff --git a/arch/mips/pci/pci-legacy.c b/arch/mips/pci/pci-legacy.c index 3c3b1e6abb53..687513880fbf 100644 --- a/arch/mips/pci/pci-legacy.c +++ b/arch/mips/pci/pci-legacy.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mips/pci/pci.c b/arch/mips/pci/pci.c index c2e94cf5ecda..e68b44b27c0d 100644 --- a/arch/mips/pci/pci.c +++ b/arch/mips/pci/pci.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mips/ralink/of.c b/arch/mips/ralink/of.c index 1ada8492733b..d544e7b07f7a 100644 --- a/arch/mips/ralink/of.c +++ b/arch/mips/ralink/of.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/mips/rb532/prom.c b/arch/mips/rb532/prom.c index 6484e4a4597b..361a690facbf 100644 --- a/arch/mips/rb532/prom.c +++ b/arch/mips/rb532/prom.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c index cb1f1a6a166d..d8b8444d6795 100644 --- a/arch/mips/sgi-ip27/ip27-memory.c +++ b/arch/mips/sgi-ip27/ip27-memory.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/mips/sibyte/common/cfe.c b/arch/mips/sibyte/common/cfe.c index 092fb2a6ec4a..12a780f251e1 100644 --- a/arch/mips/sibyte/common/cfe.c +++ b/arch/mips/sibyte/common/cfe.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/mips/sibyte/swarm/setup.c b/arch/mips/sibyte/swarm/setup.c index 152ca71cc2d7..3b034b7178d6 100644 --- a/arch/mips/sibyte/swarm/setup.c +++ b/arch/mips/sibyte/swarm/setup.c @@ -23,7 +23,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/mips/txx9/rbtx4938/prom.c b/arch/mips/txx9/rbtx4938/prom.c index bcb469247e8c..2b36a2ee744c 100644 --- a/arch/mips/txx9/rbtx4938/prom.c +++ b/arch/mips/txx9/rbtx4938/prom.c @@ -11,7 +11,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/nds32/kernel/setup.c b/arch/nds32/kernel/setup.c index 63a1a5ef5219..eacc79024879 100644 --- a/arch/nds32/kernel/setup.c +++ b/arch/nds32/kernel/setup.c @@ -2,9 +2,8 @@ // Copyright (C) 2005-2017 Andes Technology Corporation #include -#include -#include #include +#include #include #include #include diff --git a/arch/nds32/mm/highmem.c b/arch/nds32/mm/highmem.c index e17cb8a69315..022779af6148 100644 --- a/arch/nds32/mm/highmem.c +++ b/arch/nds32/mm/highmem.c @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/nds32/mm/init.c b/arch/nds32/mm/init.c index 66d3e9cf498d..131104bd2538 100644 --- a/arch/nds32/mm/init.c +++ b/arch/nds32/mm/init.c @@ -7,12 +7,11 @@ #include #include #include -#include +#include #include #include #include #include -#include #include #include diff --git a/arch/nios2/kernel/prom.c b/arch/nios2/kernel/prom.c index a6d4f7530247..232a36b511aa 100644 --- a/arch/nios2/kernel/prom.c +++ b/arch/nios2/kernel/prom.c @@ -25,7 +25,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/nios2/kernel/setup.c b/arch/nios2/kernel/setup.c index 2d0011ddd4d5..6bbd4ae2beb0 100644 --- a/arch/nios2/kernel/setup.c +++ b/arch/nios2/kernel/setup.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/nios2/mm/init.c b/arch/nios2/mm/init.c index 12923501d94f..16cea5776b87 100644 --- a/arch/nios2/mm/init.c +++ b/arch/nios2/mm/init.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/openrisc/kernel/setup.c b/arch/openrisc/kernel/setup.c index e17fcd83120f..c605bdad1746 100644 --- a/arch/openrisc/kernel/setup.c +++ b/arch/openrisc/kernel/setup.c @@ -30,13 +30,12 @@ #include #include #include -#include +#include #include #include #include #include #include -#include #include #include diff --git a/arch/openrisc/mm/init.c b/arch/openrisc/mm/init.c index 91a6a9ab7598..d157310eb377 100644 --- a/arch/openrisc/mm/init.c +++ b/arch/openrisc/mm/init.c @@ -26,12 +26,11 @@ #include #include #include -#include +#include #include #include #include /* for initrd_* */ #include -#include #include #include diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index 7e7a3126c5e9..2d7cffcaa476 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -14,7 +14,6 @@ #include #include -#include #include #include #include diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index d39ec3a4550a..274bd1442dd9 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 2b56d1f30387..93ee3703b42f 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -33,7 +33,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 9216c3a7fcfc..2a51e4cc8246 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -29,10 +29,9 @@ #include #include #include -#include +#include #include #include -#include #include #include diff --git a/arch/powerpc/lib/alloc.c b/arch/powerpc/lib/alloc.c index bf87d6e13369..5b61704447c1 100644 --- a/arch/powerpc/lib/alloc.c +++ b/arch/powerpc/lib/alloc.c @@ -2,7 +2,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index a7226ed9cae6..8cf035e68378 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index b3fe79064a69..0a64fffabee1 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -27,12 +27,11 @@ #include #include #include -#include +#include #include #include #include #include -#include #include #include #include diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c index 954f1986af4d..67b9d7b669a1 100644 --- a/arch/powerpc/mm/mmu_context_nohash.c +++ b/arch/powerpc/mm/mmu_context_nohash.c @@ -44,7 +44,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index f04f15f9d232..3a048e98a132 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -11,7 +11,7 @@ #define pr_fmt(fmt) "numa: " fmt #include -#include +#include #include #include #include @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/platforms/powermac/nvram.c b/arch/powerpc/platforms/powermac/nvram.c index f45b369177a4..f3391be7c762 100644 --- a/arch/powerpc/platforms/powermac/nvram.c +++ b/arch/powerpc/platforms/powermac/nvram.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 23a67b545b70..aba81cbf0b36 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -17,11 +17,10 @@ #include #include #include -#include +#include #include #include #include -#include #include #include #include diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c index 12519857a33c..658bfab3350b 100644 --- a/arch/powerpc/platforms/ps3/setup.c +++ b/arch/powerpc/platforms/ps3/setup.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/powerpc/sysdev/msi_bitmap.c b/arch/powerpc/sysdev/msi_bitmap.c index 349a9ff6ca5b..2444feda831f 100644 --- a/arch/powerpc/sysdev/msi_bitmap.c +++ b/arch/powerpc/sysdev/msi_bitmap.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index d58c111099b3..1d9bfaff60bc 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -13,9 +13,8 @@ #include #include -#include -#include #include +#include #include #include diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index d17566a8c76f..97eae3871868 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -13,10 +13,9 @@ #include #include #include -#include +#include #include #include -#include #include #include #include diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 781c1053a773..72dd23ef771b 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -34,7 +34,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 44f9a7d6450b..f82b3d3c36e2 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -20,7 +20,7 @@ #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include -#include +#include #include #include #include @@ -35,7 +35,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 799a91882a76..8992b04c0ade 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -8,7 +8,7 @@ #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include -#include +#include #include #include #include diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index ec31b48a42a5..ebe748a9f472 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c index 84111a43ea29..eba2def3414d 100644 --- a/arch/s390/mm/extmem.c +++ b/arch/s390/mm/extmem.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 873f6ee1c46d..76d0708438e9 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include @@ -29,7 +29,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 04638b0b9ef1..0472e27febdf 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -4,14 +4,13 @@ * Author(s): Heiko Carstens */ -#include +#include #include #include #include #include #include #include -#include #include #include #include diff --git a/arch/s390/numa/mode_emu.c b/arch/s390/numa/mode_emu.c index 5a381fc8e958..bfba273c32c0 100644 --- a/arch/s390/numa/mode_emu.c +++ b/arch/s390/numa/mode_emu.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/s390/numa/numa.c b/arch/s390/numa/numa.c index 297f5d8b0890..ae0d9e889534 100644 --- a/arch/s390/numa/numa.c +++ b/arch/s390/numa/numa.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/s390/numa/toptree.c b/arch/s390/numa/toptree.c index 7f61cc3fd4d1..71a608cd4f61 100644 --- a/arch/s390/numa/toptree.c +++ b/arch/s390/numa/toptree.c @@ -8,7 +8,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index 21447f866415..c8c13c777162 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -11,12 +11,11 @@ #include #include #include -#include +#include #include #include #include #include -#include #include #include #include diff --git a/arch/sh/mm/ioremap_fixed.c b/arch/sh/mm/ioremap_fixed.c index 927a1294c465..07e744d75fa0 100644 --- a/arch/sh/mm/ioremap_fixed.c +++ b/arch/sh/mm/ioremap_fixed.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c index a41526bd91e2..9a26b442f820 100644 --- a/arch/sparc/kernel/mdesc.c +++ b/arch/sparc/kernel/mdesc.c @@ -5,13 +5,11 @@ */ #include #include -#include #include #include #include #include #include -#include #include #include #include diff --git a/arch/sparc/kernel/prom_32.c b/arch/sparc/kernel/prom_32.c index 4389944735c6..d41e2a749c5d 100644 --- a/arch/sparc/kernel/prom_32.c +++ b/arch/sparc/kernel/prom_32.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c index de7c87b153fe..cd2825cb8420 100644 --- a/arch/sparc/kernel/setup_64.c +++ b/arch/sparc/kernel/setup_64.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 6cc80d0f4b9f..4792e08ad36b 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c index 880714565c40..d900952bfc5f 100644 --- a/arch/sparc/mm/init_32.c +++ b/arch/sparc/mm/init_32.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index a8c3453195e6..3c8aac21f426 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include @@ -25,7 +25,6 @@ #include #include #include -#include #include #include diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c index b48fea5ad9ef..a6142c5abf61 100644 --- a/arch/sparc/mm/srmmu.c +++ b/arch/sparc/mm/srmmu.c @@ -11,7 +11,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c index ef19a391214f..673816880cce 100644 --- a/arch/um/drivers/net_kern.c +++ b/arch/um/drivers/net_kern.c @@ -6,7 +6,7 @@ * Licensed under the GPL. */ -#include +#include #include #include #include diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c index 20442d20bd09..2b4dded11a7a 100644 --- a/arch/um/drivers/vector_kern.c +++ b/arch/um/drivers/vector_kern.c @@ -9,7 +9,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/um/kernel/initrd.c b/arch/um/kernel/initrd.c index 844056cf313e..3678f5b05e42 100644 --- a/arch/um/kernel/initrd.c +++ b/arch/um/kernel/initrd.c @@ -4,7 +4,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index 2c672a8f4571..1067469ba2ea 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c @@ -5,7 +5,6 @@ #include #include -#include #include #include #include diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c index 296a91a04598..5bf56af4d5b9 100644 --- a/arch/um/kernel/physmem.c +++ b/arch/um/kernel/physmem.c @@ -4,7 +4,6 @@ */ #include -#include #include #include #include diff --git a/arch/unicore32/kernel/hibernate.c b/arch/unicore32/kernel/hibernate.c index 9969ec374abb..29b71c68eb7c 100644 --- a/arch/unicore32/kernel/hibernate.c +++ b/arch/unicore32/kernel/hibernate.c @@ -13,7 +13,7 @@ #include #include -#include +#include #include #include diff --git a/arch/unicore32/kernel/setup.c b/arch/unicore32/kernel/setup.c index 9f163f976315..b2c38b32ea57 100644 --- a/arch/unicore32/kernel/setup.c +++ b/arch/unicore32/kernel/setup.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include @@ -27,7 +27,6 @@ #include #include #include -#include #include #include diff --git a/arch/unicore32/mm/init.c b/arch/unicore32/mm/init.c index 44fd0e8fbe87..cf4eb9481fd6 100644 --- a/arch/unicore32/mm/init.c +++ b/arch/unicore32/mm/init.c @@ -11,13 +11,12 @@ #include #include #include -#include +#include #include #include #include #include #include -#include #include #include #include diff --git a/arch/unicore32/mm/mmu.c b/arch/unicore32/mm/mmu.c index 18b355a20f0b..040a8c279761 100644 --- a/arch/unicore32/mm/mmu.c +++ b/arch/unicore32/mm/mmu.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index f5ea6415b778..7f5d212551d4 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index f1915b744052..ca13851f0570 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -7,7 +7,6 @@ */ #include -#include #include #include #include diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index ab731ab09f06..32b2b7a41ef5 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 8c7450900e0e..5fbc57e4b0b9 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -47,7 +47,7 @@ #include #include /* time_after() */ #include -#include +#include #include #include diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 660d0b22e962..cbbd57ae06ee 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1,7 +1,7 @@ /* cpu_feature_enabled() cannot be used this early */ #define USE_EARLY_PGTABLE_L5 -#include +#include #include #include #include diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index a0ec4c37265a..68ff62bffbab 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -9,11 +9,10 @@ * allocation code routines via a platform independent interface (memblock, etc.). */ #include -#include +#include #include #include #include -#include #include #include diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index f1c5eb99d445..3482460d984d 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 7ba73fe0d917..f4562fcec681 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 71c0b01d93b1..bd08b9e1c9e2 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 637982efecd8..9b158b4716d2 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 7005f89bf3b2..b74e7bfed6ab 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -30,7 +30,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 483412fb8a24..e8796fcd7e5a 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -4,7 +4,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 5369d7fac797..a9134d1910b9 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -49,7 +49,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/tce_64.c b/arch/x86/kernel/tce_64.c index 75730ce01f8d..285aaa62d153 100644 --- a/arch/x86/kernel/tce_64.c +++ b/arch/x86/kernel/tce_64.c @@ -30,7 +30,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/mm/amdtopology.c b/arch/x86/mm/amdtopology.c index 048c761d97b0..058b2f36b3a6 100644 --- a/arch/x86/mm/amdtopology.c +++ b/arch/x86/mm/amdtopology.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index b24eb4eb9984..71d4b9d4d43f 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -8,7 +8,7 @@ #include /* task_stack_*(), ... */ #include /* oops_begin/end, ... */ #include /* search_exception_tables */ -#include /* max_low_pfn */ +#include /* max_low_pfn */ #include /* NOKPROBE_SYMBOL, ... */ #include /* kmmio_handler, ... */ #include /* perf_sw_event */ diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index 62915a5e0fa2..0d4bdcb84da5 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c @@ -1,7 +1,7 @@ #include #include #include /* for totalram_pages */ -#include +#include void *kmap(struct page *page) { diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index faca978ebf9d..ef99f3892e1f 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -3,7 +3,6 @@ #include #include #include -#include /* for max_low_pfn */ #include #include diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 3bbe5f58a67d..49ecf5ecf6d3 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index bfb0bedc21d3..5fab264948c2 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 24e0920a9b25..5378d10f1d31 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -6,7 +6,7 @@ * (C) Copyright 1995 1996 Linus Torvalds */ -#include +#include #include #include #include diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 8f87499124b8..04a9cf6b034f 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -5,10 +5,9 @@ /* cpu_feature_enabled() cannot be used this early */ #define USE_EARLY_PGTABLE_L5 -#include +#include #include #include -#include #include #include #include diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index 61db77b0eda9..3f452ffed7e9 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 16e37d712ffd..1308f5408bf7 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -4,7 +4,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index e8a4a09e20f1..f2bd3d61e16b 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c @@ -22,7 +22,6 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -#include #include #include diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 066f3511d5f1..59d80160fa5a 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -3,7 +3,7 @@ * Generic VM initialization for x86-64 NUMA setups. * Copyright 2002,2003 Andi Kleen, SuSE Labs. */ -#include +#include #include "numa_internal.h" diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c index b54d52a2d00a..a80fdd7fb40f 100644 --- a/arch/x86/mm/numa_emulation.c +++ b/arch/x86/mm/numa_emulation.c @@ -6,7 +6,6 @@ #include #include #include -#include #include #include "numa_internal.h" diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c index a25588ad75ef..08f8f76a4852 100644 --- a/arch/x86/mm/pageattr-test.c +++ b/arch/x86/mm/pageattr-test.c @@ -5,7 +5,7 @@ * Clears the a test pte bit on random pages in the direct mapping, * then reverts and compares page tables forwards and afterwards. */ -#include +#include #include #include #include diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 62bb30b4bd2a..f799076e3d57 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -3,7 +3,7 @@ * Thanks to Ben LaHaise for precious feedback. */ #include -#include +#include #include #include #include diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 3d0c83ef6aab..08013524fba1 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -8,7 +8,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/x86/mm/physaddr.c b/arch/x86/mm/physaddr.c index 7f9acb68324c..bdc98150d4db 100644 --- a/arch/x86/mm/physaddr.c +++ b/arch/x86/mm/physaddr.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -#include +#include #include #include #include diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index ed4ac215305d..8cd66152cdb0 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 9061babfbc83..7ae939e353cd 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -36,9 +36,8 @@ #include #include #include -#include -#include #include +#include #include #include #include diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index e8da7f492970..cf0347f61b21 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index 4b70d0f5a803..95e77a667ba5 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include diff --git a/arch/x86/platform/olpc/olpc_dt.c b/arch/x86/platform/olpc/olpc_dt.c index 140cd76ee897..115c8e4173bb 100644 --- a/arch/x86/platform/olpc/olpc_dt.c +++ b/arch/x86/platform/olpc/olpc_dt.c @@ -17,7 +17,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/x86/power/hibernate_32.c b/arch/x86/power/hibernate_32.c index 15695e30f982..be15bdcb20df 100644 --- a/arch/x86/power/hibernate_32.c +++ b/arch/x86/power/hibernate_32.c @@ -8,7 +8,7 @@ #include #include -#include +#include #include #include diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 67b2f31a1265..e996e8e744cb 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG -#include +#include #endif #include #include diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index ec7a4209f310..2f6787fc7106 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include @@ -31,7 +31,6 @@ #include #include #include -#include #include #include diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index b3e11afed25b..b06731705529 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -67,7 +67,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/xtensa/kernel/pci.c b/arch/xtensa/kernel/pci.c index 21f13e9aabe1..5ca440a74316 100644 --- a/arch/xtensa/kernel/pci.c +++ b/arch/xtensa/kernel/pci.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/xtensa/mm/cache.c b/arch/xtensa/mm/cache.c index 9220dcde7520..b27359e2a464 100644 --- a/arch/xtensa/mm/cache.c +++ b/arch/xtensa/mm/cache.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c index f7fbe6334939..9750a48f491b 100644 --- a/arch/xtensa/mm/init.c +++ b/arch/xtensa/mm/init.c @@ -18,7 +18,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/xtensa/mm/kasan_init.c b/arch/xtensa/mm/kasan_init.c index 1a30a258ccd0..6b95ca43aec0 100644 --- a/arch/xtensa/mm/kasan_init.c +++ b/arch/xtensa/mm/kasan_init.c @@ -8,11 +8,10 @@ * Copyright (C) 2017 Cadence Design Systems Inc. */ -#include +#include #include #include #include -#include #include #include #include diff --git a/arch/xtensa/mm/mmu.c b/arch/xtensa/mm/mmu.c index f33a1ff2662a..a4dcfd39bc5c 100644 --- a/arch/xtensa/mm/mmu.c +++ b/arch/xtensa/mm/mmu.c @@ -4,7 +4,7 @@ * * Extracted from init.c */ -#include +#include #include #include #include diff --git a/arch/xtensa/platforms/iss/network.c b/arch/xtensa/platforms/iss/network.c index 206b9d4591e8..190846dddc67 100644 --- a/arch/xtensa/platforms/iss/network.c +++ b/arch/xtensa/platforms/iss/network.c @@ -30,7 +30,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/xtensa/platforms/iss/setup.c b/arch/xtensa/platforms/iss/setup.c index 58709e89a8ed..c14cc673976c 100644 --- a/arch/xtensa/platforms/iss/setup.c +++ b/arch/xtensa/platforms/iss/setup.c @@ -16,7 +16,7 @@ * option) any later version. * */ -#include +#include #include #include #include diff --git a/block/blk-settings.c b/block/blk-settings.c index ffd459969689..696c04c1ab6c 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -6,7 +6,7 @@ #include #include #include -#include /* for max_pfn/max_low_pfn */ +#include /* for max_pfn/max_low_pfn */ #include #include #include diff --git a/block/bounce.c b/block/bounce.c index ec0d99995f5f..cf49fe02f65c 100644 --- a/block/bounce.c +++ b/block/bounce.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c index 85167603b9c9..274699463b4f 100644 --- a/drivers/acpi/numa.c +++ b/drivers/acpi/numa.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c index a3d012b08fc5..61203eebf3a1 100644 --- a/drivers/acpi/tables.c +++ b/drivers/acpi/tables.c @@ -31,9 +31,8 @@ #include #include #include -#include -#include #include +#include #include #include "internal.h" diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 23cf4427f425..41b91af95afb 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/clk/ti/clk.c b/drivers/clk/ti/clk.c index 5c54d3734daf..5b2867a33b98 100644 --- a/drivers/clk/ti/clk.c +++ b/drivers/clk/ti/clk.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include "clock.h" diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c index f2483548cde9..099d83e4e910 100644 --- a/drivers/firmware/dmi_scan.c +++ b/drivers/firmware/dmi_scan.c @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/firmware/efi/apple-properties.c b/drivers/firmware/efi/apple-properties.c index 2b675f788b61..ac1654f74dc7 100644 --- a/drivers/firmware/efi/apple-properties.c +++ b/drivers/firmware/efi/apple-properties.c @@ -20,7 +20,7 @@ #define pr_fmt(fmt) "apple-properties: " fmt -#include +#include #include #include #include diff --git a/drivers/firmware/iscsi_ibft_find.c b/drivers/firmware/iscsi_ibft_find.c index 2224f1dc074b..72d9ea18270b 100644 --- a/drivers/firmware/iscsi_ibft_find.c +++ b/drivers/firmware/iscsi_ibft_find.c @@ -18,7 +18,7 @@ * GNU General Public License for more details. */ -#include +#include #include #include #include diff --git a/drivers/firmware/memmap.c b/drivers/firmware/memmap.c index 03cead6d5f97..2a23453f005a 100644 --- a/drivers/firmware/memmap.c +++ b/drivers/firmware/memmap.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index f9f69f7111a9..44bd5b9166bb 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -11,7 +11,7 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. */ -#include +#include #include #include #include diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index 676c029494e4..0e780848f59b 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -13,7 +13,7 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. */ -#include +#include #include #include #include diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c index 0069f9084f9f..880a81c82b7a 100644 --- a/drivers/macintosh/smu.c +++ b/drivers/macintosh/smu.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include @@ -38,7 +38,6 @@ #include #include #include -#include #include #include diff --git a/drivers/mtd/ar7part.c b/drivers/mtd/ar7part.c index fc15ec58230a..0d33cf0842ad 100644 --- a/drivers/mtd/ar7part.c +++ b/drivers/mtd/ar7part.c @@ -25,7 +25,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/net/arcnet/arc-rimi.c b/drivers/net/arcnet/arc-rimi.c index a07e24970be4..11c5bad95226 100644 --- a/drivers/net/arcnet/arc-rimi.c +++ b/drivers/net/arcnet/arc-rimi.c @@ -33,7 +33,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/net/arcnet/com20020-isa.c b/drivers/net/arcnet/com20020-isa.c index 38fa60ddaf2e..28510e33924f 100644 --- a/drivers/net/arcnet/com20020-isa.c +++ b/drivers/net/arcnet/com20020-isa.c @@ -38,7 +38,7 @@ #include #include #include -#include +#include #include #include "arcdevice.h" diff --git a/drivers/net/arcnet/com90io.c b/drivers/net/arcnet/com90io.c index 4e56aaf2b984..2c546013a980 100644 --- a/drivers/net/arcnet/com90io.c +++ b/drivers/net/arcnet/com90io.c @@ -34,7 +34,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index ffe62a7ae19b..bb532aae0d92 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c index 01e23b85e798..49ae2aa744d6 100644 --- a/drivers/of/unittest.c +++ b/drivers/of/unittest.c @@ -5,7 +5,7 @@ #define pr_fmt(fmt) "### dt-test ### " fmt -#include +#include #include #include #include diff --git a/drivers/s390/char/fs3270.c b/drivers/s390/char/fs3270.c index 16a4e8528bbc..8f3a2eeb28dc 100644 --- a/drivers/s390/char/fs3270.c +++ b/drivers/s390/char/fs3270.c @@ -8,7 +8,7 @@ * Copyright IBM Corp. 2003, 2009 */ -#include +#include #include #include #include diff --git a/drivers/s390/char/tty3270.c b/drivers/s390/char/tty3270.c index 5b8af2782282..2b0c36c2c568 100644 --- a/drivers/s390/char/tty3270.c +++ b/drivers/s390/char/tty3270.c @@ -19,7 +19,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/s390/cio/cmf.c b/drivers/s390/cio/cmf.c index 8af4948dae80..72dd2471ec1e 100644 --- a/drivers/s390/cio/cmf.c +++ b/drivers/s390/cio/cmf.c @@ -13,7 +13,7 @@ #define KMSG_COMPONENT "cio" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt -#include +#include #include #include #include diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c index 8f5c1d7f751a..97b6f197f007 100644 --- a/drivers/s390/virtio/virtio_ccw.c +++ b/drivers/s390/virtio/virtio_ccw.c @@ -9,7 +9,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/sfi/sfi_core.c b/drivers/sfi/sfi_core.c index 153b3f3cc795..a5136901dd8a 100644 --- a/drivers/sfi/sfi_core.c +++ b/drivers/sfi/sfi_core.c @@ -59,7 +59,7 @@ #define KMSG_COMPONENT "SFI" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt -#include +#include #include #include #include diff --git a/drivers/tty/serial/cpm_uart/cpm_uart_core.c b/drivers/tty/serial/cpm_uart/cpm_uart_core.c index 79ad30d34949..b929c7ae3a27 100644 --- a/drivers/tty/serial/cpm_uart/cpm_uart_core.c +++ b/drivers/tty/serial/cpm_uart/cpm_uart_core.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/tty/serial/cpm_uart/cpm_uart_cpm1.c b/drivers/tty/serial/cpm_uart/cpm_uart_cpm1.c index 4eba17f3d293..56fc527015cb 100644 --- a/drivers/tty/serial/cpm_uart/cpm_uart_cpm1.c +++ b/drivers/tty/serial/cpm_uart/cpm_uart_cpm1.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/tty/serial/cpm_uart/cpm_uart_cpm2.c b/drivers/tty/serial/cpm_uart/cpm_uart_cpm2.c index e3bff068dc3c..6a1cd03bfe39 100644 --- a/drivers/tty/serial/cpm_uart/cpm_uart_cpm2.c +++ b/drivers/tty/serial/cpm_uart/cpm_uart_cpm2.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/usb/early/xhci-dbc.c b/drivers/usb/early/xhci-dbc.c index ddc5fa88f268..d2652dccc699 100644 --- a/drivers/usb/early/xhci-dbc.c +++ b/drivers/usb/early/xhci-dbc.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index e12bb256036f..a3f5cbfcd4a1 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -44,7 +44,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index e6c1934734b7..93194f3e7540 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 84575baceebc..f15f89df1f36 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -33,7 +33,7 @@ #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt -#include +#include #include #include #include diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index c5f26a87d238..2a7f545bd0b5 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -35,7 +35,6 @@ #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt -#include #include #include #include diff --git a/drivers/xen/xen-selfballoon.c b/drivers/xen/xen-selfballoon.c index 55988b8418ee..5165aa82bf7d 100644 --- a/drivers/xen/xen-selfballoon.c +++ b/drivers/xen/xen-selfballoon.c @@ -68,7 +68,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include -#include +#include #include #include #include diff --git a/fs/dcache.c b/fs/dcache.c index c2e443fb76ae..2593153471cf 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/fs/inode.c b/fs/inode.c index 9b808986d440..9e198f00b64c 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/fs/namespace.c b/fs/namespace.c index d86830c86ce8..98d27da43304 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index d297fe4472a9..bbcc185062bb 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/fs/proc/page.c b/fs/proc/page.c index 792c78a49174..6c517b11acf8 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -#include +#include #include #include #include diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 0377a104495b..3fe90443c1bb 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h deleted file mode 100644 index b58873a567b2..000000000000 --- a/include/linux/bootmem.h +++ /dev/null @@ -1,173 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Discontiguous memory support, Kanoj Sarcar, SGI, Nov 1999 - */ -#ifndef _LINUX_BOOTMEM_H -#define _LINUX_BOOTMEM_H - -#include -#include -#include -#include - -/* - * simple boot-time physical memory area allocator. - */ - -extern unsigned long max_low_pfn; -extern unsigned long min_low_pfn; - -/* - * highest page - */ -extern unsigned long max_pfn; -/* - * highest possible page - */ -extern unsigned long long max_possible_pfn; - -extern unsigned long memblock_free_all(void); -extern void reset_node_managed_pages(pg_data_t *pgdat); -extern void reset_all_zones_managed_pages(void); - -/* We are using top down, so it is safe to use 0 here */ -#define BOOTMEM_LOW_LIMIT 0 - -#ifndef ARCH_LOW_ADDRESS_LIMIT -#define ARCH_LOW_ADDRESS_LIMIT 0xffffffffUL -#endif - -/* FIXME: use MEMBLOCK_ALLOC_* variants here */ -#define BOOTMEM_ALLOC_ACCESSIBLE 0 -#define BOOTMEM_ALLOC_ANYWHERE (~(phys_addr_t)0) - -/* FIXME: Move to memblock.h at a point where we remove nobootmem.c */ -void *memblock_alloc_try_nid_raw(phys_addr_t size, phys_addr_t align, - phys_addr_t min_addr, - phys_addr_t max_addr, int nid); -void *memblock_alloc_try_nid_nopanic(phys_addr_t size, - phys_addr_t align, phys_addr_t min_addr, - phys_addr_t max_addr, int nid); -void *memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, - phys_addr_t min_addr, phys_addr_t max_addr, int nid); -void __memblock_free_early(phys_addr_t base, phys_addr_t size); -void __memblock_free_late(phys_addr_t base, phys_addr_t size); - -static inline void * __init memblock_alloc( - phys_addr_t size, phys_addr_t align) -{ - return memblock_alloc_try_nid(size, align, BOOTMEM_LOW_LIMIT, - BOOTMEM_ALLOC_ACCESSIBLE, - NUMA_NO_NODE); -} - -static inline void * __init memblock_alloc_raw( - phys_addr_t size, phys_addr_t align) -{ - return memblock_alloc_try_nid_raw(size, align, BOOTMEM_LOW_LIMIT, - BOOTMEM_ALLOC_ACCESSIBLE, - NUMA_NO_NODE); -} - -static inline void * __init memblock_alloc_from( - phys_addr_t size, phys_addr_t align, phys_addr_t min_addr) -{ - return memblock_alloc_try_nid(size, align, min_addr, - BOOTMEM_ALLOC_ACCESSIBLE, - NUMA_NO_NODE); -} - -static inline void * __init memblock_alloc_nopanic( - phys_addr_t size, phys_addr_t align) -{ - return memblock_alloc_try_nid_nopanic(size, align, - BOOTMEM_LOW_LIMIT, - BOOTMEM_ALLOC_ACCESSIBLE, - NUMA_NO_NODE); -} - -static inline void * __init memblock_alloc_low( - phys_addr_t size, phys_addr_t align) -{ - return memblock_alloc_try_nid(size, align, - BOOTMEM_LOW_LIMIT, - ARCH_LOW_ADDRESS_LIMIT, - NUMA_NO_NODE); -} -static inline void * __init memblock_alloc_low_nopanic( - phys_addr_t size, phys_addr_t align) -{ - return memblock_alloc_try_nid_nopanic(size, align, - BOOTMEM_LOW_LIMIT, - ARCH_LOW_ADDRESS_LIMIT, - NUMA_NO_NODE); -} - -static inline void * __init memblock_alloc_from_nopanic( - phys_addr_t size, phys_addr_t align, phys_addr_t min_addr) -{ - return memblock_alloc_try_nid_nopanic(size, align, min_addr, - BOOTMEM_ALLOC_ACCESSIBLE, - NUMA_NO_NODE); -} - -static inline void * __init memblock_alloc_node( - phys_addr_t size, phys_addr_t align, int nid) -{ - return memblock_alloc_try_nid(size, align, BOOTMEM_LOW_LIMIT, - BOOTMEM_ALLOC_ACCESSIBLE, nid); -} - -static inline void * __init memblock_alloc_node_nopanic( - phys_addr_t size, int nid) -{ - return memblock_alloc_try_nid_nopanic(size, 0, BOOTMEM_LOW_LIMIT, - BOOTMEM_ALLOC_ACCESSIBLE, - nid); -} - -static inline void __init memblock_free_early( - phys_addr_t base, phys_addr_t size) -{ - __memblock_free_early(base, size); -} - -static inline void __init memblock_free_early_nid( - phys_addr_t base, phys_addr_t size, int nid) -{ - __memblock_free_early(base, size); -} - -static inline void __init memblock_free_late( - phys_addr_t base, phys_addr_t size) -{ - __memblock_free_late(base, size); -} - -extern void *alloc_large_system_hash(const char *tablename, - unsigned long bucketsize, - unsigned long numentries, - int scale, - int flags, - unsigned int *_hash_shift, - unsigned int *_hash_mask, - unsigned long low_limit, - unsigned long high_limit); - -#define HASH_EARLY 0x00000001 /* Allocating during early boot? */ -#define HASH_SMALL 0x00000002 /* sub-page allocation allowed, min - * shift passed via *_hash_shift */ -#define HASH_ZERO 0x00000004 /* Zero allocated hash table */ - -/* Only NUMA needs hash distribution. 64bit NUMA architectures have - * sufficient vmalloc space. - */ -#ifdef CONFIG_NUMA -#define HASHDIST_DEFAULT IS_ENABLED(CONFIG_64BIT) -extern int hashdist; /* Distribute hashes across NUMA nodes? */ -#else -#define hashdist (0) -#endif - - -#endif /* _LINUX_BOOTMEM_H */ diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 9d46a7204975..1b4d85879cbe 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -15,6 +15,19 @@ #include #include +#include + +extern unsigned long max_low_pfn; +extern unsigned long min_low_pfn; + +/* + * highest page + */ +extern unsigned long max_pfn; +/* + * highest possible page + */ +extern unsigned long long max_possible_pfn; #define INIT_MEMBLOCK_REGIONS 128 #define INIT_PHYSMEM_REGIONS 4 @@ -119,6 +132,10 @@ int memblock_mark_nomap(phys_addr_t base, phys_addr_t size); int memblock_clear_nomap(phys_addr_t base, phys_addr_t size); enum memblock_flags choose_memblock_flags(void); +unsigned long memblock_free_all(void); +void reset_node_managed_pages(pg_data_t *pgdat); +void reset_all_zones_managed_pages(void); + /* Low level functions */ int memblock_add_range(struct memblock_type *type, phys_addr_t base, phys_addr_t size, @@ -300,11 +317,116 @@ static inline int memblock_get_region_node(const struct memblock_region *r) } #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ +/* Flags for memblock allocation APIs */ +#define MEMBLOCK_ALLOC_ANYWHERE (~(phys_addr_t)0) +#define MEMBLOCK_ALLOC_ACCESSIBLE 0 + +/* We are using top down, so it is safe to use 0 here */ +#define MEMBLOCK_LOW_LIMIT 0 + +#ifndef ARCH_LOW_ADDRESS_LIMIT +#define ARCH_LOW_ADDRESS_LIMIT 0xffffffffUL +#endif + phys_addr_t memblock_phys_alloc_nid(phys_addr_t size, phys_addr_t align, int nid); phys_addr_t memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid); phys_addr_t memblock_phys_alloc(phys_addr_t size, phys_addr_t align); +void *memblock_alloc_try_nid_raw(phys_addr_t size, phys_addr_t align, + phys_addr_t min_addr, phys_addr_t max_addr, + int nid); +void *memblock_alloc_try_nid_nopanic(phys_addr_t size, phys_addr_t align, + phys_addr_t min_addr, phys_addr_t max_addr, + int nid); +void *memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, + phys_addr_t min_addr, phys_addr_t max_addr, + int nid); + +static inline void * __init memblock_alloc(phys_addr_t size, phys_addr_t align) +{ + return memblock_alloc_try_nid(size, align, MEMBLOCK_LOW_LIMIT, + MEMBLOCK_ALLOC_ACCESSIBLE, NUMA_NO_NODE); +} + +static inline void * __init memblock_alloc_raw(phys_addr_t size, + phys_addr_t align) +{ + return memblock_alloc_try_nid_raw(size, align, MEMBLOCK_LOW_LIMIT, + MEMBLOCK_ALLOC_ACCESSIBLE, + NUMA_NO_NODE); +} + +static inline void * __init memblock_alloc_from(phys_addr_t size, + phys_addr_t align, + phys_addr_t min_addr) +{ + return memblock_alloc_try_nid(size, align, min_addr, + MEMBLOCK_ALLOC_ACCESSIBLE, NUMA_NO_NODE); +} + +static inline void * __init memblock_alloc_nopanic(phys_addr_t size, + phys_addr_t align) +{ + return memblock_alloc_try_nid_nopanic(size, align, MEMBLOCK_LOW_LIMIT, + MEMBLOCK_ALLOC_ACCESSIBLE, + NUMA_NO_NODE); +} + +static inline void * __init memblock_alloc_low(phys_addr_t size, + phys_addr_t align) +{ + return memblock_alloc_try_nid(size, align, MEMBLOCK_LOW_LIMIT, + ARCH_LOW_ADDRESS_LIMIT, NUMA_NO_NODE); +} +static inline void * __init memblock_alloc_low_nopanic(phys_addr_t size, + phys_addr_t align) +{ + return memblock_alloc_try_nid_nopanic(size, align, MEMBLOCK_LOW_LIMIT, + ARCH_LOW_ADDRESS_LIMIT, + NUMA_NO_NODE); +} + +static inline void * __init memblock_alloc_from_nopanic(phys_addr_t size, + phys_addr_t align, + phys_addr_t min_addr) +{ + return memblock_alloc_try_nid_nopanic(size, align, min_addr, + MEMBLOCK_ALLOC_ACCESSIBLE, + NUMA_NO_NODE); +} + +static inline void * __init memblock_alloc_node(phys_addr_t size, + phys_addr_t align, int nid) +{ + return memblock_alloc_try_nid(size, align, MEMBLOCK_LOW_LIMIT, + MEMBLOCK_ALLOC_ACCESSIBLE, nid); +} + +static inline void * __init memblock_alloc_node_nopanic(phys_addr_t size, + int nid) +{ + return memblock_alloc_try_nid_nopanic(size, 0, MEMBLOCK_LOW_LIMIT, + MEMBLOCK_ALLOC_ACCESSIBLE, nid); +} + +static inline void __init memblock_free_early(phys_addr_t base, + phys_addr_t size) +{ + __memblock_free_early(base, size); +} + +static inline void __init memblock_free_early_nid(phys_addr_t base, + phys_addr_t size, int nid) +{ + __memblock_free_early(base, size); +} + +static inline void __init memblock_free_late(phys_addr_t base, phys_addr_t size) +{ + __memblock_free_late(base, size); +} + /* * Set the allocation direction to bottom-up or top-down. */ @@ -323,10 +445,6 @@ static inline bool memblock_bottom_up(void) return memblock.bottom_up; } -/* Flags for memblock_alloc_base() amd __memblock_alloc_base() */ -#define MEMBLOCK_ALLOC_ANYWHERE (~(phys_addr_t)0) -#define MEMBLOCK_ALLOC_ACCESSIBLE 0 - phys_addr_t __init memblock_alloc_range(phys_addr_t size, phys_addr_t align, phys_addr_t start, phys_addr_t end, enum memblock_flags flags); @@ -432,6 +550,31 @@ static inline unsigned long memblock_region_reserved_end_pfn(const struct memblo i < memblock_type->cnt; \ i++, rgn = &memblock_type->regions[i]) +extern void *alloc_large_system_hash(const char *tablename, + unsigned long bucketsize, + unsigned long numentries, + int scale, + int flags, + unsigned int *_hash_shift, + unsigned int *_hash_mask, + unsigned long low_limit, + unsigned long high_limit); + +#define HASH_EARLY 0x00000001 /* Allocating during early boot? */ +#define HASH_SMALL 0x00000002 /* sub-page allocation allowed, min + * shift passed via *_hash_shift */ +#define HASH_ZERO 0x00000004 /* Zero allocated hash table */ + +/* Only NUMA needs hash distribution. 64bit NUMA architectures have + * sufficient vmalloc space. + */ +#ifdef CONFIG_NUMA +#define HASHDIST_DEFAULT IS_ENABLED(CONFIG_64BIT) +extern int hashdist; /* Distribute hashes across NUMA nodes? */ +#else +#define hashdist (0) +#endif + #ifdef CONFIG_MEMTEST extern void early_memtest(phys_addr_t start, phys_addr_t end); #else diff --git a/init/main.c b/init/main.c index 8cef69c61389..51b8e7b8ae5b 100644 --- a/init/main.c +++ b/init/main.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index f14c376937e5..22a12ab5a5e9 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -4,7 +4,7 @@ * * DMA operations that map physical memory directly without using an IOMMU. */ -#include /* for max_pfn */ +#include /* for max_pfn */ #include #include #include diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 801da67e957b..5731daa09a32 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -40,7 +40,7 @@ #include #include -#include +#include #include #define CREATE_TRACE_POINTS diff --git a/kernel/futex.c b/kernel/futex.c index 3e2de8fc1891..f423f9b6577e 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -65,7 +65,7 @@ #include #include #include -#include +#include #include #include diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h index 0130e488ebfe..8f36c27c1794 100644 --- a/kernel/locking/qspinlock_paravirt.h +++ b/kernel/locking/qspinlock_paravirt.h @@ -4,7 +4,7 @@ #endif #include -#include +#include #include /* diff --git a/kernel/pid.c b/kernel/pid.c index cdf63e53a014..b2f6c506035d 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 34116a6097be..3c9e365438ad 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 429e4a3833ca..1b2a029360b7 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include diff --git a/kernel/profile.c b/kernel/profile.c index 9aa2a4445b0d..9c08a2c7cb1d 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -16,7 +16,7 @@ #include #include -#include +#include #include #include #include diff --git a/lib/cpumask.c b/lib/cpumask.c index 1405cb22e6bc..75b5e7672c4c 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -4,7 +4,7 @@ #include #include #include -#include +#include /** * cpumask_next - get the next cpu in a cpumask diff --git a/mm/hugetlb.c b/mm/hugetlb.c index e35d99844612..c007fb5fb8d5 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/mm/kasan/kasan_init.c b/mm/kasan/kasan_init.c index 785a9707786b..c7550eb65922 100644 --- a/mm/kasan/kasan_init.c +++ b/mm/kasan/kasan_init.c @@ -10,11 +10,10 @@ * */ -#include +#include #include #include #include -#include #include #include #include diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 4f7e4b5a2f08..877de4fa0720 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -92,7 +92,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/mm/memblock.c b/mm/memblock.c index 2ed73245b5da..c655342569f8 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 7e6509a53d79..41e326472ef9 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -33,7 +33,6 @@ #include #include #include -#include #include #include diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 5f3291601945..ef289fadec0e 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include diff --git a/mm/page_ext.c b/mm/page_ext.c index 5323c2ade686..ae44f7adbe07 100644 --- a/mm/page_ext.c +++ b/mm/page_ext.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include #include -#include +#include #include #include #include diff --git a/mm/page_idle.c b/mm/page_idle.c index 6302bc62c27d..b9e4b42b33ab 100644 --- a/mm/page_idle.c +++ b/mm/page_idle.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include -#include +#include #include #include #include diff --git a/mm/page_owner.c b/mm/page_owner.c index d80adfe702d3..87bc0dfdb52b 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/mm/percpu.c b/mm/percpu.c index 3050c1d37d37..61cdbb3b3736 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -65,7 +65,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include -#include +#include #include #include #include diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index 7408cabed61a..7fec05796796 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -20,7 +20,6 @@ */ #include #include -#include #include #include #include diff --git a/mm/sparse.c b/mm/sparse.c index b139fbc61d10..ab2ac45e0440 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -5,7 +5,6 @@ #include #include #include -#include #include #include #include diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index f5c9ef2586de..411dd7a90046 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 1834818ed07b..9e6bc4d6daa7 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -262,7 +262,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index ca3ed931f2a9..1976fddb9e00 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -81,7 +81,7 @@ #include #include -#include +#include #include #include #include diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index e948db29ab53..9b277bd36d1a 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -46,7 +46,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/xfrm/xfrm_hash.c b/net/xfrm/xfrm_hash.c index 2ad33ce1ea17..eca8d84d99bf 100644 --- a/net/xfrm/xfrm_hash.c +++ b/net/xfrm/xfrm_hash.c @@ -6,7 +6,7 @@ #include #include -#include +#include #include #include #include -- cgit v1.2.3