From 7614209736fbc4927584d4387faade4f31444fce Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 9 Mar 2020 12:03:14 +0100 Subject: ceph: check POOL_FLAG_FULL/NEARFULL in addition to OSDMAP_FULL/NEARFULL CEPH_OSDMAP_FULL/NEARFULL aren't set since mimic, so we need to consult per-pool flags as well. Unfortunately the backwards compatibility here is lacking: - the change that deprecated OSDMAP_FULL/NEARFULL went into mimic, but was guarded by require_osd_release >= RELEASE_LUMINOUS - it was subsequently backported to luminous in v12.2.2, but that makes no difference to clients that only check OSDMAP_FULL/NEARFULL because require_osd_release is not client-facing -- it is for OSDs Since all kernels are affected, the best we can do here is just start checking both map flags and pool flags and send that to stable. These checks are best effort, so take osdc->lock and look up pool flags just once. Remove the FIXME, since filesystem quotas are checked above and RADOS quotas are reflected in POOL_FLAG_FULL: when the pool reaches its quota, both POOL_FLAG_FULL and POOL_FLAG_FULL_QUOTA are set. Cc: stable@vger.kernel.org Reported-by: Yanhu Cao Signed-off-by: Ilya Dryomov Reviewed-by: Jeff Layton Acked-by: Sage Weil --- include/linux/ceph/osdmap.h | 4 ++++ include/linux/ceph/rados.h | 6 ++++-- 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index e081b56f1c1d..5e601975745f 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -37,6 +37,9 @@ int ceph_spg_compare(const struct ceph_spg *lhs, const struct ceph_spg *rhs); #define CEPH_POOL_FLAG_HASHPSPOOL (1ULL << 0) /* hash pg seed and pool id together */ #define CEPH_POOL_FLAG_FULL (1ULL << 1) /* pool is full */ +#define CEPH_POOL_FLAG_FULL_QUOTA (1ULL << 10) /* pool ran out of quota, + will set FULL too */ +#define CEPH_POOL_FLAG_NEARFULL (1ULL << 11) /* pool is nearfull */ struct ceph_pg_pool_info { struct rb_node node; @@ -304,5 +307,6 @@ extern struct ceph_pg_pool_info *ceph_pg_pool_by_id(struct ceph_osdmap *map, extern const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id); extern int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name); +u64 ceph_pg_pool_flags(struct ceph_osdmap *map, u64 id); #endif diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index 59bdfd470100..88ed3c5c04c5 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -143,8 +143,10 @@ extern const char *ceph_osd_state_name(int s); /* * osd map flag bits */ -#define CEPH_OSDMAP_NEARFULL (1<<0) /* sync writes (near ENOSPC) */ -#define CEPH_OSDMAP_FULL (1<<1) /* no data writes (ENOSPC) */ +#define CEPH_OSDMAP_NEARFULL (1<<0) /* sync writes (near ENOSPC), + not set since ~luminous */ +#define CEPH_OSDMAP_FULL (1<<1) /* no data writes (ENOSPC), + not set since ~luminous */ #define CEPH_OSDMAP_PAUSERD (1<<2) /* pause all reads */ #define CEPH_OSDMAP_PAUSEWR (1<<3) /* pause all writes */ #define CEPH_OSDMAP_PAUSEREC (1<<4) /* pause recovery */ -- cgit v1.2.3 From e886274031200bb60965c1b9c49b7acda56a93bd Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 10 Mar 2020 16:19:01 +0100 Subject: libceph: fix alloc_msg_with_page_vector() memory leaks Make it so that CEPH_MSG_DATA_PAGES data item can own pages, fixing a bunch of memory leaks for a page vector allocated in alloc_msg_with_page_vector(). Currently, only watch-notify messages trigger this allocation, and normally the page vector is freed either in handle_watch_notify() or by the caller of ceph_osdc_notify(). But if the message is freed before that (e.g. if the session faults while reading in the message or if the notify is stale), we leak the page vector. This was supposed to be fixed by switching to a message-owned pagelist, but that never happened. Fixes: 1907920324f1 ("libceph: support for sending notifies") Reported-by: Roman Penyaev Signed-off-by: Ilya Dryomov Reviewed-by: Roman Penyaev --- include/linux/ceph/messenger.h | 7 ++++--- net/ceph/messenger.c | 9 +++++++-- net/ceph/osd_client.c | 14 +++----------- 3 files changed, 14 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index c4458dc6a757..76371aaae2d1 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -175,9 +175,10 @@ struct ceph_msg_data { #endif /* CONFIG_BLOCK */ struct ceph_bvec_iter bvec_pos; struct { - struct page **pages; /* NOT OWNER. */ + struct page **pages; size_t length; /* total # bytes */ unsigned int alignment; /* first page */ + bool own_pages; }; struct ceph_pagelist *pagelist; }; @@ -356,8 +357,8 @@ extern void ceph_con_keepalive(struct ceph_connection *con); extern bool ceph_con_keepalive_expired(struct ceph_connection *con, unsigned long interval); -extern void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages, - size_t length, size_t alignment); +void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages, + size_t length, size_t alignment, bool own_pages); extern void ceph_msg_data_add_pagelist(struct ceph_msg *msg, struct ceph_pagelist *pagelist); #ifdef CONFIG_BLOCK diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 5b4bd8261002..f8ca5edc5f2c 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -3248,12 +3248,16 @@ static struct ceph_msg_data *ceph_msg_data_add(struct ceph_msg *msg) static void ceph_msg_data_destroy(struct ceph_msg_data *data) { - if (data->type == CEPH_MSG_DATA_PAGELIST) + if (data->type == CEPH_MSG_DATA_PAGES && data->own_pages) { + int num_pages = calc_pages_for(data->alignment, data->length); + ceph_release_page_vector(data->pages, num_pages); + } else if (data->type == CEPH_MSG_DATA_PAGELIST) { ceph_pagelist_release(data->pagelist); + } } void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages, - size_t length, size_t alignment) + size_t length, size_t alignment, bool own_pages) { struct ceph_msg_data *data; @@ -3265,6 +3269,7 @@ void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages, data->pages = pages; data->length = length; data->alignment = alignment & ~PAGE_MASK; + data->own_pages = own_pages; msg->data_length += length; } diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index b68b376d8c2f..af868d3923b9 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -962,7 +962,7 @@ static void ceph_osdc_msg_data_add(struct ceph_msg *msg, BUG_ON(length > (u64) SIZE_MAX); if (length) ceph_msg_data_add_pages(msg, osd_data->pages, - length, osd_data->alignment); + length, osd_data->alignment, false); } else if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGELIST) { BUG_ON(!length); ceph_msg_data_add_pagelist(msg, osd_data->pagelist); @@ -4436,9 +4436,7 @@ static void handle_watch_notify(struct ceph_osd_client *osdc, CEPH_MSG_DATA_PAGES); *lreq->preply_pages = data->pages; *lreq->preply_len = data->length; - } else { - ceph_release_page_vector(data->pages, - calc_pages_for(0, data->length)); + data->own_pages = false; } } lreq->notify_finish_error = return_code; @@ -5506,9 +5504,6 @@ out_unlock_osdc: return m; } -/* - * TODO: switch to a msg-owned pagelist - */ static struct ceph_msg *alloc_msg_with_page_vector(struct ceph_msg_header *hdr) { struct ceph_msg *m; @@ -5522,7 +5517,6 @@ static struct ceph_msg *alloc_msg_with_page_vector(struct ceph_msg_header *hdr) if (data_len) { struct page **pages; - struct ceph_osd_data osd_data; pages = ceph_alloc_page_vector(calc_pages_for(0, data_len), GFP_NOIO); @@ -5531,9 +5525,7 @@ static struct ceph_msg *alloc_msg_with_page_vector(struct ceph_msg_header *hdr) return NULL; } - ceph_osd_data_pages_init(&osd_data, pages, data_len, 0, false, - false); - ceph_osdc_msg_data_add(m, &osd_data); + ceph_msg_data_add_pages(m, pages, data_len, 0, true); } return m; -- cgit v1.2.3