From 0dde584882ade13dc9708d611fbf69b0ae8a9e48 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Fri, 2 Dec 2016 16:35:09 +0100
Subject: libceph: drop len argument of *verify_authorizer_reply()

The length of the reply is protocol-dependent - for cephx it's
ceph_x_authorize_reply.  Nothing sensible can be passed from the
messenger layer anyway.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Sage Weil <sage@redhat.com>
---
 include/linux/ceph/auth.h      | 5 ++---
 include/linux/ceph/messenger.h | 2 +-
 2 files changed, 3 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/ceph/auth.h b/include/linux/ceph/auth.h
index 374bb1c4ef52..a6747789fe5c 100644
--- a/include/linux/ceph/auth.h
+++ b/include/linux/ceph/auth.h
@@ -64,7 +64,7 @@ struct ceph_auth_client_ops {
 	int (*update_authorizer)(struct ceph_auth_client *ac, int peer_type,
 				 struct ceph_auth_handshake *auth);
 	int (*verify_authorizer_reply)(struct ceph_auth_client *ac,
-				       struct ceph_authorizer *a, size_t len);
+				       struct ceph_authorizer *a);
 	void (*invalidate_authorizer)(struct ceph_auth_client *ac,
 				      int peer_type);
 
@@ -118,8 +118,7 @@ extern int ceph_auth_update_authorizer(struct ceph_auth_client *ac,
 				       int peer_type,
 				       struct ceph_auth_handshake *a);
 extern int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac,
-					     struct ceph_authorizer *a,
-					     size_t len);
+					     struct ceph_authorizer *a);
 extern void ceph_auth_invalidate_authorizer(struct ceph_auth_client *ac,
 					    int peer_type);
 
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index 8dbd7879fdc6..531f2d882bd9 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -30,7 +30,7 @@ struct ceph_connection_operations {
 	struct ceph_auth_handshake *(*get_authorizer) (
 				struct ceph_connection *con,
 			       int *proto, int force_new);
-	int (*verify_authorizer_reply) (struct ceph_connection *con, int len);
+	int (*verify_authorizer_reply) (struct ceph_connection *con);
 	int (*invalidate_authorizer)(struct ceph_connection *con);
 
 	/* there was some error on the socket (disconnect, whatever) */
-- 
cgit v1.2.3


From e9e427f0a14f7e4773896dd7af357819a56d097a Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zyan@redhat.com>
Date: Thu, 10 Nov 2016 16:02:06 +0800
Subject: ceph: check availability of mds cluster on mount

Signed-off-by: Yan, Zheng <zyan@redhat.com>
---
 fs/ceph/mds_client.c        |  19 ++++--
 fs/ceph/mdsmap.c            | 163 ++++++++++++++++++++++++++++++++++++++++++--
 fs/ceph/super.c             |  10 +++
 fs/ceph/super.h             |   1 +
 include/linux/ceph/mdsmap.h |   5 ++
 5 files changed, 187 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index bf4d3d26850c..4f49253387a0 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2100,17 +2100,26 @@ static int __do_request(struct ceph_mds_client *mdsc,
 		err = -EIO;
 		goto finish;
 	}
+	if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_MOUNTING) {
+		if (mdsc->mdsmap_err) {
+			err = mdsc->mdsmap_err;
+			dout("do_request mdsmap err %d\n", err);
+			goto finish;
+		}
+		if (!(mdsc->fsc->mount_options->flags &
+		      CEPH_MOUNT_OPT_MOUNTWAIT) &&
+		    !ceph_mdsmap_is_cluster_available(mdsc->mdsmap)) {
+			err = -ENOENT;
+			pr_info("probably no mds server is up\n");
+			goto finish;
+		}
+	}
 
 	put_request_session(req);
 
 	mds = __choose_mds(mdsc, req);
 	if (mds < 0 ||
 	    ceph_mdsmap_get_state(mdsc->mdsmap, mds) < CEPH_MDS_STATE_ACTIVE) {
-		if (mdsc->mdsmap_err) {
-			err = mdsc->mdsmap_err;
-			dout("do_request mdsmap err %d\n", err);
-			goto finish;
-		}
 		dout("do_request no mds or not active, waiting for map\n");
 		list_add(&req->r_wait, &mdsc->waiting_for_map);
 		goto out;
diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
index 8c3591a7fbae..5454e2327a5f 100644
--- a/fs/ceph/mdsmap.c
+++ b/fs/ceph/mdsmap.c
@@ -42,6 +42,60 @@ int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m)
 	return i;
 }
 
+#define __decode_and_drop_type(p, end, type, bad)		\
+	do {							\
+		if (*p + sizeof(type) > end)			\
+			goto bad;				\
+		*p += sizeof(type);				\
+	} while (0)
+
+#define __decode_and_drop_set(p, end, type, bad)		\
+	do {							\
+		u32 n;						\
+		size_t need;					\
+		ceph_decode_32_safe(p, end, n, bad);		\
+		need = sizeof(type) * n;			\
+		ceph_decode_need(p, end, need, bad);		\
+		*p += need;					\
+	} while (0)
+
+#define __decode_and_drop_map(p, end, ktype, vtype, bad)	\
+	do {							\
+		u32 n;						\
+		size_t need;					\
+		ceph_decode_32_safe(p, end, n, bad);		\
+		need = (sizeof(ktype) + sizeof(vtype)) * n;	\
+		ceph_decode_need(p, end, need, bad);		\
+		*p += need;					\
+	} while (0)
+
+
+static int __decode_and_drop_compat_set(void **p, void* end)
+{
+	int i;
+	/* compat, ro_compat, incompat*/
+	for (i = 0; i < 3; i++) {
+		u32 n;
+		ceph_decode_need(p, end, sizeof(u64) + sizeof(u32), bad);
+		/* mask */
+		*p += sizeof(u64);
+		/* names (map<u64, string>) */
+		n = ceph_decode_32(p);
+		while (n-- > 0) {
+			u32 len;
+			ceph_decode_need(p, end, sizeof(u64) + sizeof(u32),
+					 bad);
+			*p += sizeof(u64);
+			len = ceph_decode_32(p);
+			ceph_decode_need(p, end, len, bad);
+			*p += len;
+		}
+	}
+	return 0;
+bad:
+	return -1;
+}
+
 /*
  * Decode an MDS map
  *
@@ -55,6 +109,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
 	int i, j, n;
 	int err = -EINVAL;
 	u8 mdsmap_v, mdsmap_cv;
+	u16 mdsmap_ev;
 
 	m = kzalloc(sizeof(*m), GFP_NOFS);
 	if (m == NULL)
@@ -83,7 +138,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
 
 	m->m_info = kcalloc(m->m_max_mds, sizeof(*m->m_info), GFP_NOFS);
 	if (m->m_info == NULL)
-		goto badmem;
+		goto nomem;
 
 	/* pick out active nodes from mds_info (state > 0) */
 	n = ceph_decode_32(p);
@@ -166,7 +221,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
 			info->export_targets = kcalloc(num_export_targets,
 						       sizeof(u32), GFP_NOFS);
 			if (info->export_targets == NULL)
-				goto badmem;
+				goto nomem;
 			for (j = 0; j < num_export_targets; j++)
 				info->export_targets[j] =
 				       ceph_decode_32(&pexport_targets);
@@ -180,24 +235,104 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
 	m->m_num_data_pg_pools = n;
 	m->m_data_pg_pools = kcalloc(n, sizeof(u64), GFP_NOFS);
 	if (!m->m_data_pg_pools)
-		goto badmem;
+		goto nomem;
 	ceph_decode_need(p, end, sizeof(u64)*(n+1), bad);
 	for (i = 0; i < n; i++)
 		m->m_data_pg_pools[i] = ceph_decode_64(p);
 	m->m_cas_pg_pool = ceph_decode_64(p);
+	m->m_enabled = m->m_epoch > 1;
+
+	mdsmap_ev = 1;
+	if (mdsmap_v >= 2) {
+		ceph_decode_16_safe(p, end, mdsmap_ev, bad_ext);
+	}
+	if (mdsmap_ev >= 3) {
+		if (__decode_and_drop_compat_set(p, end) < 0)
+			goto bad_ext;
+	}
+	/* metadata_pool */
+	if (mdsmap_ev < 5) {
+		__decode_and_drop_type(p, end, u32, bad_ext);
+	} else {
+		__decode_and_drop_type(p, end, u64, bad_ext);
+	}
 
-	/* ok, we don't care about the rest. */
+	/* created + modified + tableserver */
+	__decode_and_drop_type(p, end, struct ceph_timespec, bad_ext);
+	__decode_and_drop_type(p, end, struct ceph_timespec, bad_ext);
+	__decode_and_drop_type(p, end, u32, bad_ext);
+
+	/* in */
+	{
+		int num_laggy = 0;
+		ceph_decode_32_safe(p, end, n, bad_ext);
+		ceph_decode_need(p, end, sizeof(u32) * n, bad_ext);
+
+		for (i = 0; i < n; i++) {
+			s32 mds = ceph_decode_32(p);
+			if (mds >= 0 && mds < m->m_max_mds) {
+				if (m->m_info[mds].laggy)
+					num_laggy++;
+			}
+		}
+		m->m_num_laggy = num_laggy;
+	}
+
+	/* inc */
+	__decode_and_drop_map(p, end, u32, u32, bad_ext);
+	/* up */
+	__decode_and_drop_map(p, end, u32, u64, bad_ext);
+	/* failed */
+	__decode_and_drop_set(p, end, u32, bad_ext);
+	/* stopped */
+	__decode_and_drop_set(p, end, u32, bad_ext);
+
+	if (mdsmap_ev >= 4) {
+		/* last_failure_osd_epoch */
+		__decode_and_drop_type(p, end, u32, bad_ext);
+	}
+	if (mdsmap_ev >= 6) {
+		/* ever_allowed_snaps */
+		__decode_and_drop_type(p, end, u8, bad_ext);
+		/* explicitly_allowed_snaps */
+		__decode_and_drop_type(p, end, u8, bad_ext);
+	}
+	if (mdsmap_ev >= 7) {
+		/* inline_data_enabled */
+		__decode_and_drop_type(p, end, u8, bad_ext);
+	}
+	if (mdsmap_ev >= 8) {
+		u32 name_len;
+		/* enabled */
+		ceph_decode_8_safe(p, end, m->m_enabled, bad_ext);
+		ceph_decode_32_safe(p, end, name_len, bad_ext);
+		ceph_decode_need(p, end, name_len, bad_ext);
+		*p += name_len;
+	}
+	/* damaged */
+	if (mdsmap_ev >= 9) {
+		size_t need;
+		ceph_decode_32_safe(p, end, n, bad_ext);
+		need = sizeof(u32) * n;
+		ceph_decode_need(p, end, need, bad_ext);
+		*p += need;
+		m->m_damaged = n > 0;
+	} else {
+		m->m_damaged = false;
+	}
+bad_ext:
 	*p = end;
 	dout("mdsmap_decode success epoch %u\n", m->m_epoch);
 	return m;
-
-badmem:
+nomem:
 	err = -ENOMEM;
+	goto out_err;
 bad:
 	pr_err("corrupt mdsmap\n");
 	print_hex_dump(KERN_DEBUG, "mdsmap: ",
 		       DUMP_PREFIX_OFFSET, 16, 1,
 		       start, end - start, true);
+out_err:
 	ceph_mdsmap_destroy(m);
 	return ERR_PTR(err);
 }
@@ -212,3 +347,19 @@ void ceph_mdsmap_destroy(struct ceph_mdsmap *m)
 	kfree(m->m_data_pg_pools);
 	kfree(m);
 }
+
+bool ceph_mdsmap_is_cluster_available(struct ceph_mdsmap *m)
+{
+	int i, nr_active = 0;
+	if (!m->m_enabled)
+		return false;
+	if (m->m_damaged)
+		return false;
+	if (m->m_num_laggy > 0)
+		return false;
+	for (i = 0; i < m->m_max_mds; i++) {
+		if (m->m_info[i].state == CEPH_MDS_STATE_ACTIVE)
+			nr_active++;
+	}
+	return nr_active > 0;
+}
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index b382e5910eea..537f96631785 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -137,6 +137,8 @@ enum {
 	Opt_nofscache,
 	Opt_poolperm,
 	Opt_nopoolperm,
+	Opt_require_active_mds,
+	Opt_norequire_active_mds,
 #ifdef CONFIG_CEPH_FS_POSIX_ACL
 	Opt_acl,
 #endif
@@ -171,6 +173,8 @@ static match_table_t fsopt_tokens = {
 	{Opt_nofscache, "nofsc"},
 	{Opt_poolperm, "poolperm"},
 	{Opt_nopoolperm, "nopoolperm"},
+	{Opt_require_active_mds, "require_active_mds"},
+	{Opt_norequire_active_mds, "norequire_active_mds"},
 #ifdef CONFIG_CEPH_FS_POSIX_ACL
 	{Opt_acl, "acl"},
 #endif
@@ -287,6 +291,12 @@ static int parse_fsopt_token(char *c, void *private)
 	case Opt_nopoolperm:
 		fsopt->flags |= CEPH_MOUNT_OPT_NOPOOLPERM;
 		break;
+	case Opt_require_active_mds:
+		fsopt->flags &= ~CEPH_MOUNT_OPT_MOUNTWAIT;
+		break;
+	case Opt_norequire_active_mds:
+		fsopt->flags |= CEPH_MOUNT_OPT_MOUNTWAIT;
+		break;
 #ifdef CONFIG_CEPH_FS_POSIX_ACL
 	case Opt_acl:
 		fsopt->sb_flags |= MS_POSIXACL;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 622d5dd9f616..b07f55e55f60 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -36,6 +36,7 @@
 #define CEPH_MOUNT_OPT_DCACHE          (1<<9) /* use dcache for readdir etc */
 #define CEPH_MOUNT_OPT_FSCACHE         (1<<10) /* use fscache */
 #define CEPH_MOUNT_OPT_NOPOOLPERM      (1<<11) /* no pool permission check */
+#define CEPH_MOUNT_OPT_MOUNTWAIT       (1<<12) /* mount waits if no mds is up */
 
 #define CEPH_MOUNT_OPT_DEFAULT    CEPH_MOUNT_OPT_DCACHE
 
diff --git a/include/linux/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h
index 87ed09f54800..8ed5dc505fbb 100644
--- a/include/linux/ceph/mdsmap.h
+++ b/include/linux/ceph/mdsmap.h
@@ -31,6 +31,10 @@ struct ceph_mdsmap {
 	int m_num_data_pg_pools;
 	u64 *m_data_pg_pools;
 	u64 m_cas_pg_pool;
+
+	bool m_enabled;
+	bool m_damaged;
+	int m_num_laggy;
 };
 
 static inline struct ceph_entity_addr *
@@ -59,5 +63,6 @@ static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w)
 extern int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m);
 extern struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end);
 extern void ceph_mdsmap_destroy(struct ceph_mdsmap *m);
+extern bool ceph_mdsmap_is_cluster_available(struct ceph_mdsmap *m);
 
 #endif
-- 
cgit v1.2.3


From 1e4ef0c6332bd90e6c70afc07b35dffaf1eab1a7 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Thu, 10 Nov 2016 07:42:06 -0500
Subject: ceph: add flags parameter to send_cap_msg

Add a flags parameter to send_cap_msg, so we can request expedited
service from the MDS when we know we'll be waiting on the result.

Set that flag in the case of try_flush_caps. The callers of that
function generally wait synchronously on the result, so it's beneficial
to ask the server to expedite it.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Reviewed-by: Yan, Zheng <zyan@redhat.com>
---
 fs/ceph/caps.c               | 25 +++++++++++++++----------
 include/linux/ceph/ceph_fs.h |  3 +++
 2 files changed, 18 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 508663ed413c..b54af160526e 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -996,6 +996,7 @@ struct cap_msg_args {
 	struct timespec		atime, mtime, ctime;
 	int			op, caps, wanted, dirty;
 	u32			seq, issue_seq, mseq, time_warp_seq;
+	u32			flags;
 	kuid_t			uid;
 	kgid_t			gid;
 	umode_t			mode;
@@ -1104,7 +1105,7 @@ static int send_cap_msg(struct cap_msg_args *arg)
 	ceph_encode_64(&p, 0);
 
 	/* Advisory flags (version 10) */
-	ceph_encode_32(&p, 0);
+	ceph_encode_32(&p, arg->flags);
 
 	ceph_con_send(&arg->session->s_con, msg);
 	return 0;
@@ -1145,8 +1146,8 @@ void ceph_queue_caps_release(struct inode *inode)
  * caller should hold snap_rwsem (read), s_mutex.
  */
 static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
-		      int op, int used, int want, int retain, int flushing,
-		      u64 flush_tid, u64 oldest_flush_tid)
+		      int op, bool sync, int used, int want, int retain,
+		      int flushing, u64 flush_tid, u64 oldest_flush_tid)
 	__releases(cap->ci->i_ceph_lock)
 {
 	struct ceph_inode_info *ci = cap->ci;
@@ -1235,6 +1236,9 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
 	arg.mode = inode->i_mode;
 
 	arg.inline_data = ci->i_inline_version != CEPH_INLINE_NONE;
+	arg.flags = 0;
+	if (sync)
+		arg.flags |= CEPH_CLIENT_CAPS_SYNC;
 
 	spin_unlock(&ci->i_ceph_lock);
 
@@ -1288,6 +1292,7 @@ static inline int __send_flush_snap(struct inode *inode,
 	arg.mode = capsnap->mode;
 
 	arg.inline_data = capsnap->inline_data;
+	arg.flags = 0;
 
 	return send_cap_msg(&arg);
 }
@@ -1912,9 +1917,9 @@ ack:
 		sent++;
 
 		/* __send_cap drops i_ceph_lock */
-		delayed += __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, cap_used,
-				      want, retain, flushing,
-				      flush_tid, oldest_flush_tid);
+		delayed += __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, false,
+				cap_used, want, retain, flushing,
+				flush_tid, oldest_flush_tid);
 		goto retry; /* retake i_ceph_lock and restart our cap scan. */
 	}
 
@@ -1978,9 +1983,9 @@ retry:
 						&flush_tid, &oldest_flush_tid);
 
 		/* __send_cap drops i_ceph_lock */
-		delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, used, want,
-				     (cap->issued | cap->implemented),
-				     flushing, flush_tid, oldest_flush_tid);
+		delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, true,
+				used, want, (cap->issued | cap->implemented),
+				flushing, flush_tid, oldest_flush_tid);
 
 		if (delayed) {
 			spin_lock(&ci->i_ceph_lock);
@@ -2173,7 +2178,7 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc,
 			     inode, cap, cf->tid, ceph_cap_string(cf->caps));
 			ci->i_ceph_flags |= CEPH_I_NODELAY;
 			ret = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH,
-					  __ceph_caps_used(ci),
+					  false, __ceph_caps_used(ci),
 					  __ceph_caps_wanted(ci),
 					  cap->issued | cap->implemented,
 					  cf->caps, cf->tid, oldest_flush_tid);
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index f96de8de4fa7..f4b2ee18f38c 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -653,6 +653,9 @@ enum {
 
 extern const char *ceph_cap_op_name(int op);
 
+/* flags field in client cap messages (version >= 10) */
+#define CEPH_CLIENT_CAPS_SYNC	(0x1)
+
 /*
  * caps message, used for capability callbacks, acks, requests, etc.
  */
-- 
cgit v1.2.3


From c297eb42690b904fb5b78dd9ad001bafe25f49ec Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Fri, 2 Dec 2016 14:01:55 +0100
Subject: libceph: always signal completion when done

r_safe_completion is currently, and has always been, signaled only if
on-disk ack was requested.  It's there for fsync and syncfs, which wait
for in-flight writes to flush - all data write requests set ONDISK.

However, the pool perm check code introduced in 4.2 sends a write
request with only ACK set.  An unfortunately timed syncfs can then hang
forever: r_safe_completion won't be signaled because only an unsafe
reply was requested.

We could patch ceph_osdc_sync() to skip !ONDISK write requests, but
that is somewhat incomplete and yet another special case.  Instead,
rename this completion to r_done_completion and always signal it when
the OSD client is done with the request, whether unsafe, safe, or
error.  This is a bit cleaner and helps with the cancellation code.

Reported-by: Yan, Zheng <zyan@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/file.c                  |  2 +-
 include/linux/ceph/osd_client.h |  2 +-
 net/ceph/osd_client.c           | 25 +++++++++++--------------
 3 files changed, 13 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 12ce2b562d14..f633165f3fdc 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -864,7 +864,7 @@ void ceph_sync_write_wait(struct inode *inode)
 
 		dout("sync_write_wait on tid %llu (until %llu)\n",
 		     req->r_tid, last_tid);
-		wait_for_completion(&req->r_safe_completion);
+		wait_for_completion(&req->r_done_completion);
 		ceph_osdc_put_request(req);
 
 		spin_lock(&ci->i_unsafe_lock);
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index a8e66344bacc..03a6653d329a 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -176,7 +176,7 @@ struct ceph_osd_request {
 	struct kref       r_kref;
 	bool              r_mempool;
 	struct completion r_completion;
-	struct completion r_safe_completion;  /* fsync waiter */
+	struct completion r_done_completion;  /* fsync waiter */
 	ceph_osdc_callback_t r_callback;
 	ceph_osdc_unsafe_callback_t r_unsafe_callback;
 	struct list_head  r_unsafe_item;
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 5d812a26f05a..5a8e8670ea59 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -460,7 +460,7 @@ static void request_init(struct ceph_osd_request *req)
 
 	kref_init(&req->r_kref);
 	init_completion(&req->r_completion);
-	init_completion(&req->r_safe_completion);
+	init_completion(&req->r_done_completion);
 	RB_CLEAR_NODE(&req->r_node);
 	RB_CLEAR_NODE(&req->r_mc_node);
 	INIT_LIST_HEAD(&req->r_unsafe_item);
@@ -1772,7 +1772,7 @@ static void complete_request(struct ceph_osd_request *req, int err)
 	req->r_result = err;
 	__finish_request(req);
 	__complete_request(req);
-	complete_all(&req->r_safe_completion);
+	complete_all(&req->r_done_completion);
 	ceph_osdc_put_request(req);
 }
 
@@ -1797,7 +1797,9 @@ static void cancel_request(struct ceph_osd_request *req)
 	dout("%s req %p tid %llu\n", __func__, req, req->r_tid);
 
 	cancel_map_check(req);
-	finish_request(req);
+	__finish_request(req);
+	complete_all(&req->r_done_completion);
+	ceph_osdc_put_request(req);
 }
 
 static void check_pool_dne(struct ceph_osd_request *req)
@@ -2808,12 +2810,12 @@ static bool done_request(const struct ceph_osd_request *req,
  * ->r_unsafe_callback is set?	yes			no
  *
  * first reply is OK (needed	r_cb/r_completion,	r_cb/r_completion,
- * any or needed/got safe)	r_safe_completion	r_safe_completion
+ * any or needed/got safe)	r_done_completion	r_done_completion
  *
  * first reply is unsafe	r_unsafe_cb(true)	(nothing)
  *
  * when we get the safe reply	r_unsafe_cb(false),	r_cb/r_completion,
- *				r_safe_completion	r_safe_completion
+ *				r_done_completion	r_done_completion
  */
 static void handle_reply(struct ceph_osd *osd, struct ceph_msg *msg)
 {
@@ -2934,8 +2936,7 @@ static void handle_reply(struct ceph_osd *osd, struct ceph_msg *msg)
 			dout("req %p tid %llu cb\n", req, req->r_tid);
 			__complete_request(req);
 		}
-		if (m.flags & CEPH_OSD_FLAG_ONDISK)
-			complete_all(&req->r_safe_completion);
+		complete_all(&req->r_done_completion);
 		ceph_osdc_put_request(req);
 	} else {
 		if (req->r_unsafe_callback) {
@@ -3471,9 +3472,8 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc,
 EXPORT_SYMBOL(ceph_osdc_start_request);
 
 /*
- * Unregister a registered request.  The request is not completed (i.e.
- * no callbacks or wakeups) - higher layers are supposed to know what
- * they are canceling.
+ * Unregister a registered request.  The request is not completed:
+ * ->r_result isn't set and __complete_request() isn't called.
  */
 void ceph_osdc_cancel_request(struct ceph_osd_request *req)
 {
@@ -3500,9 +3500,6 @@ static int wait_request_timeout(struct ceph_osd_request *req,
 	if (left <= 0) {
 		left = left ?: -ETIMEDOUT;
 		ceph_osdc_cancel_request(req);
-
-		/* kludge - need to to wake ceph_osdc_sync() */
-		complete_all(&req->r_safe_completion);
 	} else {
 		left = req->r_result; /* completed */
 	}
@@ -3549,7 +3546,7 @@ again:
 			up_read(&osdc->lock);
 			dout("%s waiting on req %p tid %llu last_tid %llu\n",
 			     __func__, req, req->r_tid, last_tid);
-			wait_for_completion(&req->r_safe_completion);
+			wait_for_completion(&req->r_done_completion);
 			ceph_osdc_put_request(req);
 			goto again;
 		}
-- 
cgit v1.2.3