From 2412e927604e0af7e17ae4b688b85a1e87e378fe Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 1 Aug 2017 11:59:49 -0400
Subject: sunrpc: Const-ify instances of struct svc_xprt_ops

Close an attack vector by moving the arrays of server-side transport
methods to read-only memory.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/svcsock.c                     | 6 +++---
 net/sunrpc/xprtrdma/svc_rdma_transport.c | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 2b720fa35c4f..b434f524d677 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -687,7 +687,7 @@ static struct svc_xprt *svc_udp_create(struct svc_serv *serv,
 	return svc_create_socket(serv, IPPROTO_UDP, net, sa, salen, flags);
 }
 
-static struct svc_xprt_ops svc_udp_ops = {
+static const struct svc_xprt_ops svc_udp_ops = {
 	.xpo_create = svc_udp_create,
 	.xpo_recvfrom = svc_udp_recvfrom,
 	.xpo_sendto = svc_udp_sendto,
@@ -1229,7 +1229,7 @@ static void svc_bc_tcp_sock_detach(struct svc_xprt *xprt)
 {
 }
 
-static struct svc_xprt_ops svc_tcp_bc_ops = {
+static const struct svc_xprt_ops svc_tcp_bc_ops = {
 	.xpo_create = svc_bc_tcp_create,
 	.xpo_detach = svc_bc_tcp_sock_detach,
 	.xpo_free = svc_bc_sock_free,
@@ -1263,7 +1263,7 @@ static void svc_cleanup_bc_xprt_sock(void)
 }
 #endif /* CONFIG_SUNRPC_BACKCHANNEL */
 
-static struct svc_xprt_ops svc_tcp_ops = {
+static const struct svc_xprt_ops svc_tcp_ops = {
 	.xpo_create = svc_tcp_create,
 	.xpo_recvfrom = svc_tcp_recvfrom,
 	.xpo_sendto = svc_tcp_sendto,
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index e660d4965b18..2aa8473dcc97 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -70,7 +70,7 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt);
 static int svc_rdma_secure_port(struct svc_rqst *);
 static void svc_rdma_kill_temp_xprt(struct svc_xprt *);
 
-static struct svc_xprt_ops svc_rdma_ops = {
+static const struct svc_xprt_ops svc_rdma_ops = {
 	.xpo_create = svc_rdma_create,
 	.xpo_recvfrom = svc_rdma_recvfrom,
 	.xpo_sendto = svc_rdma_sendto,
@@ -98,7 +98,7 @@ static struct svc_xprt *svc_rdma_bc_create(struct svc_serv *, struct net *,
 static void svc_rdma_bc_detach(struct svc_xprt *);
 static void svc_rdma_bc_free(struct svc_xprt *);
 
-static struct svc_xprt_ops svc_rdma_bc_ops = {
+static const struct svc_xprt_ops svc_rdma_bc_ops = {
 	.xpo_create = svc_rdma_bc_create,
 	.xpo_detach = svc_rdma_bc_detach,
 	.xpo_free = svc_rdma_bc_free,
-- 
cgit v1.2.3


From afea5657c20b11ec9f895ac5cc33b560fb1e0276 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 1 Aug 2017 12:00:06 -0400
Subject: sunrpc: Const-ify struct sv_serv_ops

Close an attack vector by moving the arrays of per-server methods to
read-only memory.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/lockd/svc.c             |  2 +-
 fs/nfs/callback.c          | 10 +++++-----
 fs/nfsd/nfssvc.c           |  2 +-
 include/linux/sunrpc/svc.h |  6 +++---
 net/sunrpc/svc.c           |  6 +++---
 5 files changed, 13 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 726b6cecf430..b995bdc13976 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -396,7 +396,7 @@ out_rqst:
 	return error;
 }
 
-static struct svc_serv_ops lockd_sv_ops = {
+static const struct svc_serv_ops lockd_sv_ops = {
 	.svo_shutdown		= svc_rpcb_cleanup,
 	.svo_enqueue_xprt	= svc_xprt_do_enqueue,
 };
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 34323877ec13..2cddf7f437e6 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -226,26 +226,26 @@ err_bind:
 	return ret;
 }
 
-static struct svc_serv_ops nfs40_cb_sv_ops = {
+static const struct svc_serv_ops nfs40_cb_sv_ops = {
 	.svo_function		= nfs4_callback_svc,
 	.svo_enqueue_xprt	= svc_xprt_do_enqueue,
 	.svo_setup		= svc_set_num_threads_sync,
 	.svo_module		= THIS_MODULE,
 };
 #if defined(CONFIG_NFS_V4_1)
-static struct svc_serv_ops nfs41_cb_sv_ops = {
+static const struct svc_serv_ops nfs41_cb_sv_ops = {
 	.svo_function		= nfs41_callback_svc,
 	.svo_enqueue_xprt	= svc_xprt_do_enqueue,
 	.svo_setup		= svc_set_num_threads_sync,
 	.svo_module		= THIS_MODULE,
 };
 
-static struct svc_serv_ops *nfs4_cb_sv_ops[] = {
+static const struct svc_serv_ops *nfs4_cb_sv_ops[] = {
 	[0] = &nfs40_cb_sv_ops,
 	[1] = &nfs41_cb_sv_ops,
 };
 #else
-static struct svc_serv_ops *nfs4_cb_sv_ops[] = {
+static const struct svc_serv_ops *nfs4_cb_sv_ops[] = {
 	[0] = &nfs40_cb_sv_ops,
 	[1] = NULL,
 };
@@ -254,8 +254,8 @@ static struct svc_serv_ops *nfs4_cb_sv_ops[] = {
 static struct svc_serv *nfs_callback_create_svc(int minorversion)
 {
 	struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion];
+	const struct svc_serv_ops *sv_ops;
 	struct svc_serv *serv;
-	struct svc_serv_ops *sv_ops;
 
 	/*
 	 * Check whether we're already up and running.
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 063ae7de2c12..7e3af3ef0917 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -475,7 +475,7 @@ static int nfsd_get_default_max_blksize(void)
 	return ret;
 }
 
-static struct svc_serv_ops nfsd_thread_sv_ops = {
+static const struct svc_serv_ops nfsd_thread_sv_ops = {
 	.svo_shutdown		= nfsd_last_thread,
 	.svo_function		= nfsd,
 	.svo_enqueue_xprt	= svc_xprt_do_enqueue,
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index a3f8af9bd543..38f561b2dda3 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -99,7 +99,7 @@ struct svc_serv {
 
 	unsigned int		sv_nrpools;	/* number of thread pools */
 	struct svc_pool *	sv_pools;	/* array of thread pools */
-	struct svc_serv_ops	*sv_ops;	/* server operations */
+	const struct svc_serv_ops *sv_ops;	/* server operations */
 #if defined(CONFIG_SUNRPC_BACKCHANNEL)
 	struct list_head	sv_cb_list;	/* queue for callback requests
 						 * that arrive over the same
@@ -465,7 +465,7 @@ int svc_rpcb_setup(struct svc_serv *serv, struct net *net);
 void svc_rpcb_cleanup(struct svc_serv *serv, struct net *net);
 int svc_bind(struct svc_serv *serv, struct net *net);
 struct svc_serv *svc_create(struct svc_program *, unsigned int,
-			    struct svc_serv_ops *);
+			    const struct svc_serv_ops *);
 struct svc_rqst *svc_rqst_alloc(struct svc_serv *serv,
 					struct svc_pool *pool, int node);
 struct svc_rqst *svc_prepare_thread(struct svc_serv *serv,
@@ -475,7 +475,7 @@ void		   svc_exit_thread(struct svc_rqst *);
 unsigned int	   svc_pool_map_get(void);
 void		   svc_pool_map_put(void);
 struct svc_serv *  svc_create_pooled(struct svc_program *, unsigned int,
-			struct svc_serv_ops *);
+			const struct svc_serv_ops *);
 int		   svc_set_num_threads(struct svc_serv *, struct svc_pool *, int);
 int		   svc_set_num_threads_sync(struct svc_serv *, struct svc_pool *, int);
 int		   svc_pool_stats_open(struct svc_serv *serv, struct file *file);
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 85ce0db5b0a6..aa04666f929d 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -421,7 +421,7 @@ __svc_init_bc(struct svc_serv *serv)
  */
 static struct svc_serv *
 __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
-	     struct svc_serv_ops *ops)
+	     const struct svc_serv_ops *ops)
 {
 	struct svc_serv	*serv;
 	unsigned int vers;
@@ -486,7 +486,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
 
 struct svc_serv *
 svc_create(struct svc_program *prog, unsigned int bufsize,
-	   struct svc_serv_ops *ops)
+	   const struct svc_serv_ops *ops)
 {
 	return __svc_create(prog, bufsize, /*npools*/1, ops);
 }
@@ -494,7 +494,7 @@ EXPORT_SYMBOL_GPL(svc_create);
 
 struct svc_serv *
 svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
-		  struct svc_serv_ops *ops)
+		  const struct svc_serv_ops *ops)
 {
 	struct svc_serv *serv;
 	unsigned int npools = svc_pool_map_get();
-- 
cgit v1.2.3


From 7075a867ce5748007b299bd28f12754cd51719d4 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 1 Aug 2017 12:00:14 -0400
Subject: svcrdma: Clean up svc_rdma_build_read_chunk()

Dan Carpenter <dan.carpenter@oracle.com> observed that the while()
loop in svc_rdma_build_read_chunk() does not document the assumption
that the loop interior is always executed at least once.

Defensive: the function now returns -EINVAL if this assumption
fails.

Suggested-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/xprtrdma/svc_rdma_rw.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index 933f79bed270..1f34fae7aec0 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -660,19 +660,21 @@ out_initerr:
 	return -EIO;
 }
 
+/* Walk the segments in the Read chunk starting at @p and construct
+ * RDMA Read operations to pull the chunk to the server.
+ */
 static int svc_rdma_build_read_chunk(struct svc_rqst *rqstp,
 				     struct svc_rdma_read_info *info,
 				     __be32 *p)
 {
 	int ret;
 
+	ret = -EINVAL;
 	info->ri_chunklen = 0;
-	while (*p++ != xdr_zero) {
+	while (*p++ != xdr_zero && be32_to_cpup(p++) == info->ri_position) {
 		u32 rs_handle, rs_length;
 		u64 rs_offset;
 
-		if (be32_to_cpup(p++) != info->ri_position)
-			break;
 		rs_handle = be32_to_cpup(p++);
 		rs_length = be32_to_cpup(p++);
 		p = xdr_decode_hyper(p, &rs_offset);
-- 
cgit v1.2.3


From 193bcb7b3719a315814dce40fc8dcd1af786ea64 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 18 Aug 2017 11:12:35 -0400
Subject: svcrdma: Populate tail iovec when receiving

So that NFS WRITE payloads can eventually be placed directly into a
file's page cache, enable the RPC-over-RDMA transport to present
these payloads in the xdr_buf's page list, while placing trailing
content (such as a GETATTR operation) in the xdr_buf's tail.

After this change, the RPC-over-RDMA's "copy tail" hack, added by
commit a97c331f9aa9 ("svcrdma: Handle additional inline content"),
is no longer needed and can be removed.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/xprtrdma/svc_rdma_rw.c | 108 ++++++--------------------------------
 1 file changed, 15 insertions(+), 93 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index 1f34fae7aec0..7dcda4597057 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -691,78 +691,6 @@ static int svc_rdma_build_read_chunk(struct svc_rqst *rqstp,
 	return ret;
 }
 
-/* If there is inline content following the Read chunk, append it to
- * the page list immediately following the data payload. This has to
- * be done after the reader function has determined how many pages
- * were consumed for RDMA Read.
- *
- * On entry, ri_pageno and ri_pageoff point directly to the end of the
- * page list. On exit, both have been updated to the new "next byte".
- *
- * Assumptions:
- *	- Inline content fits entirely in rq_pages[0]
- *	- Trailing content is only a handful of bytes
- */
-static int svc_rdma_copy_tail(struct svc_rqst *rqstp,
-			      struct svc_rdma_read_info *info)
-{
-	struct svc_rdma_op_ctxt *head = info->ri_readctxt;
-	unsigned int tail_length, remaining;
-	u8 *srcp, *destp;
-
-	/* Assert that all inline content fits in page 0. This is an
-	 * implementation limit, not a protocol limit.
-	 */
-	if (head->arg.head[0].iov_len > PAGE_SIZE) {
-		pr_warn_once("svcrdma: too much trailing inline content\n");
-		return -EINVAL;
-	}
-
-	srcp = head->arg.head[0].iov_base;
-	srcp += info->ri_position;
-	tail_length = head->arg.head[0].iov_len - info->ri_position;
-	remaining = tail_length;
-
-	/* If there is room on the last page in the page list, try to
-	 * fit the trailing content there.
-	 */
-	if (info->ri_pageoff > 0) {
-		unsigned int len;
-
-		len = min_t(unsigned int, remaining,
-			    PAGE_SIZE - info->ri_pageoff);
-		destp = page_address(rqstp->rq_pages[info->ri_pageno]);
-		destp += info->ri_pageoff;
-
-		memcpy(destp, srcp, len);
-		srcp += len;
-		destp += len;
-		info->ri_pageoff += len;
-		remaining -= len;
-
-		if (info->ri_pageoff == PAGE_SIZE) {
-			info->ri_pageno++;
-			info->ri_pageoff = 0;
-		}
-	}
-
-	/* Otherwise, a fresh page is needed. */
-	if (remaining) {
-		head->arg.pages[info->ri_pageno] =
-				rqstp->rq_pages[info->ri_pageno];
-		head->count++;
-
-		destp = page_address(rqstp->rq_pages[info->ri_pageno]);
-		memcpy(destp, srcp, remaining);
-		info->ri_pageoff += remaining;
-	}
-
-	head->arg.page_len += tail_length;
-	head->arg.len += tail_length;
-	head->arg.buflen += tail_length;
-	return 0;
-}
-
 /* Construct RDMA Reads to pull over a normal Read chunk. The chunk
  * data lands in the page list of head->arg.pages.
  *
@@ -787,34 +715,28 @@ static int svc_rdma_build_normal_read_chunk(struct svc_rqst *rqstp,
 	if (ret < 0)
 		goto out;
 
-	/* Read chunk may need XDR round-up (see RFC 5666, s. 3.7).
+	/* Split the Receive buffer between the head and tail
+	 * buffers at Read chunk's position. XDR roundup of the
+	 * chunk is not included in either the pagelist or in
+	 * the tail.
 	 */
-	if (info->ri_chunklen & 3) {
-		u32 padlen = 4 - (info->ri_chunklen & 3);
-
-		info->ri_chunklen += padlen;
+	head->arg.tail[0].iov_base =
+		head->arg.head[0].iov_base + info->ri_position;
+	head->arg.tail[0].iov_len =
+		head->arg.head[0].iov_len - info->ri_position;
+	head->arg.head[0].iov_len = info->ri_position;
 
-		/* NB: data payload always starts on XDR alignment,
-		 * thus the pad can never contain a page boundary.
-		 */
-		info->ri_pageoff += padlen;
-		if (info->ri_pageoff == PAGE_SIZE) {
-			info->ri_pageno++;
-			info->ri_pageoff = 0;
-		}
-	}
+	/* Read chunk may need XDR roundup (see RFC 5666, s. 3.7).
+	 *
+	 * NFSv2/3 write decoders need the length of the tail to
+	 * contain the size of the roundup padding.
+	 */
+	head->arg.tail[0].iov_len += 4 - (info->ri_chunklen & 3);
 
 	head->arg.page_len = info->ri_chunklen;
 	head->arg.len += info->ri_chunklen;
 	head->arg.buflen += info->ri_chunklen;
 
-	if (info->ri_position < head->arg.head[0].iov_len) {
-		ret = svc_rdma_copy_tail(rqstp, info);
-		if (ret < 0)
-			goto out;
-	}
-	head->arg.head[0].iov_len = info->ri_position;
-
 out:
 	return ret;
 }
-- 
cgit v1.2.3


From 5a25bfd28c321e522dbe8083ad6219fa2a820610 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 28 Aug 2017 15:06:06 -0400
Subject: svcrdma: Limit RQ depth

Ensure that the chosen Receive Queue depth for a newly created
transport does not overrun the QP WR limit of the underlying device.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/xprtrdma/svc_rdma_transport.c | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 2aa8473dcc97..cdb04f8a0c25 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -167,8 +167,8 @@ static bool svc_rdma_prealloc_ctxts(struct svcxprt_rdma *xprt)
 {
 	unsigned int i;
 
-	/* Each RPC/RDMA credit can consume a number of send
-	 * and receive WQEs. One ctxt is allocated for each.
+	/* Each RPC/RDMA credit can consume one Receive and
+	 * one Send WQE at the same time.
 	 */
 	i = xprt->sc_sq_depth + xprt->sc_rq_depth;
 
@@ -742,13 +742,18 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 	newxprt->sc_max_sge = min((size_t)dev->attrs.max_sge,
 				  (size_t)RPCSVC_MAXPAGES);
 	newxprt->sc_max_req_size = svcrdma_max_req_size;
-	newxprt->sc_max_requests = min_t(u32, dev->attrs.max_qp_wr,
-					 svcrdma_max_requests);
-	newxprt->sc_fc_credits = cpu_to_be32(newxprt->sc_max_requests);
-	newxprt->sc_max_bc_requests = min_t(u32, dev->attrs.max_qp_wr,
-					    svcrdma_max_bc_requests);
+	newxprt->sc_max_requests = svcrdma_max_requests;
+	newxprt->sc_max_bc_requests = svcrdma_max_bc_requests;
 	newxprt->sc_rq_depth = newxprt->sc_max_requests +
 			       newxprt->sc_max_bc_requests;
+	if (newxprt->sc_rq_depth > dev->attrs.max_qp_wr) {
+		pr_warn("svcrdma: reducing receive depth to %d\n",
+			dev->attrs.max_qp_wr);
+		newxprt->sc_rq_depth = dev->attrs.max_qp_wr;
+		newxprt->sc_max_requests = newxprt->sc_rq_depth - 2;
+		newxprt->sc_max_bc_requests = 2;
+	}
+	newxprt->sc_fc_credits = cpu_to_be32(newxprt->sc_max_requests);
 	newxprt->sc_sq_depth = newxprt->sc_rq_depth;
 	atomic_set(&newxprt->sc_sq_avail, newxprt->sc_sq_depth);
 
-- 
cgit v1.2.3


From 26fb2254dd33b02a522fac42745693f5969b7d4b Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 28 Aug 2017 15:06:22 -0400
Subject: svcrdma: Estimate Send Queue depth properly

The rdma_rw API adjusts max_send_wr upwards during the
rdma_create_qp() call. If the ULP actually wants to take advantage
of these extra resources, it must increase the size of its send
completion queue (created before rdma_create_qp is called) and
increase its send queue accounting limit.

Use the new rdma_rw_mr_factor API to figure out the correct value
to use for the Send Queue and Send Completion Queue depths.

And, ensure that the chosen Send Queue depth for a newly created
transport does not overrun the QP WR limit of the underlying device.

Lastly, there's no longer a need to carry the Send Queue depth in
struct svcxprt_rdma, since the value is used only in the
svc_rdma_accept() path.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/xprtrdma/svc_rdma_transport.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index cdb04f8a0c25..5caf8e722a11 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -51,6 +51,7 @@
 #include <linux/workqueue.h>
 #include <rdma/ib_verbs.h>
 #include <rdma/rdma_cm.h>
+#include <rdma/rw.h>
 #include <linux/sunrpc/svc_rdma.h>
 #include <linux/export.h>
 #include "xprt_rdma.h"
@@ -713,7 +714,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 	struct ib_qp_init_attr qp_attr;
 	struct ib_device *dev;
 	struct sockaddr *sap;
-	unsigned int i;
+	unsigned int i, ctxts;
 	int ret = 0;
 
 	listen_rdma = container_of(xprt, struct svcxprt_rdma, sc_xprt);
@@ -754,7 +755,14 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 		newxprt->sc_max_bc_requests = 2;
 	}
 	newxprt->sc_fc_credits = cpu_to_be32(newxprt->sc_max_requests);
-	newxprt->sc_sq_depth = newxprt->sc_rq_depth;
+	ctxts = rdma_rw_mr_factor(dev, newxprt->sc_port_num, RPCSVC_MAXPAGES);
+	ctxts *= newxprt->sc_max_requests;
+	newxprt->sc_sq_depth = newxprt->sc_rq_depth + ctxts;
+	if (newxprt->sc_sq_depth > dev->attrs.max_qp_wr) {
+		pr_warn("svcrdma: reducing send depth to %d\n",
+			dev->attrs.max_qp_wr);
+		newxprt->sc_sq_depth = dev->attrs.max_qp_wr;
+	}
 	atomic_set(&newxprt->sc_sq_avail, newxprt->sc_sq_depth);
 
 	if (!svc_rdma_prealloc_ctxts(newxprt))
@@ -789,8 +797,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 	qp_attr.event_handler = qp_event_handler;
 	qp_attr.qp_context = &newxprt->sc_xprt;
 	qp_attr.port_num = newxprt->sc_port_num;
-	qp_attr.cap.max_rdma_ctxs = newxprt->sc_max_requests;
-	qp_attr.cap.max_send_wr = newxprt->sc_sq_depth;
+	qp_attr.cap.max_rdma_ctxs = ctxts;
+	qp_attr.cap.max_send_wr = newxprt->sc_sq_depth - ctxts;
 	qp_attr.cap.max_recv_wr = newxprt->sc_rq_depth;
 	qp_attr.cap.max_send_sge = newxprt->sc_max_sge;
 	qp_attr.cap.max_recv_sge = newxprt->sc_max_sge;
@@ -858,6 +866,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 	dprintk("    remote address  : %pIS:%u\n", sap, rpc_get_port(sap));
 	dprintk("    max_sge         : %d\n", newxprt->sc_max_sge);
 	dprintk("    sq_depth        : %d\n", newxprt->sc_sq_depth);
+	dprintk("    rdma_rw_ctxs    : %d\n", ctxts);
 	dprintk("    max_requests    : %d\n", newxprt->sc_max_requests);
 	dprintk("    ord             : %d\n", newxprt->sc_ord);
 
-- 
cgit v1.2.3