From bc3fe2e3769874dfa8674791e84c4a901ba9e48b Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Mon, 27 Jul 2015 18:10:12 -0500 Subject: svcrdma: Use max_sge_rd for destination read depths Signed-off-by: Steve Wise Signed-off-by: Doug Ledford --- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 12 +----------- net/sunrpc/xprtrdma/svc_rdma_transport.c | 4 ++++ 2 files changed, 5 insertions(+), 11 deletions(-) (limited to 'net/sunrpc/xprtrdma') diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 2e1348bde325..cb5174284074 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -115,15 +115,6 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp, rqstp->rq_arg.tail[0].iov_len = 0; } -static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count) -{ - if (!rdma_cap_read_multi_sge(xprt->sc_cm_id->device, - xprt->sc_cm_id->port_num)) - return 1; - else - return min_t(int, sge_count, xprt->sc_max_sge); -} - /* Issue an RDMA_READ using the local lkey to map the data sink */ int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, @@ -144,8 +135,7 @@ int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt, ctxt->direction = DMA_FROM_DEVICE; ctxt->read_hdr = head; - pages_needed = - min_t(int, pages_needed, rdma_read_max_sge(xprt, pages_needed)); + pages_needed = min_t(int, pages_needed, xprt->sc_max_sge_rd); read = min_t(int, pages_needed << PAGE_SHIFT, rs_length); for (pno = 0; pno < pages_needed; pno++) { diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 6b36279e4288..fdc850ffc26c 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -872,6 +872,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) * capabilities of this particular device */ newxprt->sc_max_sge = min((size_t)devattr.max_sge, (size_t)RPCSVC_MAXPAGES); + newxprt->sc_max_sge_rd = min_t(size_t, devattr.max_sge_rd, + RPCSVC_MAXPAGES); newxprt->sc_max_requests = min((size_t)devattr.max_qp_wr, (size_t)svcrdma_max_requests); newxprt->sc_sq_depth = RPCRDMA_SQ_DEPTH_MULT * newxprt->sc_max_requests; @@ -1046,6 +1048,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) " remote_ip : %pI4\n" " remote_port : %d\n" " max_sge : %d\n" + " max_sge_rd : %d\n" " sq_depth : %d\n" " max_requests : %d\n" " ord : %d\n", @@ -1059,6 +1062,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) ntohs(((struct sockaddr_in *)&newxprt->sc_cm_id-> route.addr.dst_addr)->sin_port), newxprt->sc_max_sge, + newxprt->sc_max_sge_rd, newxprt->sc_sq_depth, newxprt->sc_max_requests, newxprt->sc_ord); -- cgit v1.2.3 From 0410e38eca85e042f5d5a281dbcc792db701ed44 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 30 Jul 2015 10:32:39 +0300 Subject: xprtrdma, svcrdma: Convert to ib_alloc_mr Signed-off-by: Sagi Grimberg Signed-off-by: Doug Ledford --- net/sunrpc/xprtrdma/frwr_ops.c | 6 +++--- net/sunrpc/xprtrdma/svc_rdma_transport.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net/sunrpc/xprtrdma') diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 04ea914201b2..5318951b3b53 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -117,7 +117,7 @@ __frwr_recovery_worker(struct work_struct *work) if (ib_dereg_mr(r->r.frmr.fr_mr)) goto out_fail; - r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(pd, depth); + r->r.frmr.fr_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, depth); if (IS_ERR(r->r.frmr.fr_mr)) goto out_fail; @@ -148,7 +148,7 @@ __frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, struct ib_device *device, struct rpcrdma_frmr *f = &r->r.frmr; int rc; - f->fr_mr = ib_alloc_fast_reg_mr(pd, depth); + f->fr_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, depth); if (IS_ERR(f->fr_mr)) goto out_mr_err; f->fr_pgl = ib_alloc_fast_reg_page_list(device, depth); @@ -158,7 +158,7 @@ __frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, struct ib_device *device, out_mr_err: rc = PTR_ERR(f->fr_mr); - dprintk("RPC: %s: ib_alloc_fast_reg_mr status %i\n", + dprintk("RPC: %s: ib_alloc_mr status %i\n", __func__, rc); return rc; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index fdc850ffc26c..8752a2d653b5 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -738,7 +738,7 @@ static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt) if (!frmr) goto err; - mr = ib_alloc_fast_reg_mr(xprt->sc_pd, RPCSVC_MAXPAGES); + mr = ib_alloc_mr(xprt->sc_pd, IB_MR_TYPE_MEM_REG, RPCSVC_MAXPAGES); if (IS_ERR(mr)) goto err_free_frmr; -- cgit v1.2.3 From 9ac07501e1918b8d1140adcc360e8d8c7f5a2f7c Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Fri, 7 Aug 2015 11:11:20 -0500 Subject: svcrdma: limit FRMR page list lengths to device max Svcrdma was incorrectly allocating fastreg MRs and page lists using RPCSVC_MAXPAGES, which can exceed the device capabilities. So limit the depth to the minimum of RPCSVC_MAXPAGES and xprt->sc_frmr_pg_list_len. Signed-off-by: Steve Wise Signed-off-by: Doug Ledford --- net/sunrpc/xprtrdma/svc_rdma_transport.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net/sunrpc/xprtrdma') diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 8752a2d653b5..11d5133c3f38 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -733,17 +733,19 @@ static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt) struct ib_mr *mr; struct ib_fast_reg_page_list *pl; struct svc_rdma_fastreg_mr *frmr; + u32 num_sg; frmr = kmalloc(sizeof(*frmr), GFP_KERNEL); if (!frmr) goto err; - mr = ib_alloc_mr(xprt->sc_pd, IB_MR_TYPE_MEM_REG, RPCSVC_MAXPAGES); + num_sg = min_t(u32, RPCSVC_MAXPAGES, xprt->sc_frmr_pg_list_len); + mr = ib_alloc_mr(xprt->sc_pd, IB_MR_TYPE_MEM_REG, num_sg); if (IS_ERR(mr)) goto err_free_frmr; pl = ib_alloc_fast_reg_page_list(xprt->sc_cm_id->device, - RPCSVC_MAXPAGES); + num_sg); if (IS_ERR(pl)) goto err_free_mr; -- cgit v1.2.3 From 7dd78647a2c2c224e376fc72797d411a3a0bb047 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 5 Aug 2015 14:34:31 -0600 Subject: IB/core: Make ib_dealloc_pd return void The majority of callers never check the return value, and even if they did, they can't do anything about a failure. All possible failure cases represent a bug in the caller, so just WARN_ON inside the function instead. This fixes a few random errors: net/rd/iw.c infinite loops while it fails. (racing with EBUSY?) This also lays the ground work to get rid of error return from the drivers. Most drivers do not error, the few that do are broken since it cannot be handled. Since uverbs can legitimately make use of EBUSY, open code the check. Signed-off-by: Jason Gunthorpe Reviewed-by: Chuck Lever Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_cmd.c | 22 ++++++++++++++++------ drivers/infiniband/core/verbs.c | 26 ++++++++++++++++++++------ drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 4 +--- drivers/infiniband/ulp/iser/iser_verbs.c | 2 +- include/rdma/ib_verbs.h | 6 +----- net/rds/iw.c | 5 +---- net/sunrpc/xprtrdma/verbs.c | 2 +- 7 files changed, 41 insertions(+), 26 deletions(-) (limited to 'net/sunrpc/xprtrdma') diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 258485ee46b2..4c98696e3626 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -606,6 +606,7 @@ ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file, { struct ib_uverbs_dealloc_pd cmd; struct ib_uobject *uobj; + struct ib_pd *pd; int ret; if (copy_from_user(&cmd, buf, sizeof cmd)) @@ -614,15 +615,20 @@ ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file, uobj = idr_write_uobj(&ib_uverbs_pd_idr, cmd.pd_handle, file->ucontext); if (!uobj) return -EINVAL; + pd = uobj->object; - ret = ib_dealloc_pd(uobj->object); - if (!ret) - uobj->live = 0; - - put_uobj_write(uobj); + if (atomic_read(&pd->usecnt)) { + ret = -EBUSY; + goto err_put; + } + ret = pd->device->dealloc_pd(uobj->object); + WARN_ONCE(ret, "Infiniband HW driver failed dealloc_pd"); if (ret) - return ret; + goto err_put; + + uobj->live = 0; + put_uobj_write(uobj); idr_remove_uobj(&ib_uverbs_pd_idr, uobj); @@ -633,6 +639,10 @@ ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file, put_uobj(uobj); return in_len; + +err_put: + put_uobj_write(uobj); + return ret; } struct xrcd_table_entry { diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 2e5fd89a8929..aad8b3ce66cc 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -260,18 +260,32 @@ struct ib_pd *ib_alloc_pd(struct ib_device *device) } EXPORT_SYMBOL(ib_alloc_pd); -int ib_dealloc_pd(struct ib_pd *pd) +/** + * ib_dealloc_pd - Deallocates a protection domain. + * @pd: The protection domain to deallocate. + * + * It is an error to call this function while any resources in the pd still + * exist. The caller is responsible to synchronously destroy them and + * guarantee no new allocations will happen. + */ +void ib_dealloc_pd(struct ib_pd *pd) { + int ret; + if (pd->local_mr) { - if (ib_dereg_mr(pd->local_mr)) - return -EBUSY; + ret = ib_dereg_mr(pd->local_mr); + WARN_ON(ret); pd->local_mr = NULL; } - if (atomic_read(&pd->usecnt)) - return -EBUSY; + /* uverbs manipulates usecnt with proper locking, while the kabi + requires the caller to guarantee we can't race here. */ + WARN_ON(atomic_read(&pd->usecnt)); - return pd->device->dealloc_pd(pd); + /* Making delalloc_pd a void return is a WIP, no driver should return + an error here. */ + ret = pd->device->dealloc_pd(pd); + WARN_ONCE(ret, "Infiniband HW driver failed dealloc_pd"); } EXPORT_SYMBOL(ib_dealloc_pd); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 8c451983d8a5..78845b6e8b81 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -280,9 +280,7 @@ void ipoib_transport_dev_cleanup(struct net_device *dev) priv->wq = NULL; } - if (ib_dealloc_pd(priv->pd)) - ipoib_warn(priv, "ib_dealloc_pd failed\n"); - + ib_dealloc_pd(priv->pd); } void ipoib_event(struct ib_event_handler *handler, diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index ad2d2b50cd7f..ae70cc1463ac 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -185,7 +185,7 @@ static void iser_free_device_ib_res(struct iser_device *device) (void)ib_unregister_event_handler(&device->event_handler); (void)ib_dereg_mr(device->mr); - (void)ib_dealloc_pd(device->pd); + ib_dealloc_pd(device->pd); kfree(device->comps); device->comps = NULL; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 09400512d579..128abf2888ab 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2196,11 +2196,7 @@ int ib_find_pkey(struct ib_device *device, struct ib_pd *ib_alloc_pd(struct ib_device *device); -/** - * ib_dealloc_pd - Deallocates a protection domain. - * @pd: The protection domain to deallocate. - */ -int ib_dealloc_pd(struct ib_pd *pd); +void ib_dealloc_pd(struct ib_pd *pd); /** * ib_create_ah - Creates an address handle for the given address vector. diff --git a/net/rds/iw.c b/net/rds/iw.c index 7cc2f32a0cb3..c7dcddbf17cb 100644 --- a/net/rds/iw.c +++ b/net/rds/iw.c @@ -148,10 +148,7 @@ static void rds_iw_remove_one(struct ib_device *device, void *client_data) if (rds_iwdev->mr) ib_dereg_mr(rds_iwdev->mr); - while (ib_dealloc_pd(rds_iwdev->pd)) { - rdsdebug("Failed to dealloc pd %p\n", rds_iwdev->pd); - msleep(1); - } + ib_dealloc_pd(rds_iwdev->pd); list_del(&rds_iwdev->list); kfree(rds_iwdev); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 891c4ede2c20..afd504375a9a 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -624,7 +624,7 @@ rpcrdma_ia_close(struct rpcrdma_ia *ia) /* If the pd is still busy, xprtrdma missed freeing a resource */ if (ia->ri_pd && !IS_ERR(ia->ri_pd)) - WARN_ON(ib_dealloc_pd(ia->ri_pd)); + ib_dealloc_pd(ia->ri_pd); } /* -- cgit v1.2.3