From b925a2dc165e5ec2330ca1256704faef8ed96913 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy <maxg@mellanox.com> Date: Mon, 28 Aug 2017 12:52:27 +0300 Subject: nvme-rdma: default MR page size to 4k Due to various page sizes in the system (IOMMU/device/kernel), we set the fabrics controller page size to 4k and block layer boundaries accordingly. On architectures that use a different kernel page size we'll have a mismatch to the MR page size that may cause a mapping error. Update the MR page size to correspond to the core ctrl settings. Signed-off-by: Max Gurtovoy <maxg@mellanox.com> Reviewed-by: Sagi Grimberg <sagi@grimberg.me> Signed-off-by: Christoph Hellwig <hch@lst.de> --- drivers/nvme/host/rdma.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index da04df1af231..a03299d77922 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -920,7 +920,11 @@ static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue, struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl; int nr; - nr = ib_map_mr_sg(req->mr, req->sg_table.sgl, count, NULL, PAGE_SIZE); + /* + * Align the MR to a 4K page size to match the ctrl page size and + * the block virtual boundary. 
+ */ + nr = ib_map_mr_sg(req->mr, req->sg_table.sgl, count, NULL, SZ_4K); if (nr < count) { if (nr < 0) return nr; @@ -1583,7 +1587,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl) goto out_cleanup_queue; ctrl->ctrl.max_hw_sectors = - (ctrl->max_fr_pages - 1) << (PAGE_SHIFT - 9); + (ctrl->max_fr_pages - 1) << (ilog2(SZ_4K) - 9); error = nvme_init_identify(&ctrl->ctrl); if (error) -- cgit v1.2.3 From 4033f35d174af4804a79fd5731d9e6be976f9f28 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig <hch@lst.de> Date: Mon, 28 Aug 2017 10:47:18 +0200 Subject: nvme-pci: use dma memory for the host memory buffer descriptors The NVMe 1.3 specification says in section 5.21.1.13: "After a successful completion of a Set Features enabling the host memory buffer, the host shall not write to the associated host memory region, buffer size, or descriptor list until the host memory buffer has been disabled." While this doesn't state that the descriptor list must remain accessible to the device, it certainly implies it must remain readable by the device. So switch to a dma coherent allocation for the descriptor list just to be safe - it's not like the cost for it matters compared to the actual memory buffers. 
Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Keith Busch <keith.busch@intel.com> Reviewed-by: Sagi Grimberg <sagi@grimberg.me> Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de> Fixes: 87ad72a59a38 ("nvme-pci: implement host memory buffer support") --- drivers/nvme/host/pci.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 925467b31a33..ea892e732268 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -109,6 +109,7 @@ struct nvme_dev { /* host memory buffer support: */ u64 host_mem_size; u32 nr_host_mem_descs; + dma_addr_t host_mem_descs_dma; struct nvme_host_mem_buf_desc *host_mem_descs; void **host_mem_desc_bufs; }; @@ -1565,16 +1566,10 @@ static inline void nvme_release_cmb(struct nvme_dev *dev) static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits) { - size_t len = dev->nr_host_mem_descs * sizeof(*dev->host_mem_descs); + u64 dma_addr = dev->host_mem_descs_dma; struct nvme_command c; - u64 dma_addr; int ret; - dma_addr = dma_map_single(dev->dev, dev->host_mem_descs, len, - DMA_TO_DEVICE); - if (dma_mapping_error(dev->dev, dma_addr)) - return -ENOMEM; - memset(&c, 0, sizeof(c)); c.features.opcode = nvme_admin_set_features; c.features.fid = cpu_to_le32(NVME_FEAT_HOST_MEM_BUF); @@ -1591,7 +1586,6 @@ static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits) "failed to set host mem (err %d, flags %#x).\n", ret, bits); } - dma_unmap_single(dev->dev, dma_addr, len, DMA_TO_DEVICE); return ret; } @@ -1609,7 +1603,9 @@ static void nvme_free_host_mem(struct nvme_dev *dev) kfree(dev->host_mem_desc_bufs); dev->host_mem_desc_bufs = NULL; - kfree(dev->host_mem_descs); + dma_free_coherent(dev->dev, + dev->nr_host_mem_descs * sizeof(*dev->host_mem_descs), + dev->host_mem_descs, dev->host_mem_descs_dma); dev->host_mem_descs = NULL; } @@ -1617,6 +1613,7 @@ static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, 
u64 preferred) { struct nvme_host_mem_buf_desc *descs; u32 chunk_size, max_entries, len; + dma_addr_t descs_dma; int i = 0; void **bufs; u64 size = 0, tmp; @@ -1627,7 +1624,8 @@ retry: tmp = (preferred + chunk_size - 1); do_div(tmp, chunk_size); max_entries = tmp; - descs = kcalloc(max_entries, sizeof(*descs), GFP_KERNEL); + descs = dma_zalloc_coherent(dev->dev, max_entries * sizeof(*descs), + &descs_dma, GFP_KERNEL); if (!descs) goto out; @@ -1661,6 +1659,7 @@ retry: dev->nr_host_mem_descs = i; dev->host_mem_size = size; dev->host_mem_descs = descs; + dev->host_mem_descs_dma = descs_dma; dev->host_mem_desc_bufs = bufs; return 0; @@ -1674,7 +1673,8 @@ out_free_bufs: kfree(bufs); out_free_descs: - kfree(descs); + dma_free_coherent(dev->dev, max_entries * sizeof(*descs), descs, + descs_dma); out: /* try a smaller chunk size if we failed early */ if (chunk_size >= PAGE_SIZE * 2 && (i == 0 || size < min)) { -- cgit v1.2.3