From 94f4857f4ba21aad4cf11dde961ea23a07b5161c Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 30 Jan 2020 13:54:59 +0100 Subject: rbd: remove barriers from img_request_layered_{set,clear,test}() IMG_REQ_LAYERED is set in rbd_img_request_create(), and tested and cleared in rbd_img_request_destroy() when the image request is about to be destroyed. The barriers are unnecessary. Signed-off-by: Ilya Dryomov --- drivers/block/rbd.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 6343402c09e6..162fd1df06dd 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1366,18 +1366,15 @@ static void rbd_osd_submit(struct ceph_osd_request *osd_req) static void img_request_layered_set(struct rbd_img_request *img_request) { set_bit(IMG_REQ_LAYERED, &img_request->flags); - smp_mb(); } static void img_request_layered_clear(struct rbd_img_request *img_request) { clear_bit(IMG_REQ_LAYERED, &img_request->flags); - smp_mb(); } static bool img_request_layered_test(struct rbd_img_request *img_request) { - smp_mb(); return test_bit(IMG_REQ_LAYERED, &img_request->flags) != 0; } -- cgit v1.2.3 From 679a97d28627647e5a68dd684537e499cd741e2a Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 31 Jan 2020 11:37:36 +0100 Subject: rbd: kill img_request kref The reference counter is never increased, so we can as well call rbd_img_request_destroy() directly and drop the kref. Signed-off-by: Hannes Reinecke Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- drivers/block/rbd.c | 24 +++++------------------- 1 file changed, 5 insertions(+), 19 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 162fd1df06dd..b077c0fb9f70 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -349,7 +349,6 @@ struct rbd_img_request { struct pending_result pending; struct work_struct work; int work_result; - struct kref kref; }; #define for_each_obj_request(ireq, oreq) \ @@ -1320,15 +1319,6 @@ static void rbd_obj_request_put(struct rbd_obj_request *obj_request) kref_put(&obj_request->kref, rbd_obj_request_destroy); } -static void rbd_img_request_destroy(struct kref *kref); -static void rbd_img_request_put(struct rbd_img_request *img_request) -{ - rbd_assert(img_request != NULL); - dout("%s: img %p (was %d)\n", __func__, img_request, - kref_read(&img_request->kref)); - kref_put(&img_request->kref, rbd_img_request_destroy); -} - static inline void rbd_img_obj_request_add(struct rbd_img_request *img_request, struct rbd_obj_request *obj_request) { @@ -1656,19 +1646,15 @@ static struct rbd_img_request *rbd_img_request_create( INIT_LIST_HEAD(&img_request->lock_item); INIT_LIST_HEAD(&img_request->object_extents); mutex_init(&img_request->state_mutex); - kref_init(&img_request->kref); return img_request; } -static void rbd_img_request_destroy(struct kref *kref) +static void rbd_img_request_destroy(struct rbd_img_request *img_request) { - struct rbd_img_request *img_request; struct rbd_obj_request *obj_request; struct rbd_obj_request *next_obj_request; - img_request = container_of(kref, struct rbd_img_request, kref); - dout("%s: img %p\n", __func__, img_request); WARN_ON(!list_empty(&img_request->lock_item)); @@ -2885,7 +2871,7 @@ static int rbd_obj_read_from_parent(struct rbd_obj_request *obj_req) obj_req->copyup_bvecs); } if (ret) { - rbd_img_request_put(child_img_req); + rbd_img_request_destroy(child_img_req); return ret; } @@ -3644,7 +3630,7 @@ again: if (test_bit(IMG_REQ_CHILD, &img_req->flags)) { struct rbd_obj_request *obj_req = img_req->obj_request; - rbd_img_request_put(img_req); + rbd_img_request_destroy(img_req); if (__rbd_obj_handle_request(obj_req, &result)) { img_req = obj_req->img_request; goto again; @@ -3652,7 +3638,7 @@ again: } else { struct request *rq = img_req->rq; - rbd_img_request_put(img_req); + rbd_img_request_destroy(img_req); blk_mq_end_request(rq, errno_to_blk_status(result)); } } @@ -4798,7 +4784,7 @@ static void rbd_queue_workfn(struct work_struct *work) return; err_img_request: - rbd_img_request_put(img_request); + rbd_img_request_destroy(img_request); err_rq: if (result) rbd_warn(rbd_dev, "%s %llx at %llx result %d", -- cgit v1.2.3 From 78b42a871a654face984c844b43c777d66adb1fe Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 12 Feb 2020 14:34:03 +0100 Subject: rbd: get rid of img_request_layered_clear() No need to clear IMG_REQ_LAYERED before destroying the request. Signed-off-by: Ilya Dryomov --- drivers/block/rbd.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index b077c0fb9f70..c61c5dd424fa 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1358,11 +1358,6 @@ static void img_request_layered_set(struct rbd_img_request *img_request) set_bit(IMG_REQ_LAYERED, &img_request->flags); } -static void img_request_layered_clear(struct rbd_img_request *img_request) -{ - clear_bit(IMG_REQ_LAYERED, &img_request->flags); -} - static bool img_request_layered_test(struct rbd_img_request *img_request) { return test_bit(IMG_REQ_LAYERED, &img_request->flags) != 0; @@ -1661,10 +1656,8 @@ static void rbd_img_request_destroy(struct rbd_img_request *img_request) for_each_obj_request_safe(img_request, obj_request, next_obj_request) rbd_img_obj_request_del(img_request, obj_request); - if (img_request_layered_test(img_request)) { - img_request_layered_clear(img_request); + if (img_request_layered_test(img_request)) rbd_dev_parent_put(img_request->rbd_dev); - } if (rbd_img_is_write(img_request)) ceph_put_snap_context(img_request->snapc); -- cgit v1.2.3 From a52cc685753568e5bcbe762586c2bfbe7175255e Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 12 Feb 2020 15:08:39 +0100 Subject: rbd: acquire header_rwsem just once in rbd_queue_workfn() Currently header_rwsem is acquired twice: once in rbd_dev_parent_get() when the image request is being created and then in rbd_queue_workfn() to capture mapping_size and snapc. Introduce rbd_img_capture_header() and move image request allocation so that header_rwsem can be acquired just once. Signed-off-by: Ilya Dryomov --- drivers/block/rbd.c | 59 ++++++++++++++++++++++++++++------------------------- 1 file changed, 31 insertions(+), 28 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index c61c5dd424fa..acda61f5be03 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1601,10 +1601,8 @@ static bool rbd_dev_parent_get(struct rbd_device *rbd_dev) if (!rbd_dev->parent_spec) return false; - down_read(&rbd_dev->header_rwsem); if (rbd_dev->parent_overlap) counter = atomic_inc_return_safe(&rbd_dev->parent_ref); - up_read(&rbd_dev->header_rwsem); if (counter < 0) rbd_warn(rbd_dev, "parent reference overflow"); @@ -1619,8 +1617,7 @@ static bool rbd_dev_parent_get(struct rbd_device *rbd_dev) */ static struct rbd_img_request *rbd_img_request_create( struct rbd_device *rbd_dev, - enum obj_operation_type op_type, - struct ceph_snap_context *snapc) + enum obj_operation_type op_type) { struct rbd_img_request *img_request; @@ -1630,13 +1627,6 @@ static struct rbd_img_request *rbd_img_request_create( img_request->rbd_dev = rbd_dev; img_request->op_type = op_type; - if (!rbd_img_is_write(img_request)) - img_request->snap_id = rbd_dev->spec->snap_id; - else - img_request->snapc = snapc; - - if (rbd_dev_parent_get(rbd_dev)) - img_request_layered_set(img_request); INIT_LIST_HEAD(&img_request->lock_item); INIT_LIST_HEAD(&img_request->object_extents); @@ -1645,6 +1635,21 @@ static struct rbd_img_request *rbd_img_request_create( return img_request; } +static void rbd_img_capture_header(struct rbd_img_request *img_req) +{ + struct rbd_device *rbd_dev = img_req->rbd_dev; + + lockdep_assert_held(&rbd_dev->header_rwsem); + + if (rbd_img_is_write(img_req)) + img_req->snapc = ceph_get_snap_context(rbd_dev->header.snapc); + else + img_req->snap_id = rbd_dev->spec->snap_id; + + if (rbd_dev_parent_get(rbd_dev)) + img_request_layered_set(img_req); +} + static void rbd_img_request_destroy(struct rbd_img_request *img_request) { struct rbd_obj_request *obj_request; @@ -2825,17 +2830,21 @@ static int rbd_obj_read_object(struct rbd_obj_request *obj_req) static int rbd_obj_read_from_parent(struct rbd_obj_request *obj_req) { struct rbd_img_request *img_req = obj_req->img_request; + struct rbd_device *parent = img_req->rbd_dev->parent; struct rbd_img_request *child_img_req; int ret; - child_img_req = rbd_img_request_create(img_req->rbd_dev->parent, - OBJ_OP_READ, NULL); + child_img_req = rbd_img_request_create(parent, OBJ_OP_READ); if (!child_img_req) return -ENOMEM; __set_bit(IMG_REQ_CHILD, &child_img_req->flags); child_img_req->obj_request = obj_req; + down_read(&parent->header_rwsem); + rbd_img_capture_header(child_img_req); + up_read(&parent->header_rwsem); + dout("%s child_img_req %p for obj_req %p\n", __func__, child_img_req, obj_req); @@ -4686,7 +4695,6 @@ static void rbd_queue_workfn(struct work_struct *work) struct request *rq = blk_mq_rq_from_pdu(work); struct rbd_device *rbd_dev = rq->q->queuedata; struct rbd_img_request *img_request; - struct ceph_snap_context *snapc = NULL; u64 offset = (u64)blk_rq_pos(rq) << SECTOR_SHIFT; u64 length = blk_rq_bytes(rq); enum obj_operation_type op_type; @@ -4739,28 +4747,24 @@ static void rbd_queue_workfn(struct work_struct *work) blk_mq_start_request(rq); + img_request = rbd_img_request_create(rbd_dev, op_type); + if (!img_request) { + result = -ENOMEM; + goto err_rq; + } + img_request->rq = rq; + down_read(&rbd_dev->header_rwsem); mapping_size = rbd_dev->mapping.size; - if (op_type != OBJ_OP_READ) { - snapc = rbd_dev->header.snapc; - ceph_get_snap_context(snapc); - } + rbd_img_capture_header(img_request); up_read(&rbd_dev->header_rwsem); if (offset + length > mapping_size) { rbd_warn(rbd_dev, "beyond EOD (%llu~%llu > %llu)", offset, length, mapping_size); result = -EIO; - goto err_rq; - } - - img_request = rbd_img_request_create(rbd_dev, op_type, snapc); - if (!img_request) { - result = -ENOMEM; - goto err_rq; + goto err_img_request; } - img_request->rq = rq; - snapc = NULL; /* img_request consumes a ref */ dout("%s rbd_dev %p img_req %p %s %llu~%llu\n", __func__, rbd_dev, img_request, obj_op_name(op_type), offset, length); @@ -4782,7 +4786,6 @@ err_rq: if (result) rbd_warn(rbd_dev, "%s %llx at %llx result %d", obj_op_name(op_type), length, offset, result); - ceph_put_snap_context(snapc); err: blk_mq_end_request(rq, errno_to_blk_status(result)); } -- cgit v1.2.3 From 59e542c869895fb37005b60058a342187bb63c61 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 12 Feb 2020 15:23:58 +0100 Subject: rbd: embed image request in blk-mq pdu Avoid making allocations for !IMG_REQ_CHILD image requests. Only IMG_REQ_CHILD image requests need to be freed now. Move the initial request checks to rbd_queue_rq(). Unfortunately we can't fill the image request and kick the state machine directly from rbd_queue_rq() because ->queue_rq() isn't allowed to block. Signed-off-by: Ilya Dryomov --- drivers/block/rbd.c | 138 +++++++++++++++++++--------------------------------- 1 file changed, 51 insertions(+), 87 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index acda61f5be03..3af09a0f208b 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -337,10 +337,7 @@ struct rbd_img_request { u64 snap_id; /* for reads */ struct ceph_snap_context *snapc; /* for writes */ }; - union { - struct request *rq; /* block request */ - struct rbd_obj_request *obj_request; /* obj req initiator */ - }; + struct rbd_obj_request *obj_request; /* obj req initiator */ struct list_head lock_item; struct list_head object_extents; /* obj_req.ex structs */ @@ -1610,20 +1607,11 @@ static bool rbd_dev_parent_get(struct rbd_device *rbd_dev) return counter > 0; } -/* - * Caller is responsible for filling in the list of object requests - * that comprises the image request, and the Linux request pointer - * (if there is one). - */ -static struct rbd_img_request *rbd_img_request_create( - struct rbd_device *rbd_dev, - enum obj_operation_type op_type) +static void rbd_img_request_init(struct rbd_img_request *img_request, + struct rbd_device *rbd_dev, + enum obj_operation_type op_type) { - struct rbd_img_request *img_request; - - img_request = kmem_cache_zalloc(rbd_img_request_cache, GFP_NOIO); - if (!img_request) - return NULL; + memset(img_request, 0, sizeof(*img_request)); img_request->rbd_dev = rbd_dev; img_request->op_type = op_type; @@ -1631,8 +1619,6 @@ static struct rbd_img_request *rbd_img_request_create( INIT_LIST_HEAD(&img_request->lock_item); INIT_LIST_HEAD(&img_request->object_extents); mutex_init(&img_request->state_mutex); - - return img_request; } static void rbd_img_capture_header(struct rbd_img_request *img_req) @@ -1667,7 +1653,8 @@ static void rbd_img_request_destroy(struct rbd_img_request *img_request) if (rbd_img_is_write(img_request)) ceph_put_snap_context(img_request->snapc); - kmem_cache_free(rbd_img_request_cache, img_request); + if (test_bit(IMG_REQ_CHILD, &img_request->flags)) + kmem_cache_free(rbd_img_request_cache, img_request); } #define BITS_PER_OBJ 2 @@ -2834,10 +2821,11 @@ static int rbd_obj_read_from_parent(struct rbd_obj_request *obj_req) struct rbd_img_request *child_img_req; int ret; - child_img_req = rbd_img_request_create(parent, OBJ_OP_READ); + child_img_req = kmem_cache_alloc(rbd_img_request_cache, GFP_NOIO); if (!child_img_req) return -ENOMEM; + rbd_img_request_init(child_img_req, parent, OBJ_OP_READ); __set_bit(IMG_REQ_CHILD, &child_img_req->flags); child_img_req->obj_request = obj_req; @@ -3638,7 +3626,7 @@ again: goto again; } } else { - struct request *rq = img_req->rq; + struct request *rq = blk_mq_rq_from_pdu(img_req); rbd_img_request_destroy(img_req); blk_mq_end_request(rq, errno_to_blk_status(result)); @@ -4692,68 +4680,25 @@ static int rbd_obj_method_sync(struct rbd_device *rbd_dev, static void rbd_queue_workfn(struct work_struct *work) { - struct request *rq = blk_mq_rq_from_pdu(work); - struct rbd_device *rbd_dev = rq->q->queuedata; - struct rbd_img_request *img_request; + struct rbd_img_request *img_request = + container_of(work, struct rbd_img_request, work); + struct rbd_device *rbd_dev = img_request->rbd_dev; + enum obj_operation_type op_type = img_request->op_type; + struct request *rq = blk_mq_rq_from_pdu(img_request); u64 offset = (u64)blk_rq_pos(rq) << SECTOR_SHIFT; u64 length = blk_rq_bytes(rq); - enum obj_operation_type op_type; u64 mapping_size; int result; - switch (req_op(rq)) { - case REQ_OP_DISCARD: - op_type = OBJ_OP_DISCARD; - break; - case REQ_OP_WRITE_ZEROES: - op_type = OBJ_OP_ZEROOUT; - break; - case REQ_OP_WRITE: - op_type = OBJ_OP_WRITE; - break; - case REQ_OP_READ: - op_type = OBJ_OP_READ; - break; - default: - dout("%s: non-fs request type %d\n", __func__, req_op(rq)); - result = -EIO; - goto err; - } - /* Ignore/skip any zero-length requests */ - if (!length) { dout("%s: zero-length request\n", __func__); result = 0; - goto err_rq; - } - - if (op_type != OBJ_OP_READ) { - if (rbd_is_ro(rbd_dev)) { - rbd_warn(rbd_dev, "%s on read-only mapping", - obj_op_name(op_type)); - result = -EIO; - goto err; - } - rbd_assert(!rbd_is_snap(rbd_dev)); - } - - if (offset && length > U64_MAX - offset + 1) { - rbd_warn(rbd_dev, "bad request range (%llu~%llu)", offset, - length); - result = -EINVAL; - goto err_rq; /* Shouldn't happen */ + goto err_img_request; } blk_mq_start_request(rq); - img_request = rbd_img_request_create(rbd_dev, op_type); - if (!img_request) { - result = -ENOMEM; - goto err_rq; - } - img_request->rq = rq; - down_read(&rbd_dev->header_rwsem); mapping_size = rbd_dev->mapping.size; rbd_img_capture_header(img_request); @@ -4782,21 +4727,50 @@ static void rbd_queue_workfn(struct work_struct *work) err_img_request: rbd_img_request_destroy(img_request); -err_rq: if (result) rbd_warn(rbd_dev, "%s %llx at %llx result %d", obj_op_name(op_type), length, offset, result); -err: blk_mq_end_request(rq, errno_to_blk_status(result)); } static blk_status_t rbd_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { - struct request *rq = bd->rq; - struct work_struct *work = blk_mq_rq_to_pdu(rq); + struct rbd_device *rbd_dev = hctx->queue->queuedata; + struct rbd_img_request *img_req = blk_mq_rq_to_pdu(bd->rq); + enum obj_operation_type op_type; - queue_work(rbd_wq, work); + switch (req_op(bd->rq)) { + case REQ_OP_DISCARD: + op_type = OBJ_OP_DISCARD; + break; + case REQ_OP_WRITE_ZEROES: + op_type = OBJ_OP_ZEROOUT; + break; + case REQ_OP_WRITE: + op_type = OBJ_OP_WRITE; + break; + case REQ_OP_READ: + op_type = OBJ_OP_READ; + break; + default: + rbd_warn(rbd_dev, "unknown req_op %d", req_op(bd->rq)); + return BLK_STS_IOERR; + } + + rbd_img_request_init(img_req, rbd_dev, op_type); + + if (rbd_img_is_write(img_req)) { + if (rbd_is_ro(rbd_dev)) { + rbd_warn(rbd_dev, "%s on read-only mapping", + obj_op_name(img_req->op_type)); + return BLK_STS_IOERR; + } + rbd_assert(!rbd_is_snap(rbd_dev)); + } + + INIT_WORK(&img_req->work, rbd_queue_workfn); + queue_work(rbd_wq, &img_req->work); return BLK_STS_OK; } @@ -4963,18 +4937,8 @@ out: return ret; } -static int rbd_init_request(struct blk_mq_tag_set *set, struct request *rq, - unsigned int hctx_idx, unsigned int numa_node) -{ - struct work_struct *work = blk_mq_rq_to_pdu(rq); - - INIT_WORK(work, rbd_queue_workfn); - return 0; -} - static const struct blk_mq_ops rbd_mq_ops = { .queue_rq = rbd_queue_rq, - .init_request = rbd_init_request, }; static int rbd_init_disk(struct rbd_device *rbd_dev) @@ -5007,7 +4971,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) rbd_dev->tag_set.numa_node = NUMA_NO_NODE; rbd_dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; rbd_dev->tag_set.nr_hw_queues = 1; - rbd_dev->tag_set.cmd_size = sizeof(struct work_struct); + rbd_dev->tag_set.cmd_size = sizeof(struct rbd_img_request); err = blk_mq_alloc_tag_set(&rbd_dev->tag_set); if (err) -- cgit v1.2.3 From f9b6b98d24f7cec5b8269217f9d4fdec1ca43218 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 31 Jan 2020 11:37:39 +0100 Subject: rbd: enable multiple blk-mq queues Allocate one queue per CPU and get a performance boost from higher parallelism. Signed-off-by: Hannes Reinecke Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- drivers/block/rbd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 3af09a0f208b..1e0a6b19ae0d 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -4970,7 +4970,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) rbd_dev->tag_set.queue_depth = rbd_dev->opts->queue_depth; rbd_dev->tag_set.numa_node = NUMA_NO_NODE; rbd_dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; - rbd_dev->tag_set.nr_hw_queues = 1; + rbd_dev->tag_set.nr_hw_queues = num_present_cpus(); rbd_dev->tag_set.cmd_size = sizeof(struct rbd_img_request); err = blk_mq_alloc_tag_set(&rbd_dev->tag_set); -- cgit v1.2.3