diff options
-rw-r--r-- | drivers/nvme/host/apple.c | 14 | ||||
-rw-r--r-- | drivers/nvme/host/auth.c | 258 | ||||
-rw-r--r-- | drivers/nvme/host/core.c | 125 | ||||
-rw-r--r-- | drivers/nvme/host/fc.c | 35 | ||||
-rw-r--r-- | drivers/nvme/host/ioctl.c | 116 | ||||
-rw-r--r-- | drivers/nvme/host/nvme.h | 35 | ||||
-rw-r--r-- | drivers/nvme/host/pci.c | 423 | ||||
-rw-r--r-- | drivers/nvme/host/rdma.c | 30 | ||||
-rw-r--r-- | drivers/nvme/host/tcp.c | 32 | ||||
-rw-r--r-- | drivers/nvme/target/admin-cmd.c | 9 | ||||
-rw-r--r-- | drivers/nvme/target/auth.c | 2 | ||||
-rw-r--r-- | drivers/nvme/target/configfs.c | 121 | ||||
-rw-r--r-- | drivers/nvme/target/core.c | 44 | ||||
-rw-r--r-- | drivers/nvme/target/io-cmd-file.c | 16 | ||||
-rw-r--r-- | drivers/nvme/target/loop.c | 8 | ||||
-rw-r--r-- | drivers/nvme/target/nvmet.h | 6 | ||||
-rw-r--r-- | include/linux/nvme.h | 2 |
17 files changed, 766 insertions, 510 deletions
diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c index 24e224c279a4..cab69516af5b 100644 --- a/drivers/nvme/host/apple.c +++ b/drivers/nvme/host/apple.c @@ -821,7 +821,7 @@ static void apple_nvme_disable(struct apple_nvme *anv, bool shutdown) if (!dead && shutdown && freeze) nvme_wait_freeze_timeout(&anv->ctrl, NVME_IO_TIMEOUT); - nvme_stop_queues(&anv->ctrl); + nvme_quiesce_io_queues(&anv->ctrl); if (!dead) { if (READ_ONCE(anv->ioq.enabled)) { @@ -837,7 +837,7 @@ static void apple_nvme_disable(struct apple_nvme *anv, bool shutdown) WRITE_ONCE(anv->ioq.enabled, false); WRITE_ONCE(anv->adminq.enabled, false); mb(); /* ensure that nvme_queue_rq() sees that enabled is cleared */ - nvme_stop_admin_queue(&anv->ctrl); + nvme_quiesce_admin_queue(&anv->ctrl); /* last chance to complete any requests before nvme_cancel_request */ spin_lock_irqsave(&anv->lock, flags); @@ -854,8 +854,8 @@ static void apple_nvme_disable(struct apple_nvme *anv, bool shutdown) * deadlocking blk-mq hot-cpu notifier. */ if (shutdown) { - nvme_start_queues(&anv->ctrl); - nvme_start_admin_queue(&anv->ctrl); + nvme_unquiesce_io_queues(&anv->ctrl); + nvme_unquiesce_admin_queue(&anv->ctrl); } } @@ -1093,7 +1093,7 @@ static void apple_nvme_reset_work(struct work_struct *work) dev_dbg(anv->dev, "Starting admin queue"); apple_nvme_init_queue(&anv->adminq); - nvme_start_admin_queue(&anv->ctrl); + nvme_unquiesce_admin_queue(&anv->ctrl); if (!nvme_change_ctrl_state(&anv->ctrl, NVME_CTRL_CONNECTING)) { dev_warn(anv->ctrl.device, @@ -1102,7 +1102,7 @@ static void apple_nvme_reset_work(struct work_struct *work) goto out; } - ret = nvme_init_ctrl_finish(&anv->ctrl); + ret = nvme_init_ctrl_finish(&anv->ctrl, false); if (ret) goto out; @@ -1127,7 +1127,7 @@ static void apple_nvme_reset_work(struct work_struct *work) anv->ctrl.queue_count = nr_io_queues + 1; - nvme_start_queues(&anv->ctrl); + nvme_unquiesce_io_queues(&anv->ctrl); nvme_wait_freeze(&anv->ctrl); blk_mq_update_nr_hw_queues(&anv->tagset, 1); nvme_unfreeze(&anv->ctrl); diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c index c8a6db7c4498..bb0abbe4491c 100644 --- a/drivers/nvme/host/auth.c +++ b/drivers/nvme/host/auth.c @@ -13,6 +13,10 @@ #include "fabrics.h" #include <linux/nvme-auth.h> +#define CHAP_BUF_SIZE 4096 +static struct kmem_cache *nvme_chap_buf_cache; +static mempool_t *nvme_chap_buf_pool; + struct nvme_dhchap_queue_context { struct list_head entry; struct work_struct auth_work; @@ -20,7 +24,6 @@ struct nvme_dhchap_queue_context { struct crypto_shash *shash_tfm; struct crypto_kpp *dh_tfm; void *buf; - size_t buf_size; int qid; int error; u32 s1; @@ -47,6 +50,12 @@ struct nvme_dhchap_queue_context { #define nvme_auth_queue_from_qid(ctrl, qid) \ (qid == 0) ? (ctrl)->fabrics_q : (ctrl)->connect_q +static inline int ctrl_max_dhchaps(struct nvme_ctrl *ctrl) +{ + return ctrl->opts->nr_io_queues + ctrl->opts->nr_write_queues + + ctrl->opts->nr_poll_queues + 1; +} + static int nvme_auth_submit(struct nvme_ctrl *ctrl, int qid, void *data, size_t data_len, bool auth_send) { @@ -112,7 +121,7 @@ static int nvme_auth_set_dhchap_negotiate_data(struct nvme_ctrl *ctrl, struct nvmf_auth_dhchap_negotiate_data *data = chap->buf; size_t size = sizeof(*data) + sizeof(union nvmf_auth_protocol); - if (chap->buf_size < size) { + if (size > CHAP_BUF_SIZE) { chap->status = NVME_AUTH_DHCHAP_FAILURE_INCORRECT_PAYLOAD; return -EINVAL; } @@ -147,7 +156,7 @@ static int nvme_auth_process_dhchap_challenge(struct nvme_ctrl *ctrl, const char *gid_name = nvme_auth_dhgroup_name(data->dhgid); const char *hmac_name, *kpp_name; - if (chap->buf_size < size) { + if (size > CHAP_BUF_SIZE) { chap->status = NVME_AUTH_DHCHAP_FAILURE_INCORRECT_PAYLOAD; return NVME_SC_INVALID_FIELD; } @@ -197,12 +206,6 @@ static int nvme_auth_process_dhchap_challenge(struct nvme_ctrl *ctrl, return NVME_SC_AUTH_REQUIRED; } - /* Reset host response if the hash had been changed */ - if (chap->hash_id != data->hashid) { - kfree(chap->host_response); - chap->host_response = NULL; - } - chap->hash_id = data->hashid; chap->hash_len = data->hl; dev_dbg(ctrl->device, "qid %d: selected hash %s\n", @@ -219,14 +222,6 @@ select_kpp: return NVME_SC_AUTH_REQUIRED; } - /* Clear host and controller key to avoid accidental reuse */ - kfree_sensitive(chap->host_key); - chap->host_key = NULL; - chap->host_key_len = 0; - kfree_sensitive(chap->ctrl_key); - chap->ctrl_key = NULL; - chap->ctrl_key_len = 0; - if (chap->dhgroup_id == data->dhgid && (data->dhgid == NVME_AUTH_DHGROUP_NULL || chap->dh_tfm)) { dev_dbg(ctrl->device, @@ -302,7 +297,7 @@ static int nvme_auth_set_dhchap_reply_data(struct nvme_ctrl *ctrl, if (chap->host_key_len) size += chap->host_key_len; - if (chap->buf_size < size) { + if (size > CHAP_BUF_SIZE) { chap->status = NVME_AUTH_DHCHAP_FAILURE_INCORRECT_PAYLOAD; return -EINVAL; } @@ -344,10 +339,10 @@ static int nvme_auth_process_dhchap_success1(struct nvme_ctrl *ctrl, struct nvmf_auth_dhchap_success1_data *data = chap->buf; size_t size = sizeof(*data); - if (ctrl->ctrl_key) + if (chap->ctrl_key) size += chap->hash_len; - if (chap->buf_size < size) { + if (size > CHAP_BUF_SIZE) { chap->status = NVME_AUTH_DHCHAP_FAILURE_INCORRECT_PAYLOAD; return NVME_SC_INVALID_FIELD; } @@ -521,6 +516,7 @@ static int nvme_auth_dhchap_setup_ctrl_response(struct nvme_ctrl *ctrl, ret = PTR_ERR(ctrl_response); return ret; } + ret = crypto_shash_setkey(chap->shash_tfm, ctrl_response, ctrl->ctrl_key->len); if (ret) { @@ -621,9 +617,6 @@ static int nvme_auth_dhchap_exponential(struct nvme_ctrl *ctrl, if (ret) { dev_dbg(ctrl->device, "failed to generate public key, error %d\n", ret); - kfree(chap->host_key); - chap->host_key = NULL; - chap->host_key_len = 0; chap->status = NVME_AUTH_DHCHAP_FAILURE_INCORRECT_PAYLOAD; return ret; } @@ -643,9 +636,6 @@ gen_sesskey: if (ret) { dev_dbg(ctrl->device, "failed to generate shared secret, error %d\n", ret); - kfree_sensitive(chap->sess_key); - chap->sess_key = NULL; - chap->sess_key_len = 0; chap->status = NVME_AUTH_DHCHAP_FAILURE_INCORRECT_PAYLOAD; return ret; } @@ -654,7 +644,7 @@ gen_sesskey: return 0; } -static void __nvme_auth_reset(struct nvme_dhchap_queue_context *chap) +static void nvme_auth_reset_dhchap(struct nvme_dhchap_queue_context *chap) { kfree_sensitive(chap->host_response); chap->host_response = NULL; @@ -674,24 +664,20 @@ static void __nvme_auth_reset(struct nvme_dhchap_queue_context *chap) chap->transaction = 0; memset(chap->c1, 0, sizeof(chap->c1)); memset(chap->c2, 0, sizeof(chap->c2)); + mempool_free(chap->buf, nvme_chap_buf_pool); + chap->buf = NULL; } -static void __nvme_auth_free(struct nvme_dhchap_queue_context *chap) +static void nvme_auth_free_dhchap(struct nvme_dhchap_queue_context *chap) { - __nvme_auth_reset(chap); + nvme_auth_reset_dhchap(chap); if (chap->shash_tfm) crypto_free_shash(chap->shash_tfm); if (chap->dh_tfm) crypto_free_kpp(chap->dh_tfm); - kfree_sensitive(chap->ctrl_key); - kfree_sensitive(chap->host_key); - kfree_sensitive(chap->sess_key); - kfree_sensitive(chap->host_response); - kfree(chap->buf); - kfree(chap); } -static void __nvme_auth_work(struct work_struct *work) +static void nvme_queue_auth_work(struct work_struct *work) { struct nvme_dhchap_queue_context *chap = container_of(work, struct nvme_dhchap_queue_context, auth_work); @@ -699,6 +685,16 @@ static void __nvme_auth_work(struct work_struct *work) size_t tl; int ret = 0; + /* + * Allocate a large enough buffer for the entire negotiation: + * 4k is enough to ffdhe8192. + */ + chap->buf = mempool_alloc(nvme_chap_buf_pool, GFP_KERNEL); + if (!chap->buf) { + chap->error = -ENOMEM; + return; + } + chap->transaction = ctrl->transaction++; /* DH-HMAC-CHAP Step 1: send negotiate */ @@ -720,8 +716,9 @@ static void __nvme_auth_work(struct work_struct *work) dev_dbg(ctrl->device, "%s: qid %d receive challenge\n", __func__, chap->qid); - memset(chap->buf, 0, chap->buf_size); - ret = nvme_auth_submit(ctrl, chap->qid, chap->buf, chap->buf_size, false); + memset(chap->buf, 0, CHAP_BUF_SIZE); + ret = nvme_auth_submit(ctrl, chap->qid, chap->buf, CHAP_BUF_SIZE, + false); if (ret) { dev_warn(ctrl->device, "qid %d failed to receive challenge, %s %d\n", @@ -757,11 +754,14 @@ static void __nvme_auth_work(struct work_struct *work) dev_dbg(ctrl->device, "%s: qid %d host response\n", __func__, chap->qid); + mutex_lock(&ctrl->dhchap_auth_mutex); ret = nvme_auth_dhchap_setup_host_response(ctrl, chap); if (ret) { + mutex_unlock(&ctrl->dhchap_auth_mutex); chap->error = ret; goto fail2; } + mutex_unlock(&ctrl->dhchap_auth_mutex); /* DH-HMAC-CHAP Step 3: send reply */ dev_dbg(ctrl->device, "%s: qid %d send reply\n", @@ -783,8 +783,9 @@ static void __nvme_auth_work(struct work_struct *work) dev_dbg(ctrl->device, "%s: qid %d receive success1\n", __func__, chap->qid); - memset(chap->buf, 0, chap->buf_size); - ret = nvme_auth_submit(ctrl, chap->qid, chap->buf, chap->buf_size, false); + memset(chap->buf, 0, CHAP_BUF_SIZE); + ret = nvme_auth_submit(ctrl, chap->qid, chap->buf, CHAP_BUF_SIZE, + false); if (ret) { dev_warn(ctrl->device, "qid %d failed to receive success1, %s %d\n", @@ -801,16 +802,19 @@ static void __nvme_auth_work(struct work_struct *work) return; } + mutex_lock(&ctrl->dhchap_auth_mutex); if (ctrl->ctrl_key) { dev_dbg(ctrl->device, "%s: qid %d controller response\n", __func__, chap->qid); ret = nvme_auth_dhchap_setup_ctrl_response(ctrl, chap); if (ret) { + mutex_unlock(&ctrl->dhchap_auth_mutex); chap->error = ret; goto fail2; } } + mutex_unlock(&ctrl->dhchap_auth_mutex); ret = nvme_auth_process_dhchap_success1(ctrl, chap); if (ret) { @@ -819,7 +823,7 @@ static void __nvme_auth_work(struct work_struct *work) goto fail2; } - if (ctrl->ctrl_key) { + if (chap->ctrl_key) { /* DH-HMAC-CHAP Step 5: send success2 */ dev_dbg(ctrl->device, "%s: qid %d send success2\n", __func__, chap->qid); @@ -860,42 +864,8 @@ int nvme_auth_negotiate(struct nvme_ctrl *ctrl, int qid) return -ENOKEY; } - mutex_lock(&ctrl->dhchap_auth_mutex); - /* Check if the context is already queued */ - list_for_each_entry(chap, &ctrl->dhchap_auth_list, entry) { - WARN_ON(!chap->buf); - if (chap->qid == qid) { - dev_dbg(ctrl->device, "qid %d: re-using context\n", qid); - mutex_unlock(&ctrl->dhchap_auth_mutex); - flush_work(&chap->auth_work); - __nvme_auth_reset(chap); - queue_work(nvme_wq, &chap->auth_work); - return 0; - } - } - chap = kzalloc(sizeof(*chap), GFP_KERNEL); - if (!chap) { - mutex_unlock(&ctrl->dhchap_auth_mutex); - return -ENOMEM; - } - chap->qid = (qid == NVME_QID_ANY) ? 0 : qid; - chap->ctrl = ctrl; - - /* - * Allocate a large enough buffer for the entire negotiation: - * 4k should be enough to ffdhe8192. - */ - chap->buf_size = 4096; - chap->buf = kzalloc(chap->buf_size, GFP_KERNEL); - if (!chap->buf) { - mutex_unlock(&ctrl->dhchap_auth_mutex); - kfree(chap); - return -ENOMEM; - } - - INIT_WORK(&chap->auth_work, __nvme_auth_work); - list_add(&chap->entry, &ctrl->dhchap_auth_list); - mutex_unlock(&ctrl->dhchap_auth_mutex); + chap = &ctrl->dhchap_ctxs[qid]; + cancel_work_sync(&chap->auth_work); queue_work(nvme_wq, &chap->auth_work); return 0; } @@ -906,40 +876,28 @@ int nvme_auth_wait(struct nvme_ctrl *ctrl, int qid) struct nvme_dhchap_queue_context *chap; int ret; - mutex_lock(&ctrl->dhchap_auth_mutex); - list_for_each_entry(chap, &ctrl->dhchap_auth_list, entry) { - if (chap->qid != qid) - continue; - mutex_unlock(&ctrl->dhchap_auth_mutex); - flush_work(&chap->auth_work); - ret = chap->error; - return ret; - } - mutex_unlock(&ctrl->dhchap_auth_mutex); - return -ENXIO; + chap = &ctrl->dhchap_ctxs[qid]; + flush_work(&chap->auth_work); + ret = chap->error; + /* clear sensitive info */ + nvme_auth_reset_dhchap(chap); + return ret; } EXPORT_SYMBOL_GPL(nvme_auth_wait); -void nvme_auth_reset(struct nvme_ctrl *ctrl) -{ - struct nvme_dhchap_queue_context *chap; - - mutex_lock(&ctrl->dhchap_auth_mutex); - list_for_each_entry(chap, &ctrl->dhchap_auth_list, entry) { - mutex_unlock(&ctrl->dhchap_auth_mutex); - flush_work(&chap->auth_work); - __nvme_auth_reset(chap); - } - mutex_unlock(&ctrl->dhchap_auth_mutex); -} -EXPORT_SYMBOL_GPL(nvme_auth_reset); - -static void nvme_dhchap_auth_work(struct work_struct *work) +static void nvme_ctrl_auth_work(struct work_struct *work) { struct nvme_ctrl *ctrl = container_of(work, struct nvme_ctrl, dhchap_auth_work); int ret, q; + /* + * If the ctrl is no connected, bail as reconnect will handle + * authentication. + */ + if (ctrl->state != NVME_CTRL_LIVE) + return; + /* Authenticate admin queue first */ ret = nvme_auth_negotiate(ctrl, 0); if (ret) { @@ -968,43 +926,75 @@ static void nvme_dhchap_auth_work(struct work_struct *work) * Failure is a soft-state; credentials remain valid until * the controller terminates the connection. */ + for (q = 1; q < ctrl->queue_count; q++) { + ret = nvme_auth_wait(ctrl, q); + if (ret) + dev_warn(ctrl->device, + "qid %d: authentication failed\n", q); + } } -void nvme_auth_init_ctrl(struct nvme_ctrl *ctrl) +int nvme_auth_init_ctrl(struct nvme_ctrl *ctrl) { - INIT_LIST_HEAD(&ctrl->dhchap_auth_list); - INIT_WORK(&ctrl->dhchap_auth_work, nvme_dhchap_auth_work); + struct nvme_dhchap_queue_context *chap; + int i, ret; + mutex_init(&ctrl->dhchap_auth_mutex); + INIT_WORK(&ctrl->dhchap_auth_work, nvme_ctrl_auth_work); if (!ctrl->opts) - return; - nvme_auth_generate_key(ctrl->opts->dhchap_secret, &ctrl->host_key); - nvme_auth_generate_key(ctrl->opts->dhchap_ctrl_secret, &ctrl->ctrl_key); + return 0; + ret = nvme_auth_generate_key(ctrl->opts->dhchap_secret, + &ctrl->host_key); + if (ret) + return ret; + ret = nvme_auth_generate_key(ctrl->opts->dhchap_ctrl_secret, + &ctrl->ctrl_key); + if (ret) + goto err_free_dhchap_secret; + + if (!ctrl->opts->dhchap_secret && !ctrl->opts->dhchap_ctrl_secret) + return ret; + + ctrl->dhchap_ctxs = kvcalloc(ctrl_max_dhchaps(ctrl), + sizeof(*chap), GFP_KERNEL); + if (!ctrl->dhchap_ctxs) { + ret = -ENOMEM; + goto err_free_dhchap_ctrl_secret; + } + + for (i = 0; i < ctrl_max_dhchaps(ctrl); i++) { + chap = &ctrl->dhchap_ctxs[i]; + chap->qid = i; + chap->ctrl = ctrl; + INIT_WORK(&chap->auth_work, nvme_queue_auth_work); + } + + return 0; +err_free_dhchap_ctrl_secret: + nvme_auth_free_key(ctrl->ctrl_key); + ctrl->ctrl_key = NULL; +err_free_dhchap_secret: + nvme_auth_free_key(ctrl->host_key); + ctrl->host_key = NULL; + return ret; } EXPORT_SYMBOL_GPL(nvme_auth_init_ctrl); void nvme_auth_stop(struct nvme_ctrl *ctrl) { - struct nvme_dhchap_queue_context *chap = NULL, *tmp; - cancel_work_sync(&ctrl->dhchap_auth_work); - mutex_lock(&ctrl->dhchap_auth_mutex); - list_for_each_entry_safe(chap, tmp, &ctrl->dhchap_auth_list, entry) - cancel_work_sync(&chap->auth_work); - mutex_unlock(&ctrl->dhchap_auth_mutex); } EXPORT_SYMBOL_GPL(nvme_auth_stop); void nvme_auth_free(struct nvme_ctrl *ctrl) { - struct nvme_dhchap_queue_context *chap = NULL, *tmp; + int i; - mutex_lock(&ctrl->dhchap_auth_mutex); - list_for_each_entry_safe(chap, tmp, &ctrl->dhchap_auth_list, entry) { - list_del_init(&chap->entry); - flush_work(&chap->auth_work); - __nvme_auth_free(chap); + if (ctrl->dhchap_ctxs) { + for (i = 0; i < ctrl_max_dhchaps(ctrl); i++) + nvme_auth_free_dhchap(&ctrl->dhchap_ctxs[i]); + kfree(ctrl->dhchap_ctxs); } - mutex_unlock(&ctrl->dhchap_auth_mutex); if (ctrl->host_key) { nvme_auth_free_key(ctrl->host_key); ctrl->host_key = NULL; @@ -1015,3 +1005,27 @@ void nvme_auth_free(struct nvme_ctrl *ctrl) } } EXPORT_SYMBOL_GPL(nvme_auth_free); + +int __init nvme_init_auth(void) +{ + nvme_chap_buf_cache = kmem_cache_create("nvme-chap-buf-cache", + CHAP_BUF_SIZE, 0, SLAB_HWCACHE_ALIGN, NULL); + if (!nvme_chap_buf_cache) + return -ENOMEM; + + nvme_chap_buf_pool = mempool_create(16, mempool_alloc_slab, + mempool_free_slab, nvme_chap_buf_cache); + if (!nvme_chap_buf_pool) + goto err_destroy_chap_buf_cache; + + return 0; +err_destroy_chap_buf_cache: + kmem_cache_destroy(nvme_chap_buf_cache); + return -ENOMEM; +} + +void __exit nvme_exit_auth(void) +{ + mempool_destroy(nvme_chap_buf_pool); + kmem_cache_destroy(nvme_chap_buf_cache); +} diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index f94b05c585cb..3195ae17df30 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -850,8 +850,11 @@ static inline blk_status_t nvme_setup_write_zeroes(struct nvme_ns *ns, cmnd->write_zeroes.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1); + if (!(req->cmd_flags & REQ_NOUNMAP) && (ns->features & NVME_NS_DEAC)) + cmnd->write_zeroes.control |= cpu_to_le16(NVME_WZ_DEAC); + if (nvme_ns_has_pi(ns)) { - cmnd->write_zeroes.control = cpu_to_le16(NVME_RW_PRINFO_PRACT); + cmnd->write_zeroes.control |= cpu_to_le16(NVME_RW_PRINFO_PRACT); switch (ns->pi_type) { case NVME_NS_DPS_PI_TYPE1: @@ -1120,8 +1123,10 @@ void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects, mutex_unlock(&ctrl->subsys->lock); mutex_unlock(&ctrl->scan_lock); } - if (effects & NVME_CMD_EFFECTS_CCC) - nvme_init_ctrl_finish(ctrl); + if (effects & NVME_CMD_EFFECTS_CCC) { + dev_info(ctrl->device, +"controller capabilities changed, reset may be required to take effect.\n"); + } if (effects & (NVME_CMD_EFFECTS_NIC | NVME_CMD_EFFECTS_NCC)) { nvme_queue_scan(ctrl); flush_work(&ctrl->scan_work); @@ -2003,6 +2008,14 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, } } + /* + * Only set the DEAC bit if the device guarantees that reads from + * deallocated data return zeroes. While the DEAC bit does not + * require that, it must be a no-op if reads from deallocated data + * do not return zeroes. + */ + if ((id->dlfeat & 0x7) == 0x1 && (id->dlfeat & (1 << 3))) + ns->features |= NVME_NS_DEAC; set_disk_ro(ns->disk, nvme_ns_is_readonly(ns, info)); set_bit(NVME_NS_READY, &ns->flags); blk_mq_unfreeze_queue(ns->disk->queue); @@ -2179,7 +2192,7 @@ const struct pr_ops nvme_pr_ops = { }; #ifdef CONFIG_BLK_SED_OPAL -int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len, +static int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len, bool send) { struct nvme_ctrl *ctrl = data; @@ -2196,7 +2209,23 @@ int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len, return __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, NULL, buffer, len, NVME_QID_ANY, 1, 0); } -EXPORT_SYMBOL_GPL(nvme_sec_submit); + +static void nvme_configure_opal(struct nvme_ctrl *ctrl, bool was_suspended) +{ + if (ctrl->oacs & NVME_CTRL_OACS_SEC_SUPP) { + if (!ctrl->opal_dev) + ctrl->opal_dev = init_opal_dev(ctrl, &nvme_sec_submit); + else if (was_suspended) + opal_unlock_from_suspend(ctrl->opal_dev); + } else { + free_opal_dev(ctrl->opal_dev); + ctrl->opal_dev = NULL; + } +} +#else +static void nvme_configure_opal(struct nvme_ctrl *ctrl, bool was_suspended) +{ +} #endif /* CONFIG_BLK_SED_OPAL */ #ifdef CONFIG_BLK_DEV_ZONED @@ -3049,7 +3078,7 @@ static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl) id = kzalloc(sizeof(*id), GFP_KERNEL); if (!id) - return 0; + return -ENOMEM; c.identify.opcode = nvme_admin_identify; c.identify.cns = NVME_ID_CNS_CS_CTRL; @@ -3229,7 +3258,7 @@ out_free: * register in our nvme_ctrl structure. This should be called as soon as * the admin queue is fully up and running. */ -int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl) +int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl, bool was_suspended) { int ret; @@ -3260,6 +3289,8 @@ int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl) if (ret < 0) return ret; + nvme_configure_opal(ctrl, was_suspended); + if (!ctrl->identified && !nvme_discovery_ctrl(ctrl)) { /* * Do not return errors unless we are in a controller reset, @@ -3745,15 +3776,19 @@ static ssize_t nvme_ctrl_dhchap_secret_store(struct device *dev, memcpy(dhchap_secret, buf, count); nvme_auth_stop(ctrl); if (strcmp(dhchap_secret, opts->dhchap_secret)) { + struct nvme_dhchap_key *key, *host_key; int ret; - ret = nvme_auth_generate_key(dhchap_secret, &ctrl->host_key); + ret = nvme_auth_generate_key(dhchap_secret, &key); if (ret) return ret; kfree(opts->dhchap_secret); opts->dhchap_secret = dhchap_secret; - /* Key has changed; re-authentication with new key */ - nvme_auth_reset(ctrl); + host_key = ctrl->host_key; + mutex_lock(&ctrl->dhchap_auth_mutex); + ctrl->host_key = key; + mutex_unlock(&ctrl->dhchap_auth_mutex); + nvme_auth_free_key(host_key); } /* Start re-authentication */ dev_info(ctrl->device, "re-authenticating controller\n"); @@ -3795,15 +3830,19 @@ static ssize_t nvme_ctrl_dhchap_ctrl_secret_store(struct device *dev, memcpy(dhchap_secret, buf, count); nvme_auth_stop(ctrl); if (strcmp(dhchap_secret, opts->dhchap_ctrl_secret)) { + struct nvme_dhchap_key *key, *ctrl_key; int ret; - ret = nvme_auth_generate_key(dhchap_secret, &ctrl->ctrl_key); + ret = nvme_auth_generate_key(dhchap_secret, &key); if (ret) return ret; kfree(opts->dhchap_ctrl_secret); opts->dhchap_ctrl_secret = dhchap_secret; - /* Key has changed; re-authentication with new key */ - nvme_auth_reset(ctrl); + ctrl_key = ctrl->ctrl_key; + mutex_lock(&ctrl->dhchap_auth_mutex); + ctrl->ctrl_key = key; + mutex_unlock(&ctrl->dhchap_auth_mutex); + nvme_auth_free_key(ctrl_key); } /* Start re-authentication */ dev_info(ctrl->device, "re-authenticating controller\n"); @@ -3875,10 +3914,11 @@ static umode_t nvme_dev_attrs_are_visible(struct kobject *kobj, return a->mode; } -static const struct attribute_group nvme_dev_attrs_group = { +const struct attribute_group nvme_dev_attrs_group = { .attrs = nvme_dev_attrs, .is_visible = nvme_dev_attrs_are_visible, }; +EXPORT_SYMBOL_GPL(nvme_dev_attrs_group); static const struct attribute_group *nvme_dev_attr_groups[] = { &nvme_dev_attrs_group, @@ -4420,9 +4460,6 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl) u32 prev = 0; int ret = 0, i; - if (nvme_ctrl_limited_cns(ctrl)) - return -EOPNOTSUPP; - ns_list = kzalloc(NVME_IDENTIFY_DATA_SIZE, GFP_KERNEL); if (!ns_list) return -ENOMEM; @@ -4530,8 +4567,18 @@ static void nvme_scan_work(struct work_struct *work) } mutex_lock(&ctrl->scan_lock); - if (nvme_scan_ns_list(ctrl) != 0) + if (nvme_ctrl_limited_cns(ctrl)) { nvme_scan_ns_sequential(ctrl); + } else { + /* + * Fall back to sequential scan if DNR is set to handle broken + * devices which should support Identify NS List (as per the VS + * they report) but don't actually support it. + */ + ret = nvme_scan_ns_list(ctrl); + if (ret > 0 && ret & NVME_SC_DNR) + nvme_scan_ns_sequential(ctrl); + } mutex_unlock(&ctrl->scan_lock); } @@ -4563,7 +4610,7 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl) */ if (ctrl->state == NVME_CTRL_DEAD) { nvme_mark_namespaces_dead(ctrl); - nvme_start_queues(ctrl); + nvme_unquiesce_io_queues(ctrl); } /* this is a no-op when called from the controller reset handler */ @@ -4690,7 +4737,7 @@ static void nvme_fw_act_work(struct work_struct *work) fw_act_timeout = jiffies + msecs_to_jiffies(admin_timeout * 1000); - nvme_stop_queues(ctrl); + nvme_quiesce_io_queues(ctrl); while (nvme_ctrl_pp_status(ctrl)) { if (time_after(jiffies, fw_act_timeout)) { dev_warn(ctrl->device, @@ -4704,7 +4751,7 @@ static void nvme_fw_act_work(struct work_struct *work) if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE)) return; - nvme_start_queues(ctrl); + nvme_unquiesce_io_queues(ctrl); /* read FW slot information to clear the AER */ nvme_get_fw_slot_info(ctrl); @@ -4949,7 +4996,7 @@ void nvme_start_ctrl(struct nvme_ctrl *ctrl) if (ctrl->queue_count > 1) { nvme_queue_scan(ctrl); - nvme_start_queues(ctrl); + nvme_unquiesce_io_queues(ctrl); nvme_mpath_update(ctrl); } @@ -4994,6 +5041,7 @@ static void nvme_free_ctrl(struct device *dev) nvme_auth_stop(ctrl); nvme_auth_free(ctrl); __free_page(ctrl->discard_page); + free_opal_dev(ctrl->opal_dev); if (subsys) { mutex_lock(&nvme_subsystems_lock); @@ -5059,7 +5107,10 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, ctrl->instance); ctrl->device->class = nvme_class; ctrl->device->parent = ctrl->dev; - ctrl->device->groups = nvme_dev_attr_groups; + if (ops->dev_attr_groups) + ctrl->device->groups = ops->dev_attr_groups; + else + ctrl->device->groups = nvme_dev_attr_groups; ctrl->device->release = nvme_free_ctrl; dev_set_drvdata(ctrl->device, ctrl); ret = dev_set_name(ctrl->device, "nvme%d", ctrl->instance); @@ -5083,9 +5134,13 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, nvme_fault_inject_init(&ctrl->fault_inject, dev_name(ctrl->device)); nvme_mpath_init_ctrl(ctrl); - nvme_auth_init_ctrl(ctrl); + ret = nvme_auth_init_ctrl(ctrl); + if (ret) + goto out_free_cdev; return 0; +out_free_cdev: + cdev_device_del(&ctrl->cdev, ctrl->device); out_free_name: nvme_put_ctrl(ctrl); kfree_const(ctrl->device->kobj.name); @@ -5158,37 +5213,37 @@ void nvme_start_freeze(struct nvme_ctrl *ctrl) } EXPORT_SYMBOL_GPL(nvme_start_freeze); -void nvme_stop_queues(struct nvme_ctrl *ctrl) +void nvme_quiesce_io_queues(struct nvme_ctrl *ctrl) { if (!test_and_set_bit(NVME_CTRL_STOPPED, &ctrl->flags)) blk_mq_quiesce_tagset(ctrl->tagset); else blk_mq_wait_quiesce_done(ctrl->tagset); } -EXPORT_SYMBOL_GPL(nvme_stop_queues); +EXPORT_SYMBOL_GPL(nvme_quiesce_io_queues); -void nvme_start_queues(struct nvme_ctrl *ctrl) +void nvme_unquiesce_io_queues(struct nvme_ctrl *ctrl) { if (test_and_clear_bit(NVME_CTRL_STOPPED, &ctrl->flags)) blk_mq_unquiesce_tagset(ctrl->tagset); } -EXPORT_SYMBOL_GPL(nvme_start_queues); +EXPORT_SYMBOL_GPL(nvme_unquiesce_io_queues); -void nvme_stop_admin_queue(struct nvme_ctrl *ctrl) +void nvme_quiesce_admin_queue(struct nvme_ctrl *ctrl) { if (!test_and_set_bit(NVME_CTRL_ADMIN_Q_STOPPED, &ctrl->flags)) blk_mq_quiesce_queue(ctrl->admin_q); else blk_mq_wait_quiesce_done(ctrl->admin_q->tag_set); } -EXPORT_SYMBOL_GPL(nvme_stop_admin_queue); +EXPORT_SYMBOL_GPL(nvme_quiesce_admin_queue); -void nvme_start_admin_queue(struct nvme_ctrl *ctrl) +void nvme_unquiesce_admin_queue(struct nvme_ctrl *ctrl) { if (test_and_clear_bit(NVME_CTRL_ADMIN_Q_STOPPED, &ctrl->flags)) blk_mq_unquiesce_queue(ctrl->admin_q); } -EXPORT_SYMBOL_GPL(nvme_start_admin_queue); +EXPORT_SYMBOL_GPL(nvme_unquiesce_admin_queue); void nvme_sync_io_queues(struct nvme_ctrl *ctrl) { @@ -5299,8 +5354,13 @@ static int __init nvme_core_init(void) goto unregister_generic_ns; } + result = nvme_init_auth(); + if (result) + goto destroy_ns_chr; return 0; +destroy_ns_chr: + class_destroy(nvme_ns_chr_class); unregister_generic_ns: unregister_chrdev_region(nvme_ns_chr_devt, NVME_MINORS); destroy_subsys_class: @@ -5321,6 +5381,7 @@ out: static void __exit nvme_core_exit(void) { + nvme_exit_auth(); class_destroy(nvme_ns_chr_class); class_destroy(nvme_subsys_class); class_destroy(nvme_class); diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 5d57a042dbca..aa5fb56c07d9 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1475,6 +1475,8 @@ nvme_fc_xmt_ls_rsp_done(struct nvmefc_ls_rsp *lsrsp) fc_dma_unmap_single(lport->dev, lsop->rspdma, sizeof(*lsop->rspbuf), DMA_TO_DEVICE); + kfree(lsop->rspbuf); + kfree(lsop->rqstbuf); kfree(lsop); nvme_fc_rport_put(rport); @@ -1751,20 +1753,17 @@ nvme_fc_rcv_ls_req(struct nvme_fc_remote_port *portptr, goto out_put; } - lsop = kzalloc(sizeof(*lsop) + - sizeof(union nvmefc_ls_requests) + - sizeof(union nvmefc_ls_responses), - GFP_KERNEL); - if (!lsop) { + lsop = kzalloc(sizeof(*lsop), GFP_KERNEL); + lsop->rqstbuf = kzalloc(sizeof(*lsop->rqstbuf), GFP_KERNEL); + lsop->rspbuf = kzalloc(sizeof(*lsop->rspbuf), GFP_KERNEL); + if (!lsop || !lsop->rqstbuf || !lsop->rspbuf) { dev_info(lport->dev, "RCV %s LS failed: No memory\n", (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ? nvmefc_ls_names[w0->ls_cmd] : ""); ret = -ENOMEM; - goto out_put; + goto out_free; } - lsop->rqstbuf = (union nvmefc_ls_requests *)&lsop[1]; - lsop->rspbuf = (union nvmefc_ls_responses *)&lsop->rqstbuf[1]; lsop->rspdma = fc_dma_map_single(lport->dev, lsop->rspbuf, sizeof(*lsop->rspbuf), @@ -1801,6 +1800,8 @@ out_unmap: fc_dma_unmap_single(lport->dev, lsop->rspdma, sizeof(*lsop->rspbuf), DMA_TO_DEVICE); out_free: + kfree(lsop->rspbuf); + kfree(lsop->rqstbuf); kfree(lsop); out_put: nvme_fc_rport_put(rport); @@ -2391,7 +2392,7 @@ nvme_fc_ctrl_free(struct kref *ref) list_del(&ctrl->ctrl_list); spin_unlock_irqrestore(&ctrl->rport->lock, flags); - nvme_start_admin_queue(&ctrl->ctrl); + nvme_unquiesce_admin_queue(&ctrl->ctrl); nvme_remove_admin_tag_set(&ctrl->ctrl); kfree(ctrl->queues); @@ -2492,13 +2493,13 @@ __nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues) * (but with error status). */ if (ctrl->ctrl.queue_count > 1) { - nvme_stop_queues(&ctrl->ctrl); + nvme_quiesce_io_queues(&ctrl->ctrl); nvme_sync_io_queues(&ctrl->ctrl); blk_mq_tagset_busy_iter(&ctrl->tag_set, nvme_fc_terminate_exchange, &ctrl->ctrl); blk_mq_tagset_wait_completed_request(&ctrl->tag_set); if (start_queues) - nvme_start_queues(&ctrl->ctrl); + nvme_unquiesce_io_queues(&ctrl->ctrl); } /* @@ -2516,13 +2517,13 @@ __nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues) /* * clean up the admin queue. Same thing as above. */ - nvme_stop_admin_queue(&ctrl->ctrl); + nvme_quiesce_admin_queue(&ctrl->ctrl); blk_sync_queue(ctrl->ctrl.admin_q); blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, nvme_fc_terminate_exchange, &ctrl->ctrl); blk_mq_tagset_wait_completed_request(&ctrl->admin_tag_set); if (start_queues) - nvme_start_admin_queue(&ctrl->ctrl); + nvme_unquiesce_admin_queue(&ctrl->ctrl); } static void @@ -3104,9 +3105,9 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) ctrl->ctrl.max_hw_sectors = ctrl->ctrl.max_segments << (ilog2(SZ_4K) - 9); - nvme_start_admin_queue(&ctrl->ctrl); + nvme_unquiesce_admin_queue(&ctrl->ctrl); - ret = nvme_init_ctrl_finish(&ctrl->ctrl); + ret = nvme_init_ctrl_finish(&ctrl->ctrl, false); if (ret || test_bit(ASSOC_FAILED, &ctrl->flags)) goto out_disconnect_admin_queue; @@ -3250,10 +3251,10 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl) nvme_fc_free_queue(&ctrl->queues[0]); /* re-enable the admin_q so anything new can fast fail */ - nvme_start_admin_queue(&ctrl->ctrl); + nvme_unquiesce_admin_queue(&ctrl->ctrl); /* resume the io queues so that things will fast fail */ - nvme_start_queues(&ctrl->ctrl); + nvme_unquiesce_io_queues(&ctrl->ctrl); nvme_fc_ctlr_inactive_on_rport(ctrl); } diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index 81f5550b670d..9550a69029b3 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -8,6 +8,48 @@ #include <linux/io_uring.h> #include "nvme.h" +static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c, + fmode_t mode) +{ + if (capable(CAP_SYS_ADMIN)) + return true; + + /* + * Do not allow unprivileged processes to send vendor specific or fabrics + * commands as we can't be sure about their effects. + */ + if (c->common.opcode >= nvme_cmd_vendor_start || + c->common.opcode == nvme_fabrics_command) + return false; + + /* + * Do not allow unprivileged passthrough of admin commands except + * for a subset of identify commands that contain information required + * to form proper I/O commands in userspace and do not expose any + * potentially sensitive information. + */ + if (!ns) { + if (c->common.opcode == nvme_admin_identify) { + switch (c->identify.cns) { + case NVME_ID_CNS_NS: + case NVME_ID_CNS_CS_NS: + case NVME_ID_CNS_NS_CS_INDEP: + return true; + } + } + return false; + } + + /* + * Only allow I/O commands that transfer data to the controller if the + * special file is open for writing, but always allow I/O commands that + * transfer data from the controller. + */ + if (nvme_is_write(c)) + return mode & FMODE_WRITE; + return true; +} + /* * Convert integer values from ioctl structures to user pointers, silently * ignoring the upper bits in the compat case to match behaviour of 32-bit @@ -261,7 +303,7 @@ static bool nvme_validate_passthru_nsid(struct nvme_ctrl *ctrl, } static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, - struct nvme_passthru_cmd __user *ucmd) + struct nvme_passthru_cmd __user *ucmd, fmode_t mode) { struct nvme_passthru_cmd cmd; struct nvme_command c; @@ -269,8 +311,6 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u64 result; int status; - if (!capable(CAP_SYS_ADMIN)) - return -EACCES; if (copy_from_user(&cmd, ucmd, sizeof(cmd))) return -EFAULT; if (cmd.flags) @@ -291,6 +331,9 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, c.common.cdw14 = cpu_to_le32(cmd.cdw14); c.common.cdw15 = cpu_to_le32(cmd.cdw15); + if (!nvme_cmd_allowed(ns, &c, mode)) + return -EACCES; + if (cmd.timeout_ms) timeout = msecs_to_jiffies(cmd.timeout_ms); @@ -308,15 +351,14 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, } static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns, - struct nvme_passthru_cmd64 __user *ucmd, bool vec) + struct nvme_passthru_cmd64 __user *ucmd, bool vec, + fmode_t mode) { struct nvme_passthru_cmd64 cmd; struct nvme_command c; unsigned timeout = 0; int status; - if (!capable(CAP_SYS_ADMIN)) - return -EACCES; if (copy_from_user(&cmd, ucmd, sizeof(cmd))) return -EFAULT; if (cmd.flags) @@ -337,6 +379,9 @@ static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns, c.common.cdw14 = cpu_to_le32(cmd.cdw14); c.common.cdw15 = cpu_to_le32(cmd.cdw15); + if (!nvme_cmd_allowed(ns, &c, mode)) + return -EACCES; + if (cmd.timeout_ms) timeout = msecs_to_jiffies(cmd.timeout_ms); @@ -483,9 +528,6 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns, void *meta = NULL; int ret; - if (!capable(CAP_SYS_ADMIN)) - return -EACCES; - c.common.opcode = READ_ONCE(cmd->opcode); c.common.flags = READ_ONCE(cmd->flags); if (c.common.flags) @@ -507,6 +549,9 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns, c.common.cdw14 = cpu_to_le32(READ_ONCE(cmd->cdw14)); c.common.cdw15 = cpu_to_le32(READ_ONCE(cmd->cdw15)); + if (!nvme_cmd_allowed(ns, &c, ioucmd->file->f_mode)) + return -EACCES; + d.metadata = READ_ONCE(cmd->metadata); d.addr = READ_ONCE(cmd->addr); d.data_len = READ_ONCE(cmd->data_len); @@ -570,13 +615,13 @@ static bool is_ctrl_ioctl(unsigned int cmd) } static int nvme_ctrl_ioctl(struct nvme_ctrl *ctrl, unsigned int cmd, - void __user *argp) + void __user *argp, fmode_t mode) { switch (cmd) { case NVME_IOCTL_ADMIN_CMD: - return nvme_user_cmd(ctrl, NULL, argp); + return nvme_user_cmd(ctrl, NULL, argp, mode); case NVME_IOCTL_ADMIN64_CMD: - return nvme_user_cmd64(ctrl, NULL, argp, false); + return nvme_user_cmd64(ctrl, NULL, argp, false, mode); default: return sed_ioctl(ctrl->opal_dev, cmd, argp); } @@ -601,14 +646,14 @@ struct nvme_user_io32 { #endif /* COMPAT_FOR_U64_ALIGNMENT */ static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd, - void __user *argp) + void __user *argp, fmode_t mode) { switch (cmd) { case NVME_IOCTL_ID: force_successful_syscall_return(); return ns->head->ns_id; case NVME_IOCTL_IO_CMD: - return nvme_user_cmd(ns->ctrl, ns, argp); + return nvme_user_cmd(ns->ctrl, ns, argp, mode); /* * struct nvme_user_io can have different padding on some 32-bit ABIs. * Just accept the compat version as all fields that are used are the @@ -620,19 +665,20 @@ static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd, case NVME_IOCTL_SUBMIT_IO: return nvme_submit_io(ns, argp); case NVME_IOCTL_IO64_CMD: - return nvme_user_cmd64(ns->ctrl, ns, argp, false); + return nvme_user_cmd64(ns->ctrl, ns, argp, false, mode); case NVME_IOCTL_IO64_CMD_VEC: - return nvme_user_cmd64(ns->ctrl, ns, argp, true); + return nvme_user_cmd64(ns->ctrl, ns, argp, true, mode); default: return -ENOTTY; } } -static int __nvme_ioctl(struct nvme_ns *ns, unsigned int cmd, void __user *arg) +static int __nvme_ioctl(struct nvme_ns *ns, unsigned int cmd, void __user *arg, + fmode_t mode) { - if (is_ctrl_ioctl(cmd)) - return nvme_ctrl_ioctl(ns->ctrl, cmd, arg); - return nvme_ns_ioctl(ns, cmd, arg); + if (is_ctrl_ioctl(cmd)) + return nvme_ctrl_ioctl(ns->ctrl, cmd, arg, mode); + return nvme_ns_ioctl(ns, cmd, arg, mode); } int nvme_ioctl(struct block_device *bdev, fmode_t mode, @@ -640,7 +686,7 @@ int nvme_ioctl(struct block_device *bdev, fmode_t mode, { struct nvme_ns *ns = bdev->bd_disk->private_data; - return __nvme_ioctl(ns, cmd, (void __user *)arg); + return __nvme_ioctl(ns, cmd, (void __user *)arg, mode); } long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg) @@ -648,7 +694,7 @@ long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg) struct nvme_ns *ns = container_of(file_inode(file)->i_cdev, struct nvme_ns, cdev); - return __nvme_ioctl(ns, cmd, (void __user *)arg); + return __nvme_ioctl(ns, cmd, (void __user *)arg, file->f_mode); } static int nvme_uring_cmd_checks(unsigned int issue_flags) @@ -716,7 +762,8 @@ int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd, } #ifdef CONFIG_NVME_MULTIPATH static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd, - void __user *argp, struct nvme_ns_head *head, int srcu_idx) + void __user *argp, struct nvme_ns_head *head, int srcu_idx, + fmode_t mode) __releases(&head->srcu) { struct nvme_ctrl *ctrl = ns->ctrl; @@ -724,7 +771,7 @@ static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd, nvme_get_ctrl(ns->ctrl); srcu_read_unlock(&head->srcu, srcu_idx); - ret = nvme_ctrl_ioctl(ns->ctrl, cmd, argp); + ret = nvme_ctrl_ioctl(ns->ctrl, cmd, argp, mode); nvme_put_ctrl(ctrl); return ret; @@ -749,9 +796,10 @@ int nvme_ns_head_ioctl(struct block_device *bdev, fmode_t mode, * deadlock when deleting namespaces using the passthrough interface. */ if (is_ctrl_ioctl(cmd)) - return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx); + return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx, + mode); - ret = nvme_ns_ioctl(ns, cmd, argp); + ret = nvme_ns_ioctl(ns, cmd, argp, mode); out_unlock: srcu_read_unlock(&head->srcu, srcu_idx); return ret; @@ -773,9 +821,10 @@ long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd, goto out_unlock; if (is_ctrl_ioctl(cmd)) - return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx); + return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx, + file->f_mode); - ret = nvme_ns_ioctl(ns, cmd, argp); + ret = nvme_ns_ioctl(ns, cmd, argp, file->f_mode); out_unlock: srcu_read_unlock(&head->srcu, srcu_idx); return ret; @@ -849,7 +898,8 @@ int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags) return ret; } -static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp) +static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp, + fmode_t mode) { struct nvme_ns *ns; int ret; @@ -873,7 +923,7 @@ static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp) kref_get(&ns->kref); up_read(&ctrl->namespaces_rwsem); - ret = nvme_user_cmd(ctrl, ns, argp); + ret = nvme_user_cmd(ctrl, ns, argp, mode); nvme_put_ns(ns); return ret; @@ -890,11 +940,11 @@ long nvme_dev_ioctl(struct file *file, unsigned int cmd, switch (cmd) { case NVME_IOCTL_ADMIN_CMD: - return nvme_user_cmd(ctrl, NULL, argp); + return nvme_user_cmd(ctrl, NULL, argp, file->f_mode); case NVME_IOCTL_ADMIN64_CMD: - return nvme_user_cmd64(ctrl, NULL, argp, false); + return nvme_user_cmd64(ctrl, NULL, argp, false, file->f_mode); case NVME_IOCTL_IO_CMD: - return nvme_dev_user_cmd(ctrl, argp); + return nvme_dev_user_cmd(ctrl, argp, file->f_mode); case NVME_IOCTL_RESET: if (!capable(CAP_SYS_ADMIN)) return -EACCES; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index f9df10653f3c..b3a1c595d144 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -337,8 +337,8 @@ struct nvme_ctrl { #ifdef CONFIG_NVME_AUTH struct work_struct dhchap_auth_work; - struct list_head dhchap_auth_list; struct mutex dhchap_auth_mutex; + struct nvme_dhchap_queue_context *dhchap_ctxs; struct nvme_dhchap_key *host_key; struct nvme_dhchap_key *ctrl_key; u16 transaction; @@ -455,6 +455,7 @@ static inline bool nvme_ns_head_multipath(struct nvme_ns_head *head) enum nvme_ns_features { NVME_NS_EXT_LBAS = 1 << 0, /* support extended LBA format */ NVME_NS_METADATA_SUPPORTED = 1 << 1, /* support getting generated md */ + NVME_NS_DEAC, /* DEAC bit in Write Zeores supported */ }; struct nvme_ns { @@ -507,6 +508,7 @@ struct nvme_ctrl_ops { unsigned int flags; #define NVME_F_FABRICS (1 << 0) #define NVME_F_METADATA_SUPPORTED (1 << 1) + const struct attribute_group **dev_attr_groups; int (*reg_read32)(struct nvme_ctrl *ctrl, u32 off, u32 *val); int (*reg_write32)(struct nvme_ctrl *ctrl, u32 off, u32 val); int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val); @@ -735,7 +737,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, void nvme_uninit_ctrl(struct nvme_ctrl *ctrl); void nvme_start_ctrl(struct nvme_ctrl *ctrl); void nvme_stop_ctrl(struct nvme_ctrl *ctrl); -int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl); +int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl, bool was_suspended); int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, const struct blk_mq_ops *ops, unsigned int flags, unsigned int cmd_size); @@ -747,16 +749,13 @@ void nvme_remove_io_tag_set(struct nvme_ctrl *ctrl); void nvme_remove_namespaces(struct nvme_ctrl *ctrl); -int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len, - bool send); - void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status, volatile union nvme_result *res); -void nvme_stop_queues(struct nvme_ctrl *ctrl); -void nvme_start_queues(struct nvme_ctrl *ctrl); -void nvme_stop_admin_queue(struct nvme_ctrl *ctrl); -void nvme_start_admin_queue(struct nvme_ctrl *ctrl); +void nvme_quiesce_io_queues(struct nvme_ctrl *ctrl); +void nvme_unquiesce_io_queues(struct nvme_ctrl *ctrl); +void nvme_quiesce_admin_queue(struct nvme_ctrl *ctrl); +void nvme_unquiesce_admin_queue(struct nvme_ctrl *ctrl); void nvme_mark_namespaces_dead(struct nvme_ctrl *ctrl); void nvme_sync_queues(struct nvme_ctrl *ctrl); void nvme_sync_io_queues(struct nvme_ctrl *ctrl); @@ -856,6 +855,7 @@ int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags); extern const struct attribute_group *nvme_ns_id_attr_groups[]; extern const struct pr_ops nvme_pr_ops; extern const struct block_device_operations nvme_ns_head_ops; +extern const struct attribute_group nvme_dev_attrs_group; struct nvme_ns *nvme_find_path(struct nvme_ns_head *head); #ifdef CONFIG_NVME_MULTIPATH @@ -1018,14 +1018,25 @@ static inline bool nvme_ctrl_sgl_supported(struct nvme_ctrl *ctrl) } #ifdef CONFIG_NVME_AUTH -void nvme_auth_init_ctrl(struct nvme_ctrl *ctrl); +int __init nvme_init_auth(void); +void __exit nvme_exit_auth(void); +int nvme_auth_init_ctrl(struct nvme_ctrl *ctrl); void nvme_auth_stop(struct nvme_ctrl *ctrl); int nvme_auth_negotiate(struct nvme_ctrl *ctrl, int qid); int nvme_auth_wait(struct nvme_ctrl *ctrl, int qid); -void nvme_auth_reset(struct nvme_ctrl *ctrl); void nvme_auth_free(struct nvme_ctrl *ctrl); #else -static inline void nvme_auth_init_ctrl(struct nvme_ctrl *ctrl) {}; +static inline int nvme_auth_init_ctrl(struct nvme_ctrl *ctrl) +{ + return 0; +} +static inline int __init nvme_init_auth(void) +{ + return 0; +} +static inline void __exit nvme_exit_auth(void) +{ +} static inline void nvme_auth_stop(struct nvme_ctrl *ctrl) {}; static inline int nvme_auth_negotiate(struct nvme_ctrl *ctrl, int qid) { diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 208c387f1558..bd5fcdc9211c 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -130,7 +130,6 @@ struct nvme_dev { u32 db_stride; void __iomem *bar; unsigned long bar_mapped_size; - struct work_struct remove_work; struct mutex shutdown_lock; bool subsystem; u64 cmb_size; @@ -158,8 +157,6 @@ struct nvme_dev { unsigned int nr_allocated_queues; unsigned int nr_write_queues; unsigned int nr_poll_queues; - - bool attrs_added; }; static int io_queue_depth_set(const char *val, const struct kernel_param *kp) @@ -241,10 +238,13 @@ static inline unsigned int nvme_dbbuf_size(struct nvme_dev *dev) return dev->nr_allocated_queues * 8 * dev->db_stride; } -static int nvme_dbbuf_dma_alloc(struct nvme_dev *dev) +static void nvme_dbbuf_dma_alloc(struct nvme_dev *dev) { unsigned int mem_size = nvme_dbbuf_size(dev); + if (!(dev->ctrl.oacs & NVME_CTRL_OACS_DBBUF_SUPP)) + return; + if (dev->dbbuf_dbs) { /* * Clear the dbbuf memory so the driver doesn't observe stale @@ -252,25 +252,27 @@ static int nvme_dbbuf_dma_alloc(struct nvme_dev *dev) */ memset(dev->dbbuf_dbs, 0, mem_size); memset(dev->dbbuf_eis, 0, mem_size); - return 0; + return; } dev->dbbuf_dbs = dma_alloc_coherent(dev->dev, mem_size, &dev->dbbuf_dbs_dma_addr, GFP_KERNEL); if (!dev->dbbuf_dbs) - return -ENOMEM; + goto fail; dev->dbbuf_eis = dma_alloc_coherent(dev->dev, mem_size, &dev->dbbuf_eis_dma_addr, GFP_KERNEL); - if (!dev->dbbuf_eis) { - dma_free_coherent(dev->dev, mem_size, - dev->dbbuf_dbs, dev->dbbuf_dbs_dma_addr); - dev->dbbuf_dbs = NULL; - return -ENOMEM; - } + if (!dev->dbbuf_eis) + goto fail_free_dbbuf_dbs; + return; - return 0; +fail_free_dbbuf_dbs: + dma_free_coherent(dev->dev, mem_size, dev->dbbuf_dbs, + dev->dbbuf_dbs_dma_addr); + dev->dbbuf_dbs = NULL; +fail: + dev_warn(dev->dev, "unable to allocate dma for dbbuf\n"); } static void nvme_dbbuf_dma_free(struct nvme_dev *dev) @@ -392,14 +394,6 @@ static int nvme_pci_npages_sgl(void) PAGE_SIZE); } -static size_t nvme_pci_iod_alloc_size(void) -{ - size_t npages = max(nvme_pci_npages_prp(), nvme_pci_npages_sgl()); - - return sizeof(__le64 *) * npages + - sizeof(struct scatterlist) * NVME_MAX_SEGS; -} - static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int hctx_idx) { @@ -1487,7 +1481,7 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq) nvmeq->dev->online_queues--; if (!nvmeq->qid && nvmeq->dev->ctrl.admin_q) - nvme_stop_admin_queue(&nvmeq->dev->ctrl); + nvme_quiesce_admin_queue(&nvmeq->dev->ctrl); if (!test_and_clear_bit(NVMEQ_POLLED, &nvmeq->flags)) pci_free_irq(to_pci_dev(nvmeq->dev->dev), nvmeq->cq_vector, nvmeq); return 0; @@ -1747,8 +1741,9 @@ static void nvme_dev_remove_admin(struct nvme_dev *dev) * user requests may be waiting on a stopped queue. Start the * queue to flush these to completion. */ - nvme_start_admin_queue(&dev->ctrl); + nvme_unquiesce_admin_queue(&dev->ctrl); blk_mq_destroy_queue(dev->ctrl.admin_q); + blk_put_queue(dev->ctrl.admin_q); blk_mq_free_tag_set(&dev->admin_tagset); } } @@ -2106,6 +2101,9 @@ static int nvme_setup_host_mem(struct nvme_dev *dev) u32 enable_bits = NVME_HOST_MEM_ENABLE; int ret; + if (!dev->ctrl.hmpre) + return 0; + preferred = min(preferred, max); if (min > max) { dev_warn(dev->ctrl.device, @@ -2234,11 +2232,17 @@ static struct attribute *nvme_pci_attrs[] = { NULL, }; -static const struct attribute_group nvme_pci_attr_group = { +static const struct attribute_group nvme_pci_dev_attrs_group = { .attrs = nvme_pci_attrs, .is_visible = nvme_pci_attrs_are_visible, }; +static const struct attribute_group *nvme_pci_dev_attr_groups[] = { + &nvme_dev_attrs_group, + &nvme_pci_dev_attrs_group, + NULL, +}; + /* * nirqs is the number of interrupts available for write and read * queues. The core already reserved an interrupt for the admin queue. @@ -2642,7 +2646,8 @@ static int nvme_pci_enable(struct nvme_dev *dev) pci_enable_pcie_error_reporting(pdev); pci_save_state(pdev); - return 0; + + return nvme_pci_configure_admin_queue(dev); disable: pci_disable_device(pdev); @@ -2698,7 +2703,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) if (!dead && shutdown && freeze) nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT); - nvme_stop_queues(&dev->ctrl); + nvme_quiesce_io_queues(&dev->ctrl); if (!dead && dev->ctrl.queue_count > 0) { nvme_disable_io_queues(dev); @@ -2718,9 +2723,9 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) * deadlocking blk-mq hot-cpu notifier. */ if (shutdown) { - nvme_start_queues(&dev->ctrl); + nvme_unquiesce_io_queues(&dev->ctrl); if (dev->ctrl.admin_q && !blk_queue_dying(dev->ctrl.admin_q)) - nvme_start_admin_queue(&dev->ctrl); + nvme_unquiesce_admin_queue(&dev->ctrl); } mutex_unlock(&dev->shutdown_lock); } @@ -2757,6 +2762,22 @@ static void nvme_release_prp_pools(struct nvme_dev *dev) dma_pool_destroy(dev->prp_small_pool); } +static int nvme_pci_alloc_iod_mempool(struct nvme_dev *dev) +{ + size_t npages = max(nvme_pci_npages_prp(), nvme_pci_npages_sgl()); + size_t alloc_size = sizeof(__le64 *) * npages + + sizeof(struct scatterlist) * NVME_MAX_SEGS; + + WARN_ON_ONCE(alloc_size > PAGE_SIZE); + dev->iod_mempool = mempool_create_node(1, + mempool_kmalloc, mempool_kfree, + (void *)alloc_size, GFP_KERNEL, + dev_to_node(dev->dev)); + if (!dev->iod_mempool) + return -ENOMEM; + return 0; +} + static void nvme_free_tagset(struct nvme_dev *dev) { if (dev->tagset.tags) @@ -2764,35 +2785,17 @@ static void nvme_free_tagset(struct nvme_dev *dev) dev->ctrl.tagset = NULL; } +/* pairs with nvme_pci_alloc_dev */ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl) { struct nvme_dev *dev = to_nvme_dev(ctrl); - nvme_dbbuf_dma_free(dev); nvme_free_tagset(dev); - if (dev->ctrl.admin_q) - blk_put_queue(dev->ctrl.admin_q); - free_opal_dev(dev->ctrl.opal_dev); - mempool_destroy(dev->iod_mempool); put_device(dev->dev); kfree(dev->queues); kfree(dev); } -static void nvme_remove_dead_ctrl(struct nvme_dev *dev) -{ - /* - * Set state to deleting now to avoid blocking nvme_wait_reset(), which - * may be holding this pci_dev's device lock. - */ - nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING); - nvme_get_ctrl(&dev->ctrl); - nvme_dev_disable(dev, false); - nvme_mark_namespaces_dead(&dev->ctrl); - if (!queue_work(nvme_wq, &dev->remove_work)) - nvme_put_ctrl(&dev->ctrl); -} - static void nvme_reset_work(struct work_struct *work) { struct nvme_dev *dev = @@ -2819,34 +2822,7 @@ static void nvme_reset_work(struct work_struct *work) result = nvme_pci_enable(dev); if (result) goto out_unlock; - - result = nvme_pci_configure_admin_queue(dev); - if (result) - goto out_unlock; - - if (!dev->ctrl.admin_q) { - result = nvme_pci_alloc_admin_tag_set(dev); - if (result) - goto out_unlock; - } else { - nvme_start_admin_queue(&dev->ctrl); - } - - dma_set_min_align_mask(dev->dev, NVME_CTRL_PAGE_SIZE - 1); - - /* - * Limit the max command size to prevent iod->sg allocations going - * over a single page. - */ - dev->ctrl.max_hw_sectors = min_t(u32, - NVME_MAX_KB_SZ << 1, dma_max_mapping_size(dev->dev) >> 9); - dev->ctrl.max_segments = NVME_MAX_SEGS; - - /* - * Don't limit the IOMMU merged segment size. - */ - dma_set_max_seg_size(dev->dev, 0xffffffff); - + nvme_unquiesce_admin_queue(&dev->ctrl); mutex_unlock(&dev->shutdown_lock); /* @@ -2860,75 +2836,37 @@ static void nvme_reset_work(struct work_struct *work) goto out; } - /* - * We do not support an SGL for metadata (yet), so we are limited to a - * single integrity segment for the separate metadata pointer. - */ - dev->ctrl.max_integrity_segments = 1; - - result = nvme_init_ctrl_finish(&dev->ctrl); + result = nvme_init_ctrl_finish(&dev->ctrl, was_suspend); if (result) goto out; - if (dev->ctrl.oacs & NVME_CTRL_OACS_SEC_SUPP) { - if (!dev->ctrl.opal_dev) - dev->ctrl.opal_dev = - init_opal_dev(&dev->ctrl, &nvme_sec_submit); - else if (was_suspend) - opal_unlock_from_suspend(dev->ctrl.opal_dev); - } else { - free_opal_dev(dev->ctrl.opal_dev); - dev->ctrl.opal_dev = NULL; - } + nvme_dbbuf_dma_alloc(dev); - if (dev->ctrl.oacs & NVME_CTRL_OACS_DBBUF_SUPP) { - result = nvme_dbbuf_dma_alloc(dev); - if (result) - dev_warn(dev->dev, - "unable to allocate dma for dbbuf\n"); - } - - if (dev->ctrl.hmpre) { - result = nvme_setup_host_mem(dev); - if (result < 0) - goto out; - } + result = nvme_setup_host_mem(dev); + if (result < 0) + goto out; result = nvme_setup_io_queues(dev); if (result) goto out; - if (dev->ctrl.tagset) { - /* - * This is a controller reset and we already have a tagset. - * Freeze and update the number of I/O queues as thos might have - * changed. If there are no I/O queues left after this reset, - * keep the controller around but remove all namespaces. - */ - if (dev->online_queues > 1) { - nvme_start_queues(&dev->ctrl); - nvme_wait_freeze(&dev->ctrl); - nvme_pci_update_nr_queues(dev); - nvme_dbbuf_set(dev); - nvme_unfreeze(&dev->ctrl); - } else { - dev_warn(dev->ctrl.device, "IO queues lost\n"); - nvme_mark_namespaces_dead(&dev->ctrl); - nvme_start_queues(&dev->ctrl); - nvme_remove_namespaces(&dev->ctrl); - nvme_free_tagset(dev); - } + /* + * Freeze and update the number of I/O queues as thos might have + * changed. If there are no I/O queues left after this reset, keep the + * controller around but remove all namespaces. + */ + if (dev->online_queues > 1) { + nvme_unquiesce_io_queues(&dev->ctrl); + nvme_wait_freeze(&dev->ctrl); + nvme_pci_update_nr_queues(dev); + nvme_dbbuf_set(dev); + nvme_unfreeze(&dev->ctrl); } else { - /* - * First probe. Still allow the controller to show up even if - * there are no namespaces. - */ - if (dev->online_queues > 1) { - nvme_pci_alloc_tag_set(dev); - nvme_dbbuf_set(dev); - } else { - dev_warn(dev->ctrl.device, "IO queues not created\n"); - } + dev_warn(dev->ctrl.device, "IO queues lost\n"); + nvme_mark_namespaces_dead(&dev->ctrl); + nvme_unquiesce_io_queues(&dev->ctrl); + nvme_remove_namespaces(&dev->ctrl); + nvme_free_tagset(dev); } /* @@ -2942,30 +2880,22 @@ static void nvme_reset_work(struct work_struct *work) goto out; } - if (!dev->attrs_added && !sysfs_create_group(&dev->ctrl.device->kobj, - &nvme_pci_attr_group)) - dev->attrs_added = true; - nvme_start_ctrl(&dev->ctrl); return; out_unlock: mutex_unlock(&dev->shutdown_lock); out: - if (result) - dev_warn(dev->ctrl.device, - "Removing after probe failure status: %d\n", result); - nvme_remove_dead_ctrl(dev); -} - -static void nvme_remove_dead_ctrl_work(struct work_struct *work) -{ - struct nvme_dev *dev = container_of(work, struct nvme_dev, remove_work); - struct pci_dev *pdev = to_pci_dev(dev->dev); - - if (pci_get_drvdata(pdev)) - device_release_driver(&pdev->dev); - nvme_put_ctrl(&dev->ctrl); + /* + * Set state to deleting now to avoid blocking nvme_wait_reset(), which + * may be holding this pci_dev's device lock. + */ + dev_warn(dev->ctrl.device, "Disabling device after reset failure: %d\n", + result); + nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING); + nvme_dev_disable(dev, true); + nvme_mark_namespaces_dead(&dev->ctrl); + nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DEAD); } static int nvme_pci_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val) @@ -3018,6 +2948,7 @@ static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = { .name = "pcie", .module = THIS_MODULE, .flags = NVME_F_METADATA_SUPPORTED, + .dev_attr_groups = nvme_pci_dev_attr_groups, .reg_read32 = nvme_pci_reg_read32, .reg_write32 = nvme_pci_reg_write32, .reg_read64 = nvme_pci_reg_read64, @@ -3087,29 +3018,22 @@ static unsigned long check_vendor_combination_bug(struct pci_dev *pdev) return 0; } -static void nvme_async_probe(void *data, async_cookie_t cookie) +static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev, + const struct pci_device_id *id) { - struct nvme_dev *dev = data; - - flush_work(&dev->ctrl.reset_work); - flush_work(&dev->ctrl.scan_work); - nvme_put_ctrl(&dev->ctrl); -} - -static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) -{ - int node, result = -ENOMEM; - struct nvme_dev *dev; unsigned long quirks = id->driver_data; - size_t alloc_size; + int node = dev_to_node(&pdev->dev); + struct nvme_dev *dev; + int ret = -ENOMEM; - node = dev_to_node(&pdev->dev); if (node == NUMA_NO_NODE) set_dev_node(&pdev->dev, first_memory_node); dev = kzalloc_node(sizeof(*dev), GFP_KERNEL, node); if (!dev) - return -ENOMEM; + return NULL; + INIT_WORK(&dev->ctrl.reset_work, nvme_reset_work); + mutex_init(&dev->shutdown_lock); dev->nr_write_queues = write_queues; dev->nr_poll_queues = poll_queues; @@ -3117,25 +3041,11 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) dev->queues = kcalloc_node(dev->nr_allocated_queues, sizeof(struct nvme_queue), GFP_KERNEL, node); if (!dev->queues) - goto free; + goto out_free_dev; dev->dev = get_device(&pdev->dev); - pci_set_drvdata(pdev, dev); - - result = nvme_dev_map(dev); - if (result) - goto put_pci; - - INIT_WORK(&dev->ctrl.reset_work, nvme_reset_work); - INIT_WORK(&dev->remove_work, nvme_remove_dead_ctrl_work); - mutex_init(&dev->shutdown_lock); - - result = nvme_setup_prp_pools(dev); - if (result) - goto unmap; quirks |= check_vendor_combination_bug(pdev); - if (!noacpi && acpi_storage_d3(&pdev->dev)) { /* * Some systems use a bios work around to ask for D3 on @@ -3145,46 +3055,128 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) "platform quirk: setting simple suspend\n"); quirks |= NVME_QUIRK_SIMPLE_SUSPEND; } + ret = nvme_init_ctrl(&dev->ctrl, &pdev->dev, &nvme_pci_ctrl_ops, + quirks); + if (ret) + goto out_put_device; + + dma_set_min_align_mask(&pdev->dev, NVME_CTRL_PAGE_SIZE - 1); + dma_set_max_seg_size(&pdev->dev, 0xffffffff); /* - * Double check that our mempool alloc size will cover the biggest - * command we support. + * Limit the max command size to prevent iod->sg allocations going + * over a single page. */ - alloc_size = nvme_pci_iod_alloc_size(); - WARN_ON_ONCE(alloc_size > PAGE_SIZE); + dev->ctrl.max_hw_sectors = min_t(u32, + NVME_MAX_KB_SZ << 1, dma_max_mapping_size(&pdev->dev) >> 9); + dev->ctrl.max_segments = NVME_MAX_SEGS; - dev->iod_mempool = mempool_create_node(1, mempool_kmalloc, - mempool_kfree, - (void *) alloc_size, - GFP_KERNEL, node); - if (!dev->iod_mempool) { - result = -ENOMEM; - goto release_pools; - } + /* + * There is no support for SGLs for metadata (yet), so we are limited to + * a single integrity segment for the separate metadata pointer. + */ + dev->ctrl.max_integrity_segments = 1; + return dev; + +out_put_device: + put_device(dev->dev); + kfree(dev->queues); +out_free_dev: + kfree(dev); + return ERR_PTR(ret); +} + +static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct nvme_dev *dev; + int result = -ENOMEM; - result = nvme_init_ctrl(&dev->ctrl, &pdev->dev, &nvme_pci_ctrl_ops, - quirks); + dev = nvme_pci_alloc_dev(pdev, id); + if (!dev) + return -ENOMEM; + + result = nvme_dev_map(dev); if (result) - goto release_mempool; + goto out_uninit_ctrl; + + result = nvme_setup_prp_pools(dev); + if (result) + goto out_dev_unmap; + + result = nvme_pci_alloc_iod_mempool(dev); + if (result) + goto out_release_prp_pools; dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev)); - nvme_reset_ctrl(&dev->ctrl); - async_schedule(nvme_async_probe, dev); + result = nvme_pci_enable(dev); + if (result) + goto out_release_iod_mempool; + + result = nvme_pci_alloc_admin_tag_set(dev); + if (result) + goto out_disable; + + /* + * Mark the controller as connecting before sending admin commands to + * allow the timeout handler to do the right thing. + */ + if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_CONNECTING)) { + dev_warn(dev->ctrl.device, + "failed to mark controller CONNECTING\n"); + result = -EBUSY; + goto out_disable; + } + + result = nvme_init_ctrl_finish(&dev->ctrl, false); + if (result) + goto out_disable; + + nvme_dbbuf_dma_alloc(dev); + + result = nvme_setup_host_mem(dev); + if (result < 0) + goto out_disable; + + result = nvme_setup_io_queues(dev); + if (result) + goto out_disable; + + if (dev->online_queues > 1) { + nvme_pci_alloc_tag_set(dev); + nvme_dbbuf_set(dev); + } else { + dev_warn(dev->ctrl.device, "IO queues not created\n"); + } + + if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_LIVE)) { + dev_warn(dev->ctrl.device, + "failed to mark controller live state\n"); + result = -ENODEV; + goto out_disable; + } + pci_set_drvdata(pdev, dev); + + nvme_start_ctrl(&dev->ctrl); + nvme_put_ctrl(&dev->ctrl); return 0; - release_mempool: +out_disable: + nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING); + nvme_dev_disable(dev, true); + nvme_free_host_mem(dev); + nvme_dev_remove_admin(dev); + nvme_dbbuf_dma_free(dev); + nvme_free_queues(dev, 0); +out_release_iod_mempool: mempool_destroy(dev->iod_mempool); - release_pools: +out_release_prp_pools: nvme_release_prp_pools(dev); - unmap: +out_dev_unmap: nvme_dev_unmap(dev); - put_pci: - put_device(dev->dev); - free: - kfree(dev->queues); - kfree(dev); +out_uninit_ctrl: + nvme_uninit_ctrl(&dev->ctrl); return result; } @@ -3216,13 +3208,6 @@ static void nvme_shutdown(struct pci_dev *pdev) nvme_disable_prepare_reset(dev, true); } -static void nvme_remove_attrs(struct nvme_dev *dev) -{ - if (dev->attrs_added) - sysfs_remove_group(&dev->ctrl.device->kobj, - &nvme_pci_attr_group); -} - /* * The driver's remove may be called on a device in a partially initialized * state. This function must not have any dependencies on the device state in @@ -3244,10 +3229,11 @@ static void nvme_remove(struct pci_dev *pdev) nvme_stop_ctrl(&dev->ctrl); nvme_remove_namespaces(&dev->ctrl); nvme_dev_disable(dev, true); - nvme_remove_attrs(dev); nvme_free_host_mem(dev); nvme_dev_remove_admin(dev); + nvme_dbbuf_dma_free(dev); nvme_free_queues(dev, 0); + mempool_destroy(dev->iod_mempool); nvme_release_prp_pools(dev); nvme_dev_unmap(dev); nvme_uninit_ctrl(&dev->ctrl); @@ -3580,11 +3566,12 @@ static struct pci_driver nvme_driver = { .probe = nvme_probe, .remove = nvme_remove, .shutdown = nvme_shutdown, -#ifdef CONFIG_PM_SLEEP .driver = { - .pm = &nvme_dev_pm_ops, - }, + .probe_type = PROBE_PREFER_ASYNCHRONOUS, +#ifdef CONFIG_PM_SLEEP + .pm = &nvme_dev_pm_ops, #endif + }, .sriov_configure = pci_sriov_configure_simple, .err_handler = &nvme_err_handler, }; diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 6e079abb22ee..de591cdf78f3 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -869,16 +869,16 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, else ctrl->ctrl.max_integrity_segments = 0; - nvme_start_admin_queue(&ctrl->ctrl); + nvme_unquiesce_admin_queue(&ctrl->ctrl); - error = nvme_init_ctrl_finish(&ctrl->ctrl); + error = nvme_init_ctrl_finish(&ctrl->ctrl, false); if (error) goto out_quiesce_queue; return 0; out_quiesce_queue: - nvme_stop_admin_queue(&ctrl->ctrl); + nvme_quiesce_admin_queue(&ctrl->ctrl); blk_sync_queue(ctrl->ctrl.admin_q); out_stop_queue: nvme_rdma_stop_queue(&ctrl->queues[0]); @@ -922,7 +922,7 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new) goto out_cleanup_tagset; if (!new) { - nvme_start_queues(&ctrl->ctrl); + nvme_unquiesce_io_queues(&ctrl->ctrl); if (!nvme_wait_freeze_timeout(&ctrl->ctrl, NVME_IO_TIMEOUT)) { /* * If we timed out waiting for freeze we are likely to @@ -949,7 +949,7 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new) return 0; out_wait_freeze_timed_out: - nvme_stop_queues(&ctrl->ctrl); + nvme_quiesce_io_queues(&ctrl->ctrl); nvme_sync_io_queues(&ctrl->ctrl); nvme_rdma_stop_io_queues(ctrl); out_cleanup_tagset: @@ -964,12 +964,12 @@ out_free_io_queues: static void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl, bool remove) { - nvme_stop_admin_queue(&ctrl->ctrl); + nvme_quiesce_admin_queue(&ctrl->ctrl); blk_sync_queue(ctrl->ctrl.admin_q); nvme_rdma_stop_queue(&ctrl->queues[0]); nvme_cancel_admin_tagset(&ctrl->ctrl); if (remove) { - nvme_start_admin_queue(&ctrl->ctrl); + nvme_unquiesce_admin_queue(&ctrl->ctrl); nvme_remove_admin_tag_set(&ctrl->ctrl); } nvme_rdma_destroy_admin_queue(ctrl); @@ -980,12 +980,12 @@ static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl, { if (ctrl->ctrl.queue_count > 1) { nvme_start_freeze(&ctrl->ctrl); - nvme_stop_queues(&ctrl->ctrl); + nvme_quiesce_io_queues(&ctrl->ctrl); nvme_sync_io_queues(&ctrl->ctrl); nvme_rdma_stop_io_queues(ctrl); nvme_cancel_tagset(&ctrl->ctrl); if (remove) { - nvme_start_queues(&ctrl->ctrl); + nvme_unquiesce_io_queues(&ctrl->ctrl); nvme_remove_io_tag_set(&ctrl->ctrl); } nvme_rdma_free_io_queues(ctrl); @@ -1106,7 +1106,7 @@ static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new) destroy_io: if (ctrl->ctrl.queue_count > 1) { - nvme_stop_queues(&ctrl->ctrl); + nvme_quiesce_io_queues(&ctrl->ctrl); nvme_sync_io_queues(&ctrl->ctrl); nvme_rdma_stop_io_queues(ctrl); nvme_cancel_tagset(&ctrl->ctrl); @@ -1115,7 +1115,7 @@ destroy_io: nvme_rdma_free_io_queues(ctrl); } destroy_admin: - nvme_stop_admin_queue(&ctrl->ctrl); + nvme_quiesce_admin_queue(&ctrl->ctrl); blk_sync_queue(ctrl->ctrl.admin_q); nvme_rdma_stop_queue(&ctrl->queues[0]); nvme_cancel_admin_tagset(&ctrl->ctrl); @@ -1153,13 +1153,13 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work) struct nvme_rdma_ctrl *ctrl = container_of(work, struct nvme_rdma_ctrl, err_work); - nvme_auth_stop(&ctrl->ctrl); nvme_stop_keep_alive(&ctrl->ctrl); flush_work(&ctrl->ctrl.async_event_work); nvme_rdma_teardown_io_queues(ctrl, false); - nvme_start_queues(&ctrl->ctrl); + nvme_unquiesce_io_queues(&ctrl->ctrl); nvme_rdma_teardown_admin_queue(ctrl, false); - nvme_start_admin_queue(&ctrl->ctrl); + nvme_unquiesce_admin_queue(&ctrl->ctrl); + nvme_auth_stop(&ctrl->ctrl); if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) { /* state change failure is ok if we started ctrl delete */ @@ -2207,7 +2207,7 @@ static const struct blk_mq_ops nvme_rdma_admin_mq_ops = { static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown) { nvme_rdma_teardown_io_queues(ctrl, shutdown); - nvme_stop_admin_queue(&ctrl->ctrl); + nvme_quiesce_admin_queue(&ctrl->ctrl); if (shutdown) nvme_shutdown_ctrl(&ctrl->ctrl); else diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 1eed0fc26b3a..776b8d9dfca7 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1875,7 +1875,7 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new) goto out_cleanup_connect_q; if (!new) { - nvme_start_queues(ctrl); + nvme_unquiesce_io_queues(ctrl); if (!nvme_wait_freeze_timeout(ctrl, NVME_IO_TIMEOUT)) { /* * If we timed out waiting for freeze we are likely to @@ -1902,7 +1902,7 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new) return 0; out_wait_freeze_timed_out: - nvme_stop_queues(ctrl); + nvme_quiesce_io_queues(ctrl); nvme_sync_io_queues(ctrl); nvme_tcp_stop_io_queues(ctrl); out_cleanup_connect_q: @@ -1947,16 +1947,16 @@ static int nvme_tcp_configure_admin_queue(struct nvme_ctrl *ctrl, bool new) if (error) goto out_stop_queue; - nvme_start_admin_queue(ctrl); + nvme_unquiesce_admin_queue(ctrl); - error = nvme_init_ctrl_finish(ctrl); + error = nvme_init_ctrl_finish(ctrl, false); if (error) goto out_quiesce_queue; return 0; out_quiesce_queue: - nvme_stop_admin_queue(ctrl); + nvme_quiesce_admin_queue(ctrl); blk_sync_queue(ctrl->admin_q); out_stop_queue: nvme_tcp_stop_queue(ctrl, 0); @@ -1972,12 +1972,12 @@ out_free_queue: static void nvme_tcp_teardown_admin_queue(struct nvme_ctrl *ctrl, bool remove) { - nvme_stop_admin_queue(ctrl); + nvme_quiesce_admin_queue(ctrl); blk_sync_queue(ctrl->admin_q); nvme_tcp_stop_queue(ctrl, 0); nvme_cancel_admin_tagset(ctrl); if (remove) - nvme_start_admin_queue(ctrl); + nvme_unquiesce_admin_queue(ctrl); nvme_tcp_destroy_admin_queue(ctrl, remove); } @@ -1986,14 +1986,14 @@ static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl, { if (ctrl->queue_count <= 1) return; - nvme_stop_admin_queue(ctrl); + nvme_quiesce_admin_queue(ctrl); nvme_start_freeze(ctrl); - nvme_stop_queues(ctrl); + nvme_quiesce_io_queues(ctrl); nvme_sync_io_queues(ctrl); nvme_tcp_stop_io_queues(ctrl); nvme_cancel_tagset(ctrl); if (remove) - nvme_start_queues(ctrl); + nvme_unquiesce_io_queues(ctrl); nvme_tcp_destroy_io_queues(ctrl, remove); } @@ -2074,14 +2074,14 @@ static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new) destroy_io: if (ctrl->queue_count > 1) { - nvme_stop_queues(ctrl); + nvme_quiesce_io_queues(ctrl); nvme_sync_io_queues(ctrl); nvme_tcp_stop_io_queues(ctrl); nvme_cancel_tagset(ctrl); nvme_tcp_destroy_io_queues(ctrl, new); } destroy_admin: - nvme_stop_admin_queue(ctrl); + nvme_quiesce_admin_queue(ctrl); blk_sync_queue(ctrl->admin_q); nvme_tcp_stop_queue(ctrl, 0); nvme_cancel_admin_tagset(ctrl); @@ -2119,14 +2119,14 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work) struct nvme_tcp_ctrl, err_work); struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl; - nvme_auth_stop(ctrl); nvme_stop_keep_alive(ctrl); flush_work(&ctrl->async_event_work); nvme_tcp_teardown_io_queues(ctrl, false); /* unquiesce to fail fast pending requests */ - nvme_start_queues(ctrl); + nvme_unquiesce_io_queues(ctrl); nvme_tcp_teardown_admin_queue(ctrl, false); - nvme_start_admin_queue(ctrl); + nvme_unquiesce_admin_queue(ctrl); + nvme_auth_stop(ctrl); if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING)) { /* state change failure is ok if we started ctrl delete */ @@ -2141,7 +2141,7 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work) static void nvme_tcp_teardown_ctrl(struct nvme_ctrl *ctrl, bool shutdown) { nvme_tcp_teardown_io_queues(ctrl, shutdown); - nvme_stop_admin_queue(ctrl); + nvme_quiesce_admin_queue(ctrl); if (shutdown) nvme_shutdown_ctrl(ctrl); else diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index c8a061ce3ee5..6b46f90a63cf 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -370,7 +370,9 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) memcpy_and_pad(id->mn, sizeof(id->mn), subsys->model_number, strlen(subsys->model_number), ' '); memcpy_and_pad(id->fr, sizeof(id->fr), - UTS_RELEASE, strlen(UTS_RELEASE), ' '); + subsys->firmware_rev, strlen(subsys->firmware_rev), ' '); + + put_unaligned_le24(subsys->ieee_oui, id->ieee); id->rab = 6; @@ -379,11 +381,6 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) else id->cntrltype = NVME_CTRL_IO; - /* - * XXX: figure out how we can assign a IEEE OUI, but until then - * the safest is to leave it as zeroes. - */ - /* we support multiple ports, multiples hosts and ANA: */ id->cmic = NVME_CTRL_CMIC_MULTI_PORT | NVME_CTRL_CMIC_MULTI_CTRL | NVME_CTRL_CMIC_ANA; diff --git a/drivers/nvme/target/auth.c b/drivers/nvme/target/auth.c index c4113b43dbfe..4dcddcf95279 100644 --- a/drivers/nvme/target/auth.c +++ b/drivers/nvme/target/auth.c @@ -45,9 +45,11 @@ int nvmet_auth_set_key(struct nvmet_host *host, const char *secret, if (!dhchap_secret) return -ENOMEM; if (set_ctrl) { + kfree(host->dhchap_ctrl_secret); host->dhchap_ctrl_secret = strim(dhchap_secret); host->dhchap_ctrl_key_hash = key_hash; } else { + kfree(host->dhchap_secret); host->dhchap_secret = strim(dhchap_secret); host->dhchap_key_hash = key_hash; } diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 9443ee1d4ae3..d48deb9bdb27 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -1259,6 +1259,116 @@ static ssize_t nvmet_subsys_attr_model_store(struct config_item *item, } CONFIGFS_ATTR(nvmet_subsys_, attr_model); +static ssize_t nvmet_subsys_attr_ieee_oui_show(struct config_item *item, + char *page) +{ + struct nvmet_subsys *subsys = to_subsys(item); + + return sysfs_emit(page, "0x%06x\n", subsys->ieee_oui); +} + +static ssize_t nvmet_subsys_attr_ieee_oui_store_locked(struct nvmet_subsys *subsys, + const char *page, size_t count) +{ + uint32_t val = 0; + int ret; + + if (subsys->subsys_discovered) { + pr_err("Can't set IEEE OUI. 0x%06x is already assigned\n", + subsys->ieee_oui); + return -EINVAL; + } + + ret = kstrtou32(page, 0, &val); + if (ret < 0) + return ret; + + if (val >= 0x1000000) + return -EINVAL; + + subsys->ieee_oui = val; + + return count; +} + +static ssize_t nvmet_subsys_attr_ieee_oui_store(struct config_item *item, + const char *page, size_t count) +{ + struct nvmet_subsys *subsys = to_subsys(item); + ssize_t ret; + + down_write(&nvmet_config_sem); + mutex_lock(&subsys->lock); + ret = nvmet_subsys_attr_ieee_oui_store_locked(subsys, page, count); + mutex_unlock(&subsys->lock); + up_write(&nvmet_config_sem); + + return ret; +} +CONFIGFS_ATTR(nvmet_subsys_, attr_ieee_oui); + +static ssize_t nvmet_subsys_attr_firmware_show(struct config_item *item, + char *page) +{ + struct nvmet_subsys *subsys = to_subsys(item); + + return sysfs_emit(page, "%s\n", subsys->firmware_rev); +} + +static ssize_t nvmet_subsys_attr_firmware_store_locked(struct nvmet_subsys *subsys, + const char *page, size_t count) +{ + int pos = 0, len; + char *val; + + if (subsys->subsys_discovered) { + pr_err("Can't set firmware revision. %s is already assigned\n", + subsys->firmware_rev); + return -EINVAL; + } + + len = strcspn(page, "\n"); + if (!len) + return -EINVAL; + + if (len > NVMET_FR_MAX_SIZE) { + pr_err("Firmware revision size can not exceed %d Bytes\n", + NVMET_FR_MAX_SIZE); + return -EINVAL; + } + + for (pos = 0; pos < len; pos++) { + if (!nvmet_is_ascii(page[pos])) + return -EINVAL; + } + + val = kmemdup_nul(page, len, GFP_KERNEL); + if (!val) + return -ENOMEM; + + kfree(subsys->firmware_rev); + + subsys->firmware_rev = val; + + return count; +} + +static ssize_t nvmet_subsys_attr_firmware_store(struct config_item *item, + const char *page, size_t count) +{ + struct nvmet_subsys *subsys = to_subsys(item); + ssize_t ret; + + down_write(&nvmet_config_sem); + mutex_lock(&subsys->lock); + ret = nvmet_subsys_attr_firmware_store_locked(subsys, page, count); + mutex_unlock(&subsys->lock); + up_write(&nvmet_config_sem); + + return ret; +} +CONFIGFS_ATTR(nvmet_subsys_, attr_firmware); + #ifdef CONFIG_BLK_DEV_INTEGRITY static ssize_t nvmet_subsys_attr_pi_enable_show(struct config_item *item, char *page) @@ -1290,6 +1400,8 @@ static ssize_t nvmet_subsys_attr_qid_max_show(struct config_item *item, static ssize_t nvmet_subsys_attr_qid_max_store(struct config_item *item, const char *page, size_t cnt) { + struct nvmet_subsys *subsys = to_subsys(item); + struct nvmet_ctrl *ctrl; u16 qid_max; if (sscanf(page, "%hu\n", &qid_max) != 1) @@ -1299,8 +1411,13 @@ static ssize_t nvmet_subsys_attr_qid_max_store(struct config_item *item, return -EINVAL; down_write(&nvmet_config_sem); - to_subsys(item)->max_qid = qid_max; + subsys->max_qid = qid_max; + + /* Force reconnect */ + list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) + ctrl->ops->delete_ctrl(ctrl); up_write(&nvmet_config_sem); + return cnt; } CONFIGFS_ATTR(nvmet_subsys_, attr_qid_max); @@ -1313,6 +1430,8 @@ static struct configfs_attribute *nvmet_subsys_attrs[] = { &nvmet_subsys_attr_attr_cntlid_max, &nvmet_subsys_attr_attr_model, &nvmet_subsys_attr_attr_qid_max, + &nvmet_subsys_attr_attr_ieee_oui, + &nvmet_subsys_attr_attr_firmware, #ifdef CONFIG_BLK_DEV_INTEGRITY &nvmet_subsys_attr_attr_pi_enable, #endif diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index aecb5853f8da..f66ed13d7c11 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -10,11 +10,14 @@ #include <linux/pci-p2pdma.h> #include <linux/scatterlist.h> +#include <generated/utsrelease.h> + #define CREATE_TRACE_POINTS #include "trace.h" #include "nvmet.h" +struct kmem_cache *nvmet_bvec_cache; struct workqueue_struct *buffered_io_wq; struct workqueue_struct *zbd_wq; static const struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX]; @@ -695,11 +698,10 @@ static void nvmet_update_sq_head(struct nvmet_req *req) if (req->sq->size) { u32 old_sqhd, new_sqhd; + old_sqhd = READ_ONCE(req->sq->sqhd); do { - old_sqhd = req->sq->sqhd; new_sqhd = (old_sqhd + 1) % req->sq->size; - } while (cmpxchg(&req->sq->sqhd, old_sqhd, new_sqhd) != - old_sqhd); + } while (!try_cmpxchg(&req->sq->sqhd, &old_sqhd, new_sqhd)); } req->cqe->sq_head = cpu_to_le16(req->sq->sqhd & 0x0000FFFF); } @@ -1561,6 +1563,14 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn, goto free_subsys; } + subsys->ieee_oui = 0; + + subsys->firmware_rev = kstrndup(UTS_RELEASE, NVMET_FR_MAX_SIZE, GFP_KERNEL); + if (!subsys->firmware_rev) { + ret = -ENOMEM; + goto free_mn; + } + switch (type) { case NVME_NQN_NVME: subsys->max_qid = NVMET_NR_QUEUES; @@ -1572,14 +1582,14 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn, default: pr_err("%s: Unknown Subsystem type - %d\n", __func__, type); ret = -EINVAL; - goto free_mn; + goto free_fr; } subsys->type = type; subsys->subsysnqn = kstrndup(subsysnqn, NVMF_NQN_SIZE, GFP_KERNEL); if (!subsys->subsysnqn) { ret = -ENOMEM; - goto free_mn; + goto free_fr; } subsys->cntlid_min = NVME_CNTLID_MIN; subsys->cntlid_max = NVME_CNTLID_MAX; @@ -1592,6 +1602,8 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn, return subsys; +free_fr: + kfree(subsys->firmware_rev); free_mn: kfree(subsys->model_number); free_subsys: @@ -1611,6 +1623,7 @@ static void nvmet_subsys_free(struct kref *ref) kfree(subsys->subsysnqn); kfree(subsys->model_number); + kfree(subsys->firmware_rev); kfree(subsys); } @@ -1631,26 +1644,28 @@ void nvmet_subsys_put(struct nvmet_subsys *subsys) static int __init nvmet_init(void) { - int error; + int error = -ENOMEM; nvmet_ana_group_enabled[NVMET_DEFAULT_ANA_GRPID] = 1; + nvmet_bvec_cache = kmem_cache_create("nvmet-bvec", + NVMET_MAX_MPOOL_BVEC * sizeof(struct bio_vec), 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!nvmet_bvec_cache) + return -ENOMEM; + zbd_wq = alloc_workqueue("nvmet-zbd-wq", WQ_MEM_RECLAIM, 0); if (!zbd_wq) - return -ENOMEM; + goto out_destroy_bvec_cache; buffered_io_wq = alloc_workqueue("nvmet-buffered-io-wq", WQ_MEM_RECLAIM, 0); - if (!buffered_io_wq) { - error = -ENOMEM; + if (!buffered_io_wq) goto out_free_zbd_work_queue; - } nvmet_wq = alloc_workqueue("nvmet-wq", WQ_MEM_RECLAIM, 0); - if (!nvmet_wq) { - error = -ENOMEM; + if (!nvmet_wq) goto out_free_buffered_work_queue; - } error = nvmet_init_discovery(); if (error) @@ -1669,6 +1684,8 @@ out_free_buffered_work_queue: destroy_workqueue(buffered_io_wq); out_free_zbd_work_queue: destroy_workqueue(zbd_wq); +out_destroy_bvec_cache: + kmem_cache_destroy(nvmet_bvec_cache); return error; } @@ -1680,6 +1697,7 @@ static void __exit nvmet_exit(void) destroy_workqueue(nvmet_wq); destroy_workqueue(buffered_io_wq); destroy_workqueue(zbd_wq); + kmem_cache_destroy(nvmet_bvec_cache); BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_entry) != 1024); BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_hdr) != 1024); diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c index 64b47e2a4633..e55ec6fefd7f 100644 --- a/drivers/nvme/target/io-cmd-file.c +++ b/drivers/nvme/target/io-cmd-file.c @@ -11,7 +11,6 @@ #include <linux/fs.h> #include "nvmet.h" -#define NVMET_MAX_MPOOL_BVEC 16 #define NVMET_MIN_MPOOL_OBJ 16 void nvmet_file_ns_revalidate(struct nvmet_ns *ns) @@ -26,8 +25,6 @@ void nvmet_file_ns_disable(struct nvmet_ns *ns) flush_workqueue(buffered_io_wq); mempool_destroy(ns->bvec_pool); ns->bvec_pool = NULL; - kmem_cache_destroy(ns->bvec_cache); - ns->bvec_cache = NULL; fput(ns->file); ns->file = NULL; } @@ -59,16 +56,8 @@ int nvmet_file_ns_enable(struct nvmet_ns *ns) ns->blksize_shift = min_t(u8, file_inode(ns->file)->i_blkbits, 12); - ns->bvec_cache = kmem_cache_create("nvmet-bvec", - NVMET_MAX_MPOOL_BVEC * sizeof(struct bio_vec), - 0, SLAB_HWCACHE_ALIGN, NULL); - if (!ns->bvec_cache) { - ret = -ENOMEM; - goto err; - } - ns->bvec_pool = mempool_create(NVMET_MIN_MPOOL_OBJ, mempool_alloc_slab, - mempool_free_slab, ns->bvec_cache); + mempool_free_slab, nvmet_bvec_cache); if (!ns->bvec_pool) { ret = -ENOMEM; @@ -77,9 +66,10 @@ int nvmet_file_ns_enable(struct nvmet_ns *ns) return ret; err: + fput(ns->file); + ns->file = NULL; ns->size = 0; ns->blksize_shift = 0; - nvmet_file_ns_disable(ns); return ret; } diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index b45fe3adf015..4173099ef9a4 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -375,9 +375,9 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) ctrl->ctrl.max_hw_sectors = (NVME_LOOP_MAX_SEGMENTS - 1) << (PAGE_SHIFT - 9); - nvme_start_admin_queue(&ctrl->ctrl); + nvme_unquiesce_admin_queue(&ctrl->ctrl); - error = nvme_init_ctrl_finish(&ctrl->ctrl); + error = nvme_init_ctrl_finish(&ctrl->ctrl, false); if (error) goto out_cleanup_tagset; @@ -394,12 +394,12 @@ out_free_sq: static void nvme_loop_shutdown_ctrl(struct nvme_loop_ctrl *ctrl) { if (ctrl->ctrl.queue_count > 1) { - nvme_stop_queues(&ctrl->ctrl); + nvme_quiesce_io_queues(&ctrl->ctrl); nvme_cancel_tagset(&ctrl->ctrl); nvme_loop_destroy_io_queues(ctrl); } - nvme_stop_admin_queue(&ctrl->ctrl); + nvme_quiesce_admin_queue(&ctrl->ctrl); if (ctrl->ctrl.state == NVME_CTRL_LIVE) nvme_shutdown_ctrl(&ctrl->ctrl); diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index dfe3894205aa..89bedfcd974c 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -29,6 +29,7 @@ #define NVMET_DEFAULT_CTRL_MODEL "Linux" #define NVMET_MN_MAX_SIZE 40 #define NVMET_SN_MAX_SIZE 20 +#define NVMET_FR_MAX_SIZE 8 /* * Supported optional AENs: @@ -77,7 +78,6 @@ struct nvmet_ns { struct completion disable_done; mempool_t *bvec_pool; - struct kmem_cache *bvec_cache; int use_p2pmem; struct pci_dev *p2p_dev; @@ -264,6 +264,8 @@ struct nvmet_subsys { struct config_group allowed_hosts_group; char *model_number; + u32 ieee_oui; + char *firmware_rev; #ifdef CONFIG_NVME_TARGET_PASSTHRU struct nvme_ctrl *passthru_ctrl; @@ -393,6 +395,8 @@ struct nvmet_req { u64 error_slba; }; +#define NVMET_MAX_MPOOL_BVEC 16 +extern struct kmem_cache *nvmet_bvec_cache; extern struct workqueue_struct *buffered_io_wq; extern struct workqueue_struct *zbd_wq; extern struct workqueue_struct *nvmet_wq; diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 050d7d0cd81b..d6be2a686100 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -797,6 +797,7 @@ enum nvme_opcode { nvme_cmd_zone_mgmt_send = 0x79, nvme_cmd_zone_mgmt_recv = 0x7a, nvme_cmd_zone_append = 0x7d, + nvme_cmd_vendor_start = 0x80, }; #define nvme_opcode_name(opcode) { opcode, #opcode } @@ -963,6 +964,7 @@ enum { NVME_RW_PRINFO_PRCHK_GUARD = 1 << 12, NVME_RW_PRINFO_PRACT = 1 << 13, NVME_RW_DTYPE_STREAMS = 1 << 4, + NVME_WZ_DEAC = 1 << 9, }; struct nvme_dsm_cmd { |