From 3110b942d36b961858664486d72f815d78c956c3 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 13 Sep 2021 16:25:56 -0300 Subject: IB/qib: Fix clang confusion of NULL pointer comparison clang becomes confused due to the comparison to NULL in a integer constant expression context: >> drivers/infiniband/hw/qib/qib_sysfs.c:413:1: error: static_assert expression is not an integral constant expression QIB_DIAGC_ATTR(rc_resends); ^~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/infiniband/hw/qib/qib_sysfs.c:406:16: note: expanded from macro 'QIB_DIAGC_ATTR' static_assert(&((struct qib_ibport *)0)->rvp.n_##N != (u64 *)NULL); \ Nathan found __same_type that solves this problem nicely, so use it instead. Reported-by: kernel test robot Suggested-by: Nathan Chancellor Reviewed-by: Nick Desaulniers Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qib/qib_sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c index 452e2355d24e..0a3b28142c05 100644 --- a/drivers/infiniband/hw/qib/qib_sysfs.c +++ b/drivers/infiniband/hw/qib/qib_sysfs.c @@ -403,7 +403,7 @@ static ssize_t diagc_attr_store(struct ib_device *ibdev, u32 port_num, } #define QIB_DIAGC_ATTR(N) \ - static_assert(&((struct qib_ibport *)0)->rvp.n_##N != (u64 *)NULL); \ + static_assert(__same_type(((struct qib_ibport *)0)->rvp.n_##N, u64)); \ static struct qib_diagc_attr qib_diagc_attr_##N = { \ .attr = __ATTR(N, 0664, diagc_attr_show, diagc_attr_store), \ .counter = \ -- cgit v1.2.3 From 2cc74e1ee31d00393b6698ec80b322fd26523da4 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Wed, 8 Sep 2021 13:43:28 +0200 Subject: IB/cma: Do not send IGMP leaves for sendonly Multicast groups ROCE uses IGMP for Multicast instead of the native Infiniband system where joins are required in order to post messages on the Multicast group. On Ethernet one can send Multicast messages to arbitrary addresses without the need to subscribe to a group. So ROCE correctly does not send IGMP joins during rdma_join_multicast(). F.e. in cma_iboe_join_multicast() we see: if (addr->sa_family == AF_INET) { if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) { ib.rec.hop_limit = IPV6_DEFAULT_HOPLIMIT; if (!send_only) { err = cma_igmp_send(ndev, &ib.rec.mgid, true); } } } else { So the IGMP join is suppressed as it is unnecessary. However no such check is done in destroy_mc(). And therefore leaving a sendonly multicast group will send an IGMP leave. This means that the following scenario can lead to a multicast receiver unexpectedly being unsubscribed from a MC group: 1. Sender thread does a sendonly join on MC group X. No IGMP join is sent. 2. Receiver thread does a regular join on the same MC Group x. IGMP join is sent and the receiver begins to get messages. 3. Sender thread terminates and destroys MC group X. IGMP leave is sent and the receiver no longer receives data. This patch adds the same logic for sendonly joins to destroy_mc() that is also used in cma_iboe_join_multicast(). Fixes: ab15c95a17b3 ("IB/core: Support for CMA multicast join flags") Link: https://lore.kernel.org/r/alpine.DEB.2.22.394.2109081340540.668072@gentwo.de Signed-off-by: Christoph Lameter Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index c40791baced5..86ee3b01b3ee 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1810,6 +1810,8 @@ static void cma_release_port(struct rdma_id_private *id_priv) static void destroy_mc(struct rdma_id_private *id_priv, struct cma_multicast *mc) { + bool send_only = mc->join_state == BIT(SENDONLY_FULLMEMBER_JOIN); + if (rdma_cap_ib_mcast(id_priv->id.device, id_priv->id.port_num)) ib_sa_free_multicast(mc->sa_mc); @@ -1826,7 +1828,10 @@ static void destroy_mc(struct rdma_id_private *id_priv, cma_set_mgid(id_priv, (struct sockaddr *)&mc->addr, &mgid); - cma_igmp_send(ndev, &mgid, false); + + if (!send_only) + cma_igmp_send(ndev, &mgid, false); + dev_put(ndev); } -- cgit v1.2.3 From ca465e1f1f9b38fe916a36f7d80c5d25f2337c81 Mon Sep 17 00:00:00 2001 From: Tao Liu Date: Mon, 13 Sep 2021 17:33:44 +0800 Subject: RDMA/cma: Fix listener leak in rdma_cma_listen_on_all() failure If cma_listen_on_all() fails it leaves the per-device ID still on the listen_list but the state is not set to RDMA_CM_ADDR_BOUND. When the cmid is eventually destroyed cma_cancel_listens() is not called due to the wrong state, however the per-device IDs are still holding the refcount preventing the ID from being destroyed, thus deadlocking: task:rping state:D stack: 0 pid:19605 ppid: 47036 flags:0x00000084 Call Trace: __schedule+0x29a/0x780 ? free_unref_page_commit+0x9b/0x110 schedule+0x3c/0xa0 schedule_timeout+0x215/0x2b0 ? __flush_work+0x19e/0x1e0 wait_for_completion+0x8d/0xf0 _destroy_id+0x144/0x210 [rdma_cm] ucma_close_id+0x2b/0x40 [rdma_ucm] __destroy_id+0x93/0x2c0 [rdma_ucm] ? __xa_erase+0x4a/0xa0 ucma_destroy_id+0x9a/0x120 [rdma_ucm] ucma_write+0xb8/0x130 [rdma_ucm] vfs_write+0xb4/0x250 ksys_write+0xb5/0xd0 ? syscall_trace_enter.isra.19+0x123/0x190 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Ensure that cma_listen_on_all() atomically unwinds its action under the lock during error. Fixes: c80a0c52d85c ("RDMA/cma: Add missing error handling of listen_id") Link: https://lore.kernel.org/r/20210913093344.17230-1-thomas.liu@ucloud.cn Signed-off-by: Tao Liu Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 86ee3b01b3ee..5aa58897965d 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1746,15 +1746,16 @@ static void cma_cancel_route(struct rdma_id_private *id_priv) } } -static void cma_cancel_listens(struct rdma_id_private *id_priv) +static void _cma_cancel_listens(struct rdma_id_private *id_priv) { struct rdma_id_private *dev_id_priv; + lockdep_assert_held(&lock); + /* * Remove from listen_any_list to prevent added devices from spawning * additional listen requests. */ - mutex_lock(&lock); list_del(&id_priv->list); while (!list_empty(&id_priv->listen_list)) { @@ -1768,6 +1769,12 @@ static void cma_cancel_listens(struct rdma_id_private *id_priv) rdma_destroy_id(&dev_id_priv->id); mutex_lock(&lock); } +} + +static void cma_cancel_listens(struct rdma_id_private *id_priv) +{ + mutex_lock(&lock); + _cma_cancel_listens(id_priv); mutex_unlock(&lock); } @@ -2579,7 +2586,7 @@ static int cma_listen_on_all(struct rdma_id_private *id_priv) return 0; err_listen: - list_del(&id_priv->list); + _cma_cancel_listens(id_priv); mutex_unlock(&lock); if (to_destroy) rdma_destroy_id(&to_destroy->id); -- cgit v1.2.3 From 5b1e985f7626307c451f98883f5e2665ee208e1c Mon Sep 17 00:00:00 2001 From: Sindhu Devale Date: Thu, 16 Sep 2021 14:12:19 -0500 Subject: RDMA/irdma: Skip CQP ring during a reset Due to duplicate reset flags, CQP commands are processed during reset. This leads CQP failures such as below: irdma0: [Delete Local MAC Entry Cmd Error][op_code=49] status=-27 waiting=1 completion_err=0 maj=0x0 min=0x0 Remove the redundant flag and set the correct reset flag so CPQ is paused during reset Fixes: 8498a30e1b94 ("RDMA/irdma: Register auxiliary driver and implement private channel OPs") Link: https://lore.kernel.org/r/20210916191222.824-2-shiraz.saleem@intel.com Reported-by: LiLiang Signed-off-by: Sindhu Devale Signed-off-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/cm.c | 4 ++-- drivers/infiniband/hw/irdma/hw.c | 6 +++--- drivers/infiniband/hw/irdma/i40iw_if.c | 2 +- drivers/infiniband/hw/irdma/main.h | 1 - drivers/infiniband/hw/irdma/utils.c | 2 +- drivers/infiniband/hw/irdma/verbs.c | 3 +-- 6 files changed, 8 insertions(+), 10 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/irdma/cm.c b/drivers/infiniband/hw/irdma/cm.c index 6b62299abfbb..6dea0a49d171 100644 --- a/drivers/infiniband/hw/irdma/cm.c +++ b/drivers/infiniband/hw/irdma/cm.c @@ -3496,7 +3496,7 @@ static void irdma_cm_disconn_true(struct irdma_qp *iwqp) original_hw_tcp_state == IRDMA_TCP_STATE_TIME_WAIT || last_ae == IRDMA_AE_RDMAP_ROE_BAD_LLP_CLOSE || last_ae == IRDMA_AE_BAD_CLOSE || - last_ae == IRDMA_AE_LLP_CONNECTION_RESET || iwdev->reset)) { + last_ae == IRDMA_AE_LLP_CONNECTION_RESET || iwdev->rf->reset)) { issue_close = 1; iwqp->cm_id = NULL; qp->term_flags = 0; @@ -4250,7 +4250,7 @@ void irdma_cm_teardown_connections(struct irdma_device *iwdev, u32 *ipaddr, teardown_entry); attr.qp_state = IB_QPS_ERR; irdma_modify_qp(&cm_node->iwqp->ibqp, &attr, IB_QP_STATE, NULL); - if (iwdev->reset) + if (iwdev->rf->reset) irdma_cm_disconn(cm_node->iwqp); irdma_rem_ref_cm_node(cm_node); } diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index 00de5ee9a260..33c06a3a4f63 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -1489,7 +1489,7 @@ void irdma_reinitialize_ieq(struct irdma_sc_vsi *vsi) irdma_puda_dele_rsrc(vsi, IRDMA_PUDA_RSRC_TYPE_IEQ, false); if (irdma_initialize_ieq(iwdev)) { - iwdev->reset = true; + iwdev->rf->reset = true; rf->gen_ops.request_reset(rf); } } @@ -1632,13 +1632,13 @@ void irdma_rt_deinit_hw(struct irdma_device *iwdev) case IEQ_CREATED: if (!iwdev->roce_mode) irdma_puda_dele_rsrc(&iwdev->vsi, IRDMA_PUDA_RSRC_TYPE_IEQ, - iwdev->reset); + iwdev->rf->reset); fallthrough; case ILQ_CREATED: if (!iwdev->roce_mode) irdma_puda_dele_rsrc(&iwdev->vsi, IRDMA_PUDA_RSRC_TYPE_ILQ, - iwdev->reset); + iwdev->rf->reset); break; default: ibdev_warn(&iwdev->ibdev, "bad init_state = %d\n", iwdev->init_state); diff --git a/drivers/infiniband/hw/irdma/i40iw_if.c b/drivers/infiniband/hw/irdma/i40iw_if.c index bddf88194d09..d219f64b2c3d 100644 --- a/drivers/infiniband/hw/irdma/i40iw_if.c +++ b/drivers/infiniband/hw/irdma/i40iw_if.c @@ -55,7 +55,7 @@ static void i40iw_close(struct i40e_info *cdev_info, struct i40e_client *client, iwdev = to_iwdev(ibdev); if (reset) - iwdev->reset = true; + iwdev->rf->reset = true; iwdev->iw_status = 0; irdma_port_ibevent(iwdev); diff --git a/drivers/infiniband/hw/irdma/main.h b/drivers/infiniband/hw/irdma/main.h index 743d9e143a99..b678fe712447 100644 --- a/drivers/infiniband/hw/irdma/main.h +++ b/drivers/infiniband/hw/irdma/main.h @@ -346,7 +346,6 @@ struct irdma_device { bool roce_mode:1; bool roce_dcqcn_en:1; bool dcb:1; - bool reset:1; bool iw_ooo:1; enum init_completion_state init_state; diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c index e94470991fe0..ac91ea5296db 100644 --- a/drivers/infiniband/hw/irdma/utils.c +++ b/drivers/infiniband/hw/irdma/utils.c @@ -2507,7 +2507,7 @@ void irdma_modify_qp_to_err(struct irdma_sc_qp *sc_qp) struct irdma_qp *qp = sc_qp->qp_uk.back_qp; struct ib_qp_attr attr; - if (qp->iwdev->reset) + if (qp->iwdev->rf->reset) return; attr.qp_state = IB_QPS_ERR; diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 4fc323402073..829ddfa7e144 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -535,8 +535,7 @@ static int irdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) irdma_qp_rem_ref(&iwqp->ibqp); wait_for_completion(&iwqp->free_qp); irdma_free_lsmm_rsrc(iwqp); - if (!iwdev->reset) - irdma_cqp_qp_destroy_cmd(&iwdev->rf->sc_dev, &iwqp->sc_qp); + irdma_cqp_qp_destroy_cmd(&iwdev->rf->sc_dev, &iwqp->sc_qp); if (!iwqp->user_mode) { if (iwqp->iwscq) { -- cgit v1.2.3 From f4475f249445b3c1fb99919b0514a075b6d6b3d4 Mon Sep 17 00:00:00 2001 From: Sindhu Devale Date: Thu, 16 Sep 2021 14:12:20 -0500 Subject: RDMA/irdma: Validate number of CQ entries on create CQ Add lower bound check for CQ entries at creation time. Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb APIs") Link: https://lore.kernel.org/r/20210916191222.824-3-shiraz.saleem@intel.com Signed-off-by: Sindhu Devale Signed-off-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 829ddfa7e144..23c47482c749 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -2034,7 +2034,7 @@ static int irdma_create_cq(struct ib_cq *ibcq, /* Kmode allocations */ int rsize; - if (entries > rf->max_cqe) { + if (entries < 1 || entries > rf->max_cqe) { err_code = -EINVAL; goto cq_free_rsrc; } -- cgit v1.2.3 From d3bdcd59633907ee306057b6bb70f06dce47dddc Mon Sep 17 00:00:00 2001 From: Sindhu Devale Date: Thu, 16 Sep 2021 14:12:21 -0500 Subject: RDMA/irdma: Report correct WC error when transport retry counter is exceeded When the retry counter exceeds, as the remote QP didn't send any Ack or Nack an asynchronous event (AE) for too many retries is generated. Add code to handle the AE and set the correct IB WC error code IB_WC_RETRY_EXC_ERR. Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb APIs") Link: https://lore.kernel.org/r/20210916191222.824-4-shiraz.saleem@intel.com Signed-off-by: Sindhu Devale Signed-off-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/hw.c | 3 +++ drivers/infiniband/hw/irdma/user.h | 1 + drivers/infiniband/hw/irdma/verbs.c | 2 ++ 3 files changed, 6 insertions(+) (limited to 'drivers') diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index 33c06a3a4f63..cb9a8e24e3b7 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -176,6 +176,9 @@ static void irdma_set_flush_fields(struct irdma_sc_qp *qp, case IRDMA_AE_LLP_RECEIVED_MPA_CRC_ERROR: qp->flush_code = FLUSH_GENERAL_ERR; break; + case IRDMA_AE_LLP_TOO_MANY_RETRIES: + qp->flush_code = FLUSH_RETRY_EXC_ERR; + break; default: qp->flush_code = FLUSH_FATAL_ERR; break; diff --git a/drivers/infiniband/hw/irdma/user.h b/drivers/infiniband/hw/irdma/user.h index ff705f323233..267102d1049d 100644 --- a/drivers/infiniband/hw/irdma/user.h +++ b/drivers/infiniband/hw/irdma/user.h @@ -102,6 +102,7 @@ enum irdma_flush_opcode { FLUSH_REM_OP_ERR, FLUSH_LOC_LEN_ERR, FLUSH_FATAL_ERR, + FLUSH_RETRY_EXC_ERR, }; enum irdma_cmpl_status { diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 23c47482c749..c7e129ee74d0 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -3352,6 +3352,8 @@ static enum ib_wc_status irdma_flush_err_to_ib_wc_status(enum irdma_flush_opcode return IB_WC_LOC_LEN_ERR; case FLUSH_GENERAL_ERR: return IB_WC_WR_FLUSH_ERR; + case FLUSH_RETRY_EXC_ERR: + return IB_WC_RETRY_EXC_ERR; case FLUSH_FATAL_ERR: default: return IB_WC_FATAL_ERR; -- cgit v1.2.3 From 9f7fa37a6bd90f2749c67f8524334c387d972eb9 Mon Sep 17 00:00:00 2001 From: Sindhu Devale Date: Thu, 16 Sep 2021 14:12:22 -0500 Subject: RDMA/irdma: Report correct WC error when there are MW bind errors Report the correct WC error when MW bind error related asynchronous events are generated by HW. Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb APIs") Link: https://lore.kernel.org/r/20210916191222.824-5-shiraz.saleem@intel.com Signed-off-by: Sindhu Devale Signed-off-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/hw.c | 5 +++++ drivers/infiniband/hw/irdma/user.h | 1 + drivers/infiniband/hw/irdma/verbs.c | 2 ++ 3 files changed, 8 insertions(+) (limited to 'drivers') diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index cb9a8e24e3b7..7de525a5ccf8 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -179,6 +179,11 @@ static void irdma_set_flush_fields(struct irdma_sc_qp *qp, case IRDMA_AE_LLP_TOO_MANY_RETRIES: qp->flush_code = FLUSH_RETRY_EXC_ERR; break; + case IRDMA_AE_AMP_MWBIND_INVALID_RIGHTS: + case IRDMA_AE_AMP_MWBIND_BIND_DISABLED: + case IRDMA_AE_AMP_MWBIND_INVALID_BOUNDS: + qp->flush_code = FLUSH_MW_BIND_ERR; + break; default: qp->flush_code = FLUSH_FATAL_ERR; break; diff --git a/drivers/infiniband/hw/irdma/user.h b/drivers/infiniband/hw/irdma/user.h index 267102d1049d..3dcbb1fbf2c6 100644 --- a/drivers/infiniband/hw/irdma/user.h +++ b/drivers/infiniband/hw/irdma/user.h @@ -103,6 +103,7 @@ enum irdma_flush_opcode { FLUSH_LOC_LEN_ERR, FLUSH_FATAL_ERR, FLUSH_RETRY_EXC_ERR, + FLUSH_MW_BIND_ERR, }; enum irdma_cmpl_status { diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index c7e129ee74d0..7110ebf834f9 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -3354,6 +3354,8 @@ static enum ib_wc_status irdma_flush_err_to_ib_wc_status(enum irdma_flush_opcode return IB_WC_WR_FLUSH_ERR; case FLUSH_RETRY_EXC_ERR: return IB_WC_RETRY_EXC_ERR; + case FLUSH_MW_BIND_ERR: + return IB_WC_MW_BIND_ERR; case FLUSH_FATAL_ERR: default: return IB_WC_FATAL_ERR; -- cgit v1.2.3 From bc0bdc5afaa740d782fbf936aaeebd65e5c2921d Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 15 Sep 2021 17:21:43 -0300 Subject: RDMA/cma: Do not change route.addr.src_addr.ss_family If the state is not idle then rdma_bind_addr() will immediately fail and no change to global state should happen. For instance if the state is already RDMA_CM_LISTEN then this will corrupt the src_addr and would cause the test in cma_cancel_operation(): if (cma_any_addr(cma_src_addr(id_priv)) && !id_priv->cma_dev) To view a mangled src_addr, eg with a IPv6 loopback address but an IPv4 family, failing the test. This would manifest as this trace from syzkaller: BUG: KASAN: use-after-free in __list_add_valid+0x93/0xa0 lib/list_debug.c:26 Read of size 8 at addr ffff8881546491e0 by task syz-executor.1/32204 CPU: 1 PID: 32204 Comm: syz-executor.1 Not tainted 5.12.0-rc8-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:79 [inline] dump_stack+0x141/0x1d7 lib/dump_stack.c:120 print_address_description.constprop.0.cold+0x5b/0x2f8 mm/kasan/report.c:232 __kasan_report mm/kasan/report.c:399 [inline] kasan_report.cold+0x7c/0xd8 mm/kasan/report.c:416 __list_add_valid+0x93/0xa0 lib/list_debug.c:26 __list_add include/linux/list.h:67 [inline] list_add_tail include/linux/list.h:100 [inline] cma_listen_on_all drivers/infiniband/core/cma.c:2557 [inline] rdma_listen+0x787/0xe00 drivers/infiniband/core/cma.c:3751 ucma_listen+0x16a/0x210 drivers/infiniband/core/ucma.c:1102 ucma_write+0x259/0x350 drivers/infiniband/core/ucma.c:1732 vfs_write+0x28e/0xa30 fs/read_write.c:603 ksys_write+0x1ee/0x250 fs/read_write.c:658 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xae Which is indicating that an rdma_id_private was destroyed without doing cma_cancel_listens(). Instead of trying to re-use the src_addr memory to indirectly create an any address build one explicitly on the stack and bind to that as any other normal flow would do. Link: https://lore.kernel.org/r/0-v1-9fbb33f5e201+2a-cma_listen_jgg@nvidia.com Cc: stable@vger.kernel.org Fixes: 732d41c545bb ("RDMA/cma: Make the locking for automatic state transition more clear") Reported-by: syzbot+6bb0528b13611047209c@syzkaller.appspotmail.com Tested-by: Hao Sun Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 5aa58897965d..8862b0e572f0 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -3783,9 +3783,13 @@ int rdma_listen(struct rdma_cm_id *id, int backlog) int ret; if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN)) { + struct sockaddr_in any_in = { + .sin_family = AF_INET, + .sin_addr.s_addr = htonl(INADDR_ANY), + }; + /* For a well behaved ULP state will be RDMA_CM_IDLE */ - id->route.addr.src_addr.ss_family = AF_INET; - ret = rdma_bind_addr(id, cma_src_addr(id_priv)); + ret = rdma_bind_addr(id, (struct sockaddr *)&any_in); if (ret) return ret; if (WARN_ON(!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, -- cgit v1.2.3 From 305d568b72f17f674155a2a8275f865f207b3808 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 16 Sep 2021 15:34:46 -0300 Subject: RDMA/cma: Ensure rdma_addr_cancel() happens before issuing more requests The FSM can run in a circle allowing rdma_resolve_ip() to be called twice on the same id_priv. While this cannot happen without going through the work, it violates the invariant that the same address resolution background request cannot be active twice. CPU 1 CPU 2 rdma_resolve_addr(): RDMA_CM_IDLE -> RDMA_CM_ADDR_QUERY rdma_resolve_ip(addr_handler) #1 process_one_req(): for #1 addr_handler(): RDMA_CM_ADDR_QUERY -> RDMA_CM_ADDR_BOUND mutex_unlock(&id_priv->handler_mutex); [.. handler still running ..] rdma_resolve_addr(): RDMA_CM_ADDR_BOUND -> RDMA_CM_ADDR_QUERY rdma_resolve_ip(addr_handler) !! two requests are now on the req_list rdma_destroy_id(): destroy_id_handler_unlock(): _destroy_id(): cma_cancel_operation(): rdma_addr_cancel() // process_one_req() self removes it spin_lock_bh(&lock); cancel_delayed_work(&req->work); if (!list_empty(&req->list)) == true ! rdma_addr_cancel() returns after process_on_req #1 is done kfree(id_priv) process_one_req(): for #2 addr_handler(): mutex_lock(&id_priv->handler_mutex); !! Use after free on id_priv rdma_addr_cancel() expects there to be one req on the list and only cancels the first one. The self-removal behavior of the work only happens after the handler has returned. This yields a situations where the req_list can have two reqs for the same "handle" but rdma_addr_cancel() only cancels the first one. The second req remains active beyond rdma_destroy_id() and will use-after-free id_priv once it inevitably triggers. Fix this by remembering if the id_priv has called rdma_resolve_ip() and always cancel before calling it again. This ensures the req_list never gets more than one item in it and doesn't cost anything in the normal flow that never uses this strange error path. Link: https://lore.kernel.org/r/0-v1-3bc675b8006d+22-syz_cancel_uaf_jgg@nvidia.com Cc: stable@vger.kernel.org Fixes: e51060f08a61 ("IB: IP address based RDMA connection manager") Reported-by: syzbot+dc3dfba010d7671e05f5@syzkaller.appspotmail.com Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 23 +++++++++++++++++++++++ drivers/infiniband/core/cma_priv.h | 1 + 2 files changed, 24 insertions(+) (limited to 'drivers') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 8862b0e572f0..704ce595542c 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1783,6 +1783,14 @@ static void cma_cancel_operation(struct rdma_id_private *id_priv, { switch (state) { case RDMA_CM_ADDR_QUERY: + /* + * We can avoid doing the rdma_addr_cancel() based on state, + * only RDMA_CM_ADDR_QUERY has a work that could still execute. + * Notice that the addr_handler work could still be exiting + * outside this state, however due to the interaction with the + * handler_mutex the work is guaranteed not to touch id_priv + * during exit. + */ rdma_addr_cancel(&id_priv->id.route.addr.dev_addr); break; case RDMA_CM_ROUTE_QUERY: @@ -3425,6 +3433,21 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, if (dst_addr->sa_family == AF_IB) { ret = cma_resolve_ib_addr(id_priv); } else { + /* + * The FSM can return back to RDMA_CM_ADDR_BOUND after + * rdma_resolve_ip() is called, eg through the error + * path in addr_handler(). If this happens the existing + * request must be canceled before issuing a new one. + * Since canceling a request is a bit slow and this + * oddball path is rare, keep track once a request has + * been issued. The track turns out to be a permanent + * state since this is the only cancel as it is + * immediately before rdma_resolve_ip(). + */ + if (id_priv->used_resolve_ip) + rdma_addr_cancel(&id->route.addr.dev_addr); + else + id_priv->used_resolve_ip = 1; ret = rdma_resolve_ip(cma_src_addr(id_priv), dst_addr, &id->route.addr.dev_addr, timeout_ms, addr_handler, diff --git a/drivers/infiniband/core/cma_priv.h b/drivers/infiniband/core/cma_priv.h index 5c463da99845..f92f101ea981 100644 --- a/drivers/infiniband/core/cma_priv.h +++ b/drivers/infiniband/core/cma_priv.h @@ -91,6 +91,7 @@ struct rdma_id_private { u8 afonly; u8 timeout; u8 min_rnr_timer; + u8 used_resolve_ip; enum ib_gid_type gid_type; /* -- cgit v1.2.3 From 14351f08ed5c8b888cdd95651152db7e096ee27f Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 16 Sep 2021 12:05:28 -0300 Subject: RDMA/hns: Work around broken constant propagation in gcc 8 gcc 8.3 and 5.4 throw this: In function 'modify_qp_init_to_rtr', ././include/linux/compiler_types.h:322:38: error: call to '__compiletime_assert_1859' declared with attribute error: FIELD_PREP: value too large for the field _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) [..] drivers/infiniband/hw/hns/hns_roce_common.h:91:52: note: in expansion of macro 'FIELD_PREP' *((__le32 *)ptr + (field_h) / 32) |= cpu_to_le32(FIELD_PREP( \ ^~~~~~~~~~ drivers/infiniband/hw/hns/hns_roce_common.h:95:39: note: in expansion of macro '_hr_reg_write' #define hr_reg_write(ptr, field, val) _hr_reg_write(ptr, field, val) ^~~~~~~~~~~~~ drivers/infiniband/hw/hns/hns_roce_hw_v2.c:4412:2: note: in expansion of macro 'hr_reg_write' hr_reg_write(context, QPC_LP_PKTN_INI, lp_pktn_ini); Because gcc has miscalculated the constantness of lp_pktn_ini: mtu = ib_mtu_enum_to_int(ib_mtu); if (WARN_ON(mtu < 0)) [..] lp_pktn_ini = ilog2(MAX_LP_MSG_LEN / mtu); Since mtu is limited to {256,512,1024,2048,4096} lp_pktn_ini is between 4 and 8 which is compatible with the 4 bit field in the FIELD_PREP. Work around this broken compiler by adding a 'can never be true' constraint on lp_pktn_ini's value which clears out the problem. Fixes: f0cb411aad23 ("RDMA/hns: Use new interface to modify QP context") Link: https://lore.kernel.org/r/0-v1-c773ecb137bc+11f-hns_gcc8_jgg@nvidia.com Reported-by: Geert Uytterhoeven Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 5b9953105752..a9c00a2e8ebd 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -4397,7 +4397,12 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, hr_qp->path_mtu = ib_mtu; mtu = ib_mtu_enum_to_int(ib_mtu); - if (WARN_ON(mtu < 0)) + if (WARN_ON(mtu <= 0)) + return -EINVAL; +#define MAX_LP_MSG_LEN 65536 + /* MTU * (2 ^ LP_PKTN_INI) shouldn't be bigger than 64KB */ + lp_pktn_ini = ilog2(MAX_LP_MSG_LEN / mtu); + if (WARN_ON(lp_pktn_ini >= 0xF)) return -EINVAL; if (attr_mask & IB_QP_PATH_MTU) { @@ -4405,10 +4410,6 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, hr_reg_clear(qpc_mask, QPC_MTU); } -#define MAX_LP_MSG_LEN 65536 - /* MTU * (2 ^ LP_PKTN_INI) shouldn't be bigger than 64KB */ - lp_pktn_ini = ilog2(MAX_LP_MSG_LEN / mtu); - hr_reg_write(context, QPC_LP_PKTN_INI, lp_pktn_ini); hr_reg_clear(qpc_mask, QPC_LP_PKTN_INI); -- cgit v1.2.3 From a86cd017a40a66b1a3db005bfee4e76a1ae9a432 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 13 Sep 2021 11:04:42 +0300 Subject: RDMA/usnic: Lock VF with mutex instead of spinlock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Usnic VF doesn't need lock in atomic context to create QPs, so it is safe to use mutex instead of spinlock. Such change fixes the following smatch error. Smatch static checker warning: lib/kobject.c:289 kobject_set_name_vargs() warn: sleeping in atomic context Fixes: 514aee660df4 ("RDMA: Globally allocate and release QP memory") Link: https://lore.kernel.org/r/2a0e295786c127e518ebee8bb7cafcb819a625f6.1631520231.git.leonro@nvidia.com Reported-by: Dan Carpenter Signed-off-by: Leon Romanovsky Reviewed-by: HÃ¥kon Bugge Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/usnic/usnic_ib.h | 2 +- drivers/infiniband/hw/usnic/usnic_ib_main.c | 2 +- drivers/infiniband/hw/usnic/usnic_ib_verbs.c | 16 ++++++++-------- 3 files changed, 10 insertions(+), 10 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/usnic/usnic_ib.h b/drivers/infiniband/hw/usnic/usnic_ib.h index 84dd682d2334..b350081aeb5a 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib.h +++ b/drivers/infiniband/hw/usnic/usnic_ib.h @@ -90,7 +90,7 @@ struct usnic_ib_dev { struct usnic_ib_vf { struct usnic_ib_dev *pf; - spinlock_t lock; + struct mutex lock; struct usnic_vnic *vnic; unsigned int qp_grp_ref_cnt; struct usnic_ib_pd *pd; diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index 228e9a36dad0..d346dd48e731 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -572,7 +572,7 @@ static int usnic_ib_pci_probe(struct pci_dev *pdev, } vf->pf = pf; - spin_lock_init(&vf->lock); + mutex_init(&vf->lock); mutex_lock(&pf->usdev_lock); list_add_tail(&vf->link, &pf->vf_dev_list); /* diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index 06a4e9d4545d..756a83bcff58 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -196,7 +196,7 @@ find_free_vf_and_create_qp_grp(struct ib_qp *qp, for (i = 0; dev_list[i]; i++) { dev = dev_list[i]; vf = dev_get_drvdata(dev); - spin_lock(&vf->lock); + mutex_lock(&vf->lock); vnic = vf->vnic; if (!usnic_vnic_check_room(vnic, res_spec)) { usnic_dbg("Found used vnic %s from %s\n", @@ -208,10 +208,10 @@ find_free_vf_and_create_qp_grp(struct ib_qp *qp, vf, pd, res_spec, trans_spec); - spin_unlock(&vf->lock); + mutex_unlock(&vf->lock); goto qp_grp_check; } - spin_unlock(&vf->lock); + mutex_unlock(&vf->lock); } usnic_uiom_free_dev_list(dev_list); @@ -220,7 +220,7 @@ find_free_vf_and_create_qp_grp(struct ib_qp *qp, /* Try to find resources on an unused vf */ list_for_each_entry(vf, &us_ibdev->vf_dev_list, link) { - spin_lock(&vf->lock); + mutex_lock(&vf->lock); vnic = vf->vnic; if (vf->qp_grp_ref_cnt == 0 && usnic_vnic_check_room(vnic, res_spec) == 0) { @@ -228,10 +228,10 @@ find_free_vf_and_create_qp_grp(struct ib_qp *qp, vf, pd, res_spec, trans_spec); - spin_unlock(&vf->lock); + mutex_unlock(&vf->lock); goto qp_grp_check; } - spin_unlock(&vf->lock); + mutex_unlock(&vf->lock); } usnic_info("No free qp grp found on %s\n", @@ -253,9 +253,9 @@ static void qp_grp_destroy(struct usnic_ib_qp_grp *qp_grp) WARN_ON(qp_grp->state != IB_QPS_RESET); - spin_lock(&vf->lock); + mutex_lock(&vf->lock); usnic_ib_qp_grp_destroy(qp_grp); - spin_unlock(&vf->lock); + mutex_unlock(&vf->lock); } static int create_qp_validate_user_data(struct usnic_ib_create_qp_cmd cmd) -- cgit v1.2.3 From 7d5cfafe8b4006a75b55c2f1fdfdb363f9a5cc98 Mon Sep 17 00:00:00 2001 From: Guo Zhi Date: Wed, 22 Sep 2021 21:48:57 +0800 Subject: RDMA/hfi1: Fix kernel pointer leak Pointers should be printed with %p or %px rather than cast to 'unsigned long long' and printed with %llx. Change %llx to %p to print the secured pointer. Fixes: 042a00f93aad ("IB/{ipoib,hfi1}: Add a timeout handler for rdma_netdev") Link: https://lore.kernel.org/r/20210922134857.619602-1-qtxuning1999@sjtu.edu.cn Signed-off-by: Guo Zhi Acked-by: Mike Marciniszyn Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/ipoib_tx.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/hfi1/ipoib_tx.c b/drivers/infiniband/hw/hfi1/ipoib_tx.c index e74ddbe46589..15b0cb0f363f 100644 --- a/drivers/infiniband/hw/hfi1/ipoib_tx.c +++ b/drivers/infiniband/hw/hfi1/ipoib_tx.c @@ -876,14 +876,14 @@ void hfi1_ipoib_tx_timeout(struct net_device *dev, unsigned int q) struct hfi1_ipoib_txq *txq = &priv->txqs[q]; u64 completed = atomic64_read(&txq->complete_txreqs); - dd_dev_info(priv->dd, "timeout txq %llx q %u stopped %u stops %d no_desc %d ring_full %d\n", - (unsigned long long)txq, q, + dd_dev_info(priv->dd, "timeout txq %p q %u stopped %u stops %d no_desc %d ring_full %d\n", + txq, q, __netif_subqueue_stopped(dev, txq->q_idx), atomic_read(&txq->stops), atomic_read(&txq->no_desc), atomic_read(&txq->ring_full)); - dd_dev_info(priv->dd, "sde %llx engine %u\n", - (unsigned long long)txq->sde, + dd_dev_info(priv->dd, "sde %p engine %u\n", + txq->sde, txq->sde ? txq->sde->this_idx : 0); dd_dev_info(priv->dd, "flow %x\n", txq->flow.as_int); dd_dev_info(priv->dd, "sent %llu completed %llu used %llu\n", -- cgit v1.2.3 From cc26aee100588a3f293921342a307b6309ace193 Mon Sep 17 00:00:00 2001 From: Wenpeng Liang Date: Mon, 27 Sep 2021 20:55:56 +0800 Subject: RDMA/hns: Fix the size setting error when copying CQE in clean_cq() The size of CQE is different for different versions of hardware, so the driver needs to specify the size of CQE explicitly. Fixes: 09a5f210f67e ("RDMA/hns: Add support for CQE in size of 64 Bytes") Link: https://lore.kernel.org/r/20210927125557.15031-2-liangwenpeng@huawei.com Signed-off-by: Wenpeng Liang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index a9c00a2e8ebd..d5f3faa1627a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -3299,7 +3299,7 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn, dest = get_cqe_v2(hr_cq, (prod_index + nfreed) & hr_cq->ib_cq.cqe); owner_bit = hr_reg_read(dest, CQE_OWNER); - memcpy(dest, cqe, sizeof(*cqe)); + memcpy(dest, cqe, hr_cq->cqe_size); hr_reg_write(dest, CQE_OWNER, owner_bit); } } -- cgit v1.2.3 From e671f0ecfece14940a9bb81981098910ea278cf7 Mon Sep 17 00:00:00 2001 From: Wenpeng Liang Date: Mon, 27 Sep 2021 20:55:57 +0800 Subject: RDMA/hns: Add the check of the CQE size of the user space If the CQE size of the user space is not the size supported by the hardware, the creation of CQ should be stopped. Fixes: 09a5f210f67e ("RDMA/hns: Add support for CQE in size of 64 Bytes") Link: https://lore.kernel.org/r/20210927125557.15031-3-liangwenpeng@huawei.com Signed-off-by: Wenpeng Liang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_cq.c | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 1e9c3c5bee68..d763f097599f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -326,19 +326,30 @@ static void set_cq_param(struct hns_roce_cq *hr_cq, u32 cq_entries, int vector, INIT_LIST_HEAD(&hr_cq->rq_list); } -static void set_cqe_size(struct hns_roce_cq *hr_cq, struct ib_udata *udata, - struct hns_roce_ib_create_cq *ucmd) +static int set_cqe_size(struct hns_roce_cq *hr_cq, struct ib_udata *udata, + struct hns_roce_ib_create_cq *ucmd) { struct hns_roce_dev *hr_dev = to_hr_dev(hr_cq->ib_cq.device); - if (udata) { - if (udata->inlen >= offsetofend(typeof(*ucmd), cqe_size)) - hr_cq->cqe_size = ucmd->cqe_size; - else - hr_cq->cqe_size = HNS_ROCE_V2_CQE_SIZE; - } else { + if (!udata) { hr_cq->cqe_size = hr_dev->caps.cqe_sz; + return 0; + } + + if (udata->inlen >= offsetofend(typeof(*ucmd), cqe_size)) { + if (ucmd->cqe_size != HNS_ROCE_V2_CQE_SIZE && + ucmd->cqe_size != HNS_ROCE_V3_CQE_SIZE) { + ibdev_err(&hr_dev->ib_dev, + "invalid cqe size %u.\n", ucmd->cqe_size); + return -EINVAL; + } + + hr_cq->cqe_size = ucmd->cqe_size; + } else { + hr_cq->cqe_size = HNS_ROCE_V2_CQE_SIZE; } + + return 0; } int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, @@ -366,7 +377,9 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, set_cq_param(hr_cq, attr->cqe, attr->comp_vector, &ucmd); - set_cqe_size(hr_cq, udata, &ucmd); + ret = set_cqe_size(hr_cq, udata, &ucmd); + if (ret) + return ret; ret = alloc_cq_buf(hr_dev, hr_cq, udata, ucmd.buf_addr); if (ret) { -- cgit v1.2.3