From a35cd6447effd5c239b564c80fa109d05ff3d114 Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Mon, 11 May 2020 23:13:28 -0400 Subject: IB/qib: Call kobject_put() when kobject_init_and_add() fails When kobject_init_and_add() returns an error in the function qib_create_port_files(), the function kobject_put() is not called for the corresponding kobject, which potentially leads to memory leak. This patch fixes the issue by calling kobject_put() even if kobject_init_and_add() fails. In addition, the ppd->diagc_kobj is released along with other kobjects when the sysfs is unregistered. Fixes: f931551bafe1 ("IB/qib: Add new qib driver for QLogic PCIe InfiniBand adapters") Link: https://lore.kernel.org/r/20200512031328.189865.48627.stgit@awfm-01.aw.intel.com Cc: Suggested-by: Lin Yi Reviewed-by: Mike Marciniszyn Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qib/qib_sysfs.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c index 568b21eb6ea1..021df0654ba7 100644 --- a/drivers/infiniband/hw/qib/qib_sysfs.c +++ b/drivers/infiniband/hw/qib/qib_sysfs.c @@ -760,7 +760,7 @@ int qib_create_port_files(struct ib_device *ibdev, u8 port_num, qib_dev_err(dd, "Skipping linkcontrol sysfs info, (err %d) port %u\n", ret, port_num); - goto bail; + goto bail_link; } kobject_uevent(&ppd->pport_kobj, KOBJ_ADD); @@ -770,7 +770,7 @@ int qib_create_port_files(struct ib_device *ibdev, u8 port_num, qib_dev_err(dd, "Skipping sl2vl sysfs info, (err %d) port %u\n", ret, port_num); - goto bail_link; + goto bail_sl; } kobject_uevent(&ppd->sl2vl_kobj, KOBJ_ADD); @@ -780,7 +780,7 @@ int qib_create_port_files(struct ib_device *ibdev, u8 port_num, qib_dev_err(dd, "Skipping diag_counters sysfs info, (err %d) port %u\n", ret, port_num); - goto bail_sl; + goto bail_diagc; } kobject_uevent(&ppd->diagc_kobj, KOBJ_ADD); @@ -793,7 +793,7 @@ int qib_create_port_files(struct ib_device *ibdev, u8 port_num, qib_dev_err(dd, "Skipping Congestion Control sysfs info, (err %d) port %u\n", ret, port_num); - goto bail_diagc; + goto bail_cc; } kobject_uevent(&ppd->pport_cc_kobj, KOBJ_ADD); @@ -854,6 +854,7 @@ void qib_verbs_unregister_sysfs(struct qib_devdata *dd) &cc_table_bin_attr); kobject_put(&ppd->pport_cc_kobj); } + kobject_put(&ppd->diagc_kobj); kobject_put(&ppd->sl2vl_kobj); kobject_put(&ppd->pport_kobj); } -- cgit v1.2.3 From 189277f3814c36133f4ff0352f4b5194a38486b6 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 21 May 2020 10:25:04 +0300 Subject: RDMA/mlx5: Fix NULL pointer dereference in destroy_prefetch_work q_deferred_work isn't initialized when creating an explicit ODP memory region. This can lead to a NULL pointer dereference when user performs asynchronous prefetch MR. Fix it by initializing q_deferred_work for explicit ODP. BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] SMP PTI CPU: 4 PID: 6074 Comm: kworker/u16:6 Not tainted 5.7.0-rc1-for-upstream-perf-2020-04-17_07-03-39-64 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014 Workqueue: events_unbound mlx5_ib_prefetch_mr_work [mlx5_ib] RIP: 0010:__wake_up_common+0x49/0x120 Code: 04 89 54 24 0c 89 4c 24 08 74 0a 41 f6 01 04 0f 85 8e 00 00 00 48 8b 47 08 48 83 e8 18 4c 8d 67 08 48 8d 50 18 49 39 d4 74 66 <48> 8b 70 18 31 db 4c 8d 7e e8 eb 17 49 8b 47 18 48 8d 50 e8 49 8d RSP: 0000:ffffc9000097bd88 EFLAGS: 00010082 RAX: ffffffffffffffe8 RBX: ffff888454cd9f90 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000003 RDI: ffff888454cd9f90 RBP: ffffc9000097bdd0 R08: 0000000000000000 R09: ffffc9000097bdd0 R10: 0000000000000000 R11: 0000000000000001 R12: ffff888454cd9f98 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000003 FS: 0000000000000000(0000) GS:ffff88846fd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 000000044c19e002 CR4: 0000000000760ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: __wake_up_common_lock+0x7a/0xc0 destroy_prefetch_work+0x5a/0x60 [mlx5_ib] mlx5_ib_prefetch_mr_work+0x64/0x80 [mlx5_ib] process_one_work+0x15b/0x360 worker_thread+0x49/0x3d0 kthread+0xf5/0x130 ? rescuer_thread+0x310/0x310 ? kthread_bind+0x10/0x10 ret_from_fork+0x1f/0x30 Fixes: de5ed007a03d ("IB/mlx5: Fix implicit ODP race") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200521072504.567406-1-leon@kernel.org Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index a401931189b7..44683073be0c 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1439,6 +1439,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (is_odp_mr(mr)) { to_ib_umem_odp(mr->umem)->private = mr; + init_waitqueue_head(&mr->q_deferred_work); atomic_set(&mr->num_deferred_work, 0); err = xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key), &mr->mmkey, -- cgit v1.2.3 From db857e6ae548f0f4f4a0f63fffeeedf3cca21f9d Mon Sep 17 00:00:00 2001 From: Qiushi Wu Date: Fri, 22 May 2020 22:04:57 -0500 Subject: RDMA/pvrdma: Fix missing pci disable in pvrdma_pci_probe() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In function pvrdma_pci_probe(), pdev was not disabled in one error path. Thus replace the jump target “err_free_device” by "err_disable_pdev". Fixes: 29c8d9eba550 ("IB: Add vmw_pvrdma driver") Link: https://lore.kernel.org/r/20200523030457.16160-1-wu000273@umn.edu Signed-off-by: Qiushi Wu Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index e580ae9cc55a..780fd2dfc07e 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -829,7 +829,7 @@ static int pvrdma_pci_probe(struct pci_dev *pdev, !(pci_resource_flags(pdev, 1) & IORESOURCE_MEM)) { dev_err(&pdev->dev, "PCI BAR region not MMIO\n"); ret = -ENOMEM; - goto err_free_device; + goto err_disable_pdev; } ret = pci_request_regions(pdev, DRV_NAME); -- cgit v1.2.3 From c85f4abe66bea0b5db8d28d55da760c4fe0a0301 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 27 May 2020 16:55:34 +0300 Subject: RDMA/core: Fix double destruction of uobject Fix use after free when user user space request uobject concurrently for the same object, within the RCU grace period. In that case, remove_handle_idr_uobject() is called twice and we will have an extra put on the uobject which cause use after free. Fix it by leaving the uobject write locked after it was removed from the idr. Call to rdma_lookup_put_uobject with UVERBS_LOOKUP_DESTROY instead of UVERBS_LOOKUP_WRITE will do the work. refcount_t: underflow; use-after-free. WARNING: CPU: 0 PID: 1381 at lib/refcount.c:28 refcount_warn_saturate+0xfe/0x1a0 Kernel panic - not syncing: panic_on_warn set ... CPU: 0 PID: 1381 Comm: syz-executor.0 Not tainted 5.5.0-rc3 #8 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack+0x94/0xce panic+0x234/0x56f __warn+0x1cc/0x1e1 report_bug+0x200/0x310 fixup_bug.part.11+0x32/0x80 do_error_trap+0xd3/0x100 do_invalid_op+0x31/0x40 invalid_op+0x1e/0x30 RIP: 0010:refcount_warn_saturate+0xfe/0x1a0 Code: 0f 0b eb 9b e8 23 f6 6d ff 80 3d 6c d4 19 03 00 75 8d e8 15 f6 6d ff 48 c7 c7 c0 02 55 bd c6 05 57 d4 19 03 01 e8 a2 58 49 ff <0f> 0b e9 6e ff ff ff e8 f6 f5 6d ff 80 3d 42 d4 19 03 00 0f 85 5c RSP: 0018:ffffc90002df7b98 EFLAGS: 00010282 RAX: 0000000000000000 RBX: ffff88810f6a193c RCX: ffffffffba649009 RDX: 0000000000000000 RSI: 0000000000000008 RDI: ffff88811b0283cc RBP: 0000000000000003 R08: ffffed10236060e3 R09: ffffed10236060e3 R10: 0000000000000001 R11: ffffed10236060e2 R12: ffff88810f6a193c R13: ffffc90002df7d60 R14: 0000000000000000 R15: ffff888116ae6a08 uverbs_uobject_put+0xfd/0x140 __uobj_perform_destroy+0x3d/0x60 ib_uverbs_close_xrcd+0x148/0x170 ib_uverbs_write+0xaa5/0xdf0 __vfs_write+0x7c/0x100 vfs_write+0x168/0x4a0 ksys_write+0xc8/0x200 do_syscall_64+0x9c/0x390 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x465b49 Code: f7 d8 64 89 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f759d122c58 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 000000000073bfa8 RCX: 0000000000465b49 RDX: 000000000000000c RSI: 0000000020000080 RDI: 0000000000000003 RBP: 0000000000000003 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007f759d1236bc R13: 00000000004ca27c R14: 000000000070de40 R15: 00000000ffffffff Dumping ftrace buffer: (ftrace buffer empty) Kernel Offset: 0x39400000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) Fixes: 7452a3c745a2 ("IB/uverbs: Allow RDMA_REMOVE_DESTROY to work concurrently with disassociate") Link: https://lore.kernel.org/r/20200527135534.482279-1-leon@kernel.org Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rdma_core.c | 20 +++++++++++++------- include/rdma/uverbs_std_types.h | 2 +- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index bf8e149d3191..e0a5e897e4b1 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -153,9 +153,9 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj, uobj->context = NULL; /* - * For DESTROY the usecnt is held write locked, the caller is expected - * to put it unlock and put the object when done with it. Only DESTROY - * can remove the IDR handle. + * For DESTROY the usecnt is not changed, the caller is expected to + * manage it via uobj_put_destroy(). Only DESTROY can remove the IDR + * handle. */ if (reason != RDMA_REMOVE_DESTROY) atomic_set(&uobj->usecnt, 0); @@ -187,7 +187,7 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj, /* * This calls uverbs_destroy_uobject() using the RDMA_REMOVE_DESTROY * sequence. It should only be used from command callbacks. On success the - * caller must pair this with rdma_lookup_put_uobject(LOOKUP_WRITE). This + * caller must pair this with uobj_put_destroy(). This * version requires the caller to have already obtained an * LOOKUP_DESTROY uobject kref. */ @@ -198,6 +198,13 @@ int uobj_destroy(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs) down_read(&ufile->hw_destroy_rwsem); + /* + * Once the uobject is destroyed by RDMA_REMOVE_DESTROY then it is left + * write locked as the callers put it back with UVERBS_LOOKUP_DESTROY. + * This is because any other concurrent thread can still see the object + * in the xarray due to RCU. Leaving it locked ensures nothing else will + * touch it. + */ ret = uverbs_try_lock_object(uobj, UVERBS_LOOKUP_WRITE); if (ret) goto out_unlock; @@ -216,7 +223,7 @@ out_unlock: /* * uobj_get_destroy destroys the HW object and returns a handle to the uobj * with a NULL object pointer. The caller must pair this with - * uverbs_put_destroy. + * uobj_put_destroy(). */ struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj, u32 id, struct uverbs_attr_bundle *attrs) @@ -250,8 +257,7 @@ int __uobj_perform_destroy(const struct uverbs_api_object *obj, u32 id, uobj = __uobj_get_destroy(obj, id, attrs); if (IS_ERR(uobj)) return PTR_ERR(uobj); - - rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE); + uobj_put_destroy(uobj); return 0; } diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h index 1b28ce1aba07..325fdaa3bb66 100644 --- a/include/rdma/uverbs_std_types.h +++ b/include/rdma/uverbs_std_types.h @@ -88,7 +88,7 @@ struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj, static inline void uobj_put_destroy(struct ib_uobject *uobj) { - rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE); + rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_DESTROY); } static inline void uobj_put_read(struct ib_uobject *uobj) -- cgit v1.2.3 From 1acba6a817852d4aa7916d5c4f2c82f702ee9224 Mon Sep 17 00:00:00 2001 From: Valentine Fatiev Date: Wed, 27 May 2020 16:47:05 +0300 Subject: IB/ipoib: Fix double free of skb in case of multicast traffic in CM mode When connected mode is set, and we have connected and datagram traffic in parallel, ipoib might crash with double free of datagram skb. The current mechanism assumes that the order in the completion queue is the same as the order of sent packets for all QPs. Order is kept only for specific QP, in case of mixed UD and CM traffic we have few QPs (one UD and few CM's) in parallel. The problem: ---------------------------------------------------------- Transmit queue: ----------------- UD skb pointer kept in queue itself, CM skb kept in spearate queue and uses transmit queue as a placeholder to count the number of total transmitted packets. 0 1 2 3 4 5 6 7 8 9 10 11 12 13 .........127 ------------------------------------------------------------ NL ud1 UD2 CM1 ud3 cm2 cm3 ud4 cm4 ud5 NL NL NL ........... ------------------------------------------------------------ ^ ^ tail head Completion queue (problematic scenario) - the order not the same as in the transmit queue: 1 2 3 4 5 6 7 8 9 ------------------------------------ ud1 CM1 UD2 ud3 cm2 cm3 ud4 cm4 ud5 ------------------------------------ 1. CM1 'wc' processing - skb freed in cm separate ring. - tx_tail of transmit queue increased although UD2 is not freed. Now driver assumes UD2 index is already freed and it could be used for new transmitted skb. 0 1 2 3 4 5 6 7 8 9 10 11 12 13 .........127 ------------------------------------------------------------ NL NL UD2 CM1 ud3 cm2 cm3 ud4 cm4 ud5 NL NL NL ........... ------------------------------------------------------------ ^ ^ ^ (Bad)tail head (Bad - Could be used for new SKB) In this case (due to heavy load) UD2 skb pointer could be replaced by new transmitted packet UD_NEW, as the driver assumes its free. At this point we will have to process two 'wc' with same index but we have only one pointer to free. During second attempt to free the same skb we will have NULL pointer exception. 2. UD2 'wc' processing - skb freed according the index we got from 'wc', but it was already overwritten by mistake. So actually the skb that was released is the skb of the new transmitted packet and not the original one. 3. UD_NEW 'wc' processing - attempt to free already freed skb. NUll pointer exception. The fix: ----------------------------------------------------------------------- The fix is to stop using the UD ring as a placeholder for CM packets, the cyclic ring variables tx_head and tx_tail will manage the UD tx_ring, a new cyclic variables global_tx_head and global_tx_tail are introduced for managing and counting the overall outstanding sent packets, then the send queue will be stopped and waken based on these variables only. Note that no locking is needed since global_tx_head is updated in the xmit flow and global_tx_tail is updated in the NAPI flow only. A previous attempt tried to use one variable to count the outstanding sent packets, but it did not work since xmit and NAPI flows can run at the same time and the counter will be updated wrongly. Thus, we use the same simple cyclic head and tail scheme that we have today for the UD tx_ring. Fixes: 2c104ea68350 ("IB/ipoib: Get rid of the tx_outstanding variable in all modes") Link: https://lore.kernel.org/r/20200527134705.480068-1-leon@kernel.org Signed-off-by: Valentine Fatiev Signed-off-by: Alaa Hleihel Signed-off-by: Leon Romanovsky Acked-by: Doug Ledford Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/ipoib/ipoib.h | 4 ++++ drivers/infiniband/ulp/ipoib/ipoib_cm.c | 15 +++++++++------ drivers/infiniband/ulp/ipoib/ipoib_ib.c | 9 +++++++-- drivers/infiniband/ulp/ipoib/ipoib_main.c | 10 ++++++---- 4 files changed, 26 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index e188a95984b5..9a3379c49541 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -377,8 +377,12 @@ struct ipoib_dev_priv { struct ipoib_rx_buf *rx_ring; struct ipoib_tx_buf *tx_ring; + /* cyclic ring variables for managing tx_ring, for UD only */ unsigned int tx_head; unsigned int tx_tail; + /* cyclic ring variables for counting overall outstanding send WRs */ + unsigned int global_tx_head; + unsigned int global_tx_tail; struct ib_sge tx_sge[MAX_SKB_FRAGS + 1]; struct ib_ud_wr tx_wr; struct ib_wc send_wc[MAX_SEND_CQE]; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index c59e00a0881f..9bf0fa30df28 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -756,7 +756,8 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_ return; } - if ((priv->tx_head - priv->tx_tail) == ipoib_sendq_size - 1) { + if ((priv->global_tx_head - priv->global_tx_tail) == + ipoib_sendq_size - 1) { ipoib_dbg(priv, "TX ring 0x%x full, stopping kernel net queue\n", tx->qp->qp_num); netif_stop_queue(dev); @@ -786,7 +787,7 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_ } else { netif_trans_update(dev); ++tx->tx_head; - ++priv->tx_head; + ++priv->global_tx_head; } } @@ -820,10 +821,11 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) netif_tx_lock(dev); ++tx->tx_tail; - ++priv->tx_tail; + ++priv->global_tx_tail; if (unlikely(netif_queue_stopped(dev) && - (priv->tx_head - priv->tx_tail) <= ipoib_sendq_size >> 1 && + ((priv->global_tx_head - priv->global_tx_tail) <= + ipoib_sendq_size >> 1) && test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))) netif_wake_queue(dev); @@ -1232,8 +1234,9 @@ timeout: dev_kfree_skb_any(tx_req->skb); netif_tx_lock_bh(p->dev); ++p->tx_tail; - ++priv->tx_tail; - if (unlikely(priv->tx_head - priv->tx_tail == ipoib_sendq_size >> 1) && + ++priv->global_tx_tail; + if (unlikely((priv->global_tx_head - priv->global_tx_tail) <= + ipoib_sendq_size >> 1) && netif_queue_stopped(p->dev) && test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) netif_wake_queue(p->dev); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index c332b4761816..da3c5315bbb5 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -407,9 +407,11 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) dev_kfree_skb_any(tx_req->skb); ++priv->tx_tail; + ++priv->global_tx_tail; if (unlikely(netif_queue_stopped(dev) && - ((priv->tx_head - priv->tx_tail) <= ipoib_sendq_size >> 1) && + ((priv->global_tx_head - priv->global_tx_tail) <= + ipoib_sendq_size >> 1) && test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))) netif_wake_queue(dev); @@ -634,7 +636,8 @@ int ipoib_send(struct net_device *dev, struct sk_buff *skb, else priv->tx_wr.wr.send_flags &= ~IB_SEND_IP_CSUM; /* increase the tx_head after send success, but use it for queue state */ - if (priv->tx_head - priv->tx_tail == ipoib_sendq_size - 1) { + if ((priv->global_tx_head - priv->global_tx_tail) == + ipoib_sendq_size - 1) { ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n"); netif_stop_queue(dev); } @@ -662,6 +665,7 @@ int ipoib_send(struct net_device *dev, struct sk_buff *skb, rc = priv->tx_head; ++priv->tx_head; + ++priv->global_tx_head; } return rc; } @@ -807,6 +811,7 @@ int ipoib_ib_dev_stop_default(struct net_device *dev) ipoib_dma_unmap_tx(priv, tx_req); dev_kfree_skb_any(tx_req->skb); ++priv->tx_tail; + ++priv->global_tx_tail; } for (i = 0; i < ipoib_recvq_size; ++i) { diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 81b8227214f1..ceec24d45185 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1184,9 +1184,11 @@ static void ipoib_timeout(struct net_device *dev, unsigned int txqueue) ipoib_warn(priv, "transmit timeout: latency %d msecs\n", jiffies_to_msecs(jiffies - dev_trans_start(dev))); - ipoib_warn(priv, "queue stopped %d, tx_head %u, tx_tail %u\n", - netif_queue_stopped(dev), - priv->tx_head, priv->tx_tail); + ipoib_warn(priv, + "queue stopped %d, tx_head %u, tx_tail %u, global_tx_head %u, global_tx_tail %u\n", + netif_queue_stopped(dev), priv->tx_head, priv->tx_tail, + priv->global_tx_head, priv->global_tx_tail); + /* XXX reset QP, etc. */ } @@ -1701,7 +1703,7 @@ static int ipoib_dev_init_default(struct net_device *dev) goto out_rx_ring_cleanup; } - /* priv->tx_head, tx_tail & tx_outstanding are already 0 */ + /* priv->tx_head, tx_tail and global_tx_tail/head are already 0 */ if (ipoib_transport_dev_init(dev, priv->ca)) { pr_warn("%s: ipoib_transport_dev_init failed\n", -- cgit v1.2.3