diff options
author | David S. Miller <davem@davemloft.net> | 2020-07-03 12:33:16 -0700 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2020-07-03 12:33:16 -0700 |
commit | de2afdcaf4073dc4ebe6fa7238bc59b0f55568be (patch) | |
tree | 6aac0ac89a9f30d541375e75e1d0ac4e6f451c9d | |
parent | 8c8278a5b1a81e099ba883d8a0f9e3df9bdb1a74 (diff) | |
parent | e62055642797a6de80f3576c18e212cbbf5b4361 (diff) |
Merge tag 'mlx5-updates-2020-07-02' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux
Saeed Mahameed says:
====================
mlx5-updates-2020-07-02
Enhancements to the Rx and Tx devlink health reporters.
1) Code cleanup
2) devlink output format improvements
3) Print more useful info on devlink health diagnose output
4) TX timeout recovery: if recovery of a single SQ fails, stop the loop
and reset all rings (re-open the netdev).
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/en.h | 20 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/en/health.c | 58 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/en/health.h | 16 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c | 145 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c | 80 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h | 46 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 14 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 4 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/wq.h | 15 |
9 files changed, 266 insertions, 132 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 2957edb7e0b7..c44669102626 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -852,26 +852,6 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget); int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget); void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq); -static inline u32 mlx5e_rqwq_get_size(struct mlx5e_rq *rq) -{ - switch (rq->wq_type) { - case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - return mlx5_wq_ll_get_size(&rq->mpwqe.wq); - default: - return mlx5_wq_cyc_get_size(&rq->wqe.wq); - } -} - -static inline u32 mlx5e_rqwq_get_cur_sz(struct mlx5e_rq *rq) -{ - switch (rq->wq_type) { - case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - return rq->mpwqe.wq.cur_sz; - default: - return rq->wqe.wq.cur_sz; - } -} - bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev); bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev, struct mlx5e_params *params); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c index 7283443868f3..3dc200bcfabd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c @@ -5,7 +5,7 @@ #include "lib/eq.h" #include "lib/mlx5.h" -int mlx5e_reporter_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name) +int mlx5e_health_fmsg_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name) { int err; @@ -20,7 +20,7 @@ int mlx5e_reporter_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name) return 0; } -int mlx5e_reporter_named_obj_nest_end(struct devlink_fmsg *fmsg) +int mlx5e_health_fmsg_named_obj_nest_end(struct devlink_fmsg *fmsg) { int err; @@ -35,7 +35,7 @@ int mlx5e_reporter_named_obj_nest_end(struct devlink_fmsg *fmsg) return 0; } -int mlx5e_reporter_cq_diagnose(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg) +int 
mlx5e_health_cq_diag_fmsg(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg) { struct mlx5e_priv *priv = cq->channel->priv; u32 out[MLX5_ST_SZ_DW(query_cq_out)] = {}; @@ -50,7 +50,7 @@ int mlx5e_reporter_cq_diagnose(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg) cqc = MLX5_ADDR_OF(query_cq_out, out, cq_context); hw_status = MLX5_GET(cqc, cqc, status); - err = mlx5e_reporter_named_obj_nest_start(fmsg, "CQ"); + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "CQ"); if (err) return err; @@ -62,14 +62,22 @@ int mlx5e_reporter_cq_diagnose(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg) if (err) return err; - err = mlx5e_reporter_named_obj_nest_end(fmsg); + err = devlink_fmsg_u32_pair_put(fmsg, "ci", mlx5_cqwq_get_ci(&cq->wq)); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "size", mlx5_cqwq_get_size(&cq->wq)); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); if (err) return err; return 0; } -int mlx5e_reporter_cq_common_diagnose(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg) +int mlx5e_health_cq_common_diag_fmsg(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg) { u8 cq_log_stride; u32 cq_sz; @@ -78,7 +86,7 @@ int mlx5e_reporter_cq_common_diagnose(struct mlx5e_cq *cq, struct devlink_fmsg * cq_sz = mlx5_cqwq_get_size(&cq->wq); cq_log_stride = mlx5_cqwq_get_log_stride_size(&cq->wq); - err = mlx5e_reporter_named_obj_nest_start(fmsg, "CQ"); + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "CQ"); if (err) return err; @@ -90,26 +98,48 @@ int mlx5e_reporter_cq_common_diagnose(struct mlx5e_cq *cq, struct devlink_fmsg * if (err) return err; - err = mlx5e_reporter_named_obj_nest_end(fmsg); + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); if (err) return err; return 0; } -int mlx5e_health_create_reporters(struct mlx5e_priv *priv) +int mlx5e_health_eq_diag_fmsg(struct mlx5_eq_comp *eq, struct devlink_fmsg *fmsg) { int err; - err = mlx5e_reporter_tx_create(priv); + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "EQ"); if 
(err) return err; - err = mlx5e_reporter_rx_create(priv); + err = devlink_fmsg_u8_pair_put(fmsg, "eqn", eq->core.eqn); if (err) return err; - return 0; + err = devlink_fmsg_u32_pair_put(fmsg, "irqn", eq->core.irqn); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "vecidx", eq->core.vecidx); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "ci", eq->core.cons_index); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "size", eq->core.nent); + if (err) + return err; + + return mlx5e_health_fmsg_named_obj_nest_end(fmsg); +} + +void mlx5e_health_create_reporters(struct mlx5e_priv *priv) +{ + mlx5e_reporter_tx_create(priv); + mlx5e_reporter_rx_create(priv); } void mlx5e_health_destroy_reporters(struct mlx5e_priv *priv) @@ -291,7 +321,7 @@ int mlx5e_health_queue_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, if (err) return err; - err = mlx5e_reporter_named_obj_nest_start(fmsg, lbl); + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, lbl); if (err) return err; @@ -303,7 +333,7 @@ int mlx5e_health_queue_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, if (err) return err; - err = mlx5e_reporter_named_obj_nest_end(fmsg); + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h index 38f97f79ef16..b9aadddfd000 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h @@ -16,23 +16,25 @@ static inline bool cqe_syndrome_needs_recover(u8 syndrome) syndrome == MLX5_CQE_SYNDROME_WR_FLUSH_ERR; } -int mlx5e_reporter_tx_create(struct mlx5e_priv *priv); +void mlx5e_reporter_tx_create(struct mlx5e_priv *priv); void mlx5e_reporter_tx_destroy(struct mlx5e_priv *priv); void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq); int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq); -int mlx5e_reporter_cq_diagnose(struct 
mlx5e_cq *cq, struct devlink_fmsg *fmsg); -int mlx5e_reporter_cq_common_diagnose(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg); -int mlx5e_reporter_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name); -int mlx5e_reporter_named_obj_nest_end(struct devlink_fmsg *fmsg); +int mlx5e_health_cq_diag_fmsg(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg); +int mlx5e_health_cq_common_diag_fmsg(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg); +int mlx5e_health_eq_diag_fmsg(struct mlx5_eq_comp *eq, struct devlink_fmsg *fmsg); +int mlx5e_health_fmsg_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name); +int mlx5e_health_fmsg_named_obj_nest_end(struct devlink_fmsg *fmsg); -int mlx5e_reporter_rx_create(struct mlx5e_priv *priv); +void mlx5e_reporter_rx_create(struct mlx5e_priv *priv); void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv); void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq); void mlx5e_reporter_rq_cqe_err(struct mlx5e_rq *rq); void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq); #define MLX5E_REPORTER_PER_Q_MAX_LEN 256 +#define MLX5E_REPORTER_FLUSH_TIMEOUT_MSEC 2000 struct mlx5e_err_ctx { int (*recover)(void *ctx); @@ -46,7 +48,7 @@ int mlx5e_health_recover_channels(struct mlx5e_priv *priv); int mlx5e_health_report(struct mlx5e_priv *priv, struct devlink_health_reporter *reporter, char *err_str, struct mlx5e_err_ctx *err_ctx); -int mlx5e_health_create_reporters(struct mlx5e_priv *priv); +void mlx5e_health_create_reporters(struct mlx5e_priv *priv); void mlx5e_health_destroy_reporters(struct mlx5e_priv *priv); void mlx5e_health_channels_update(struct mlx5e_priv *priv); int mlx5e_health_rsc_fmsg_dump(struct mlx5e_priv *priv, struct mlx5_rsc_key *key, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c index c209579fc213..32ed1067e6dc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c 
@@ -3,6 +3,7 @@ #include "health.h" #include "params.h" +#include "txrx.h" static int mlx5e_query_rq_state(struct mlx5_core_dev *dev, u32 rqn, u8 *state) { @@ -29,7 +30,8 @@ out: static int mlx5e_wait_for_icosq_flush(struct mlx5e_icosq *icosq) { - unsigned long exp_time = jiffies + msecs_to_jiffies(2000); + unsigned long exp_time = jiffies + + msecs_to_jiffies(MLX5E_REPORTER_FLUSH_TIMEOUT_MSEC); while (time_before(jiffies, exp_time)) { if (icosq->cc == icosq->pc) @@ -123,25 +125,9 @@ static int mlx5e_rq_to_ready(struct mlx5e_rq *rq, int curr_state) static int mlx5e_rx_reporter_err_rq_cqe_recover(void *ctx) { - struct mlx5_core_dev *mdev; - struct net_device *dev; - struct mlx5e_rq *rq; - u8 state; + struct mlx5e_rq *rq = ctx; int err; - rq = ctx; - mdev = rq->mdev; - dev = rq->netdev; - err = mlx5e_query_rq_state(mdev, rq->rqn, &state); - if (err) { - netdev_err(dev, "Failed to query RQ 0x%x state. err = %d\n", - rq->rqn, err); - goto out; - } - - if (state != MLX5_RQC_STATE_ERR) - goto out; - mlx5e_deactivate_rq(rq); mlx5e_free_rx_descs(rq); @@ -191,19 +177,71 @@ static int mlx5e_rx_reporter_recover(struct devlink_health_reporter *reporter, mlx5e_health_recover_channels(priv); } +static int mlx5e_reporter_icosq_diagnose(struct mlx5e_icosq *icosq, u8 hw_state, + struct devlink_fmsg *fmsg) +{ + int err; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "ICOSQ"); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "sqn", icosq->sqn); + if (err) + return err; + + err = devlink_fmsg_u8_pair_put(fmsg, "HW state", hw_state); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "cc", icosq->cc); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "pc", icosq->pc); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "WQE size", + mlx5_wq_cyc_get_size(&icosq->wq)); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "CQ"); + if (err) + return err; + + err = 
devlink_fmsg_u32_pair_put(fmsg, "cqn", icosq->cq.mcq.cqn); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "cc", icosq->cq.wq.cc); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "size", mlx5_cqwq_get_size(&icosq->cq.wq)); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); + if (err) + return err; + + return mlx5e_health_fmsg_named_obj_nest_end(fmsg); +} + static int mlx5e_rx_reporter_build_diagnose_output(struct mlx5e_rq *rq, struct devlink_fmsg *fmsg) { struct mlx5e_priv *priv = rq->channel->priv; - struct mlx5e_params *params; struct mlx5e_icosq *icosq; u8 icosq_hw_state; + u16 wqe_counter; int wqes_sz; u8 hw_state; u16 wq_head; int err; - params = &priv->channels.params; icosq = &rq->channel->icosq; err = mlx5e_query_rq_state(priv->mdev, rq->rqn, &hw_state); if (err) @@ -214,8 +252,8 @@ static int mlx5e_rx_reporter_build_diagnose_output(struct mlx5e_rq *rq, return err; wqes_sz = mlx5e_rqwq_get_cur_sz(rq); - wq_head = params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ ? 
- rq->mpwqe.wq.head : mlx5_wq_cyc_get_head(&rq->wqe.wq); + wq_head = mlx5e_rqwq_get_head(rq); + wqe_counter = mlx5e_rqwq_get_wqe_counter(rq); err = devlink_fmsg_obj_nest_start(fmsg); if (err) @@ -237,6 +275,10 @@ static int mlx5e_rx_reporter_build_diagnose_output(struct mlx5e_rq *rq, if (err) return err; + err = devlink_fmsg_u32_pair_put(fmsg, "WQE counter", wqe_counter); + if (err) + return err; + err = devlink_fmsg_u32_pair_put(fmsg, "posted WQEs", wqes_sz); if (err) return err; @@ -245,11 +287,15 @@ static int mlx5e_rx_reporter_build_diagnose_output(struct mlx5e_rq *rq, if (err) return err; - err = devlink_fmsg_u8_pair_put(fmsg, "ICOSQ HW state", icosq_hw_state); + err = mlx5e_health_cq_diag_fmsg(&rq->cq, fmsg); if (err) return err; - err = mlx5e_reporter_cq_diagnose(&rq->cq, fmsg); + err = mlx5e_health_eq_diag_fmsg(rq->cq.mcq.eq, fmsg); + if (err) + return err; + + err = mlx5e_reporter_icosq_diagnose(icosq, icosq_hw_state, fmsg); if (err) return err; @@ -279,11 +325,11 @@ static int mlx5e_rx_reporter_diagnose(struct devlink_health_reporter *reporter, rq_sz = mlx5e_rqwq_get_size(generic_rq); rq_stride = BIT(mlx5e_mpwqe_get_log_stride_size(priv->mdev, params, NULL)); - err = mlx5e_reporter_named_obj_nest_start(fmsg, "Common config"); + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Common config"); if (err) goto unlock; - err = mlx5e_reporter_named_obj_nest_start(fmsg, "RQ"); + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "RQ"); if (err) goto unlock; @@ -299,15 +345,15 @@ static int mlx5e_rx_reporter_diagnose(struct devlink_health_reporter *reporter, if (err) goto unlock; - err = mlx5e_reporter_named_obj_nest_end(fmsg); + err = mlx5e_health_cq_common_diag_fmsg(&generic_rq->cq, fmsg); if (err) goto unlock; - err = mlx5e_reporter_cq_common_diagnose(&generic_rq->cq, fmsg); + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); if (err) goto unlock; - err = mlx5e_reporter_named_obj_nest_end(fmsg); + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); if (err) 
goto unlock; @@ -340,7 +386,7 @@ static int mlx5e_rx_reporter_dump_icosq(struct mlx5e_priv *priv, struct devlink_ if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) return 0; - err = mlx5e_reporter_named_obj_nest_start(fmsg, "SX Slice"); + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SX Slice"); if (err) return err; @@ -350,15 +396,15 @@ static int mlx5e_rx_reporter_dump_icosq(struct mlx5e_priv *priv, struct devlink_ if (err) return err; - err = mlx5e_reporter_named_obj_nest_end(fmsg); + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); if (err) return err; - err = mlx5e_reporter_named_obj_nest_start(fmsg, "ICOSQ"); + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "ICOSQ"); if (err) return err; - err = mlx5e_reporter_named_obj_nest_start(fmsg, "QPC"); + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "QPC"); if (err) return err; @@ -370,11 +416,11 @@ static int mlx5e_rx_reporter_dump_icosq(struct mlx5e_priv *priv, struct devlink_ if (err) return err; - err = mlx5e_reporter_named_obj_nest_end(fmsg); + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); if (err) return err; - err = mlx5e_reporter_named_obj_nest_start(fmsg, "send_buff"); + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "send_buff"); if (err) return err; @@ -385,11 +431,11 @@ static int mlx5e_rx_reporter_dump_icosq(struct mlx5e_priv *priv, struct devlink_ if (err) return err; - err = mlx5e_reporter_named_obj_nest_end(fmsg); + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); if (err) return err; - return mlx5e_reporter_named_obj_nest_end(fmsg); + return mlx5e_health_fmsg_named_obj_nest_end(fmsg); } static int mlx5e_rx_reporter_dump_rq(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, @@ -402,7 +448,7 @@ static int mlx5e_rx_reporter_dump_rq(struct mlx5e_priv *priv, struct devlink_fms if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) return 0; - err = mlx5e_reporter_named_obj_nest_start(fmsg, "RX Slice"); + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "RX Slice"); if (err) return 
err; @@ -412,15 +458,15 @@ static int mlx5e_rx_reporter_dump_rq(struct mlx5e_priv *priv, struct devlink_fms if (err) return err; - err = mlx5e_reporter_named_obj_nest_end(fmsg); + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); if (err) return err; - err = mlx5e_reporter_named_obj_nest_start(fmsg, "RQ"); + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "RQ"); if (err) return err; - err = mlx5e_reporter_named_obj_nest_start(fmsg, "QPC"); + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "QPC"); if (err) return err; @@ -432,11 +478,11 @@ static int mlx5e_rx_reporter_dump_rq(struct mlx5e_priv *priv, struct devlink_fms if (err) return err; - err = mlx5e_reporter_named_obj_nest_end(fmsg); + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); if (err) return err; - err = mlx5e_reporter_named_obj_nest_start(fmsg, "receive_buff"); + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "receive_buff"); if (err) return err; @@ -446,11 +492,11 @@ static int mlx5e_rx_reporter_dump_rq(struct mlx5e_priv *priv, struct devlink_fms if (err) return err; - err = mlx5e_reporter_named_obj_nest_end(fmsg); + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); if (err) return err; - return mlx5e_reporter_named_obj_nest_end(fmsg); + return mlx5e_health_fmsg_named_obj_nest_end(fmsg); } static int mlx5e_rx_reporter_dump_all_rqs(struct mlx5e_priv *priv, @@ -462,7 +508,7 @@ static int mlx5e_rx_reporter_dump_all_rqs(struct mlx5e_priv *priv, if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) return 0; - err = mlx5e_reporter_named_obj_nest_start(fmsg, "RX Slice"); + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "RX Slice"); if (err) return err; @@ -472,7 +518,7 @@ static int mlx5e_rx_reporter_dump_all_rqs(struct mlx5e_priv *priv, if (err) return err; - err = mlx5e_reporter_named_obj_nest_end(fmsg); + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); if (err) return err; @@ -563,7 +609,7 @@ static const struct devlink_health_reporter_ops mlx5_rx_reporter_ops = { #define 
MLX5E_REPORTER_RX_GRACEFUL_PERIOD 500 -int mlx5e_reporter_rx_create(struct mlx5e_priv *priv) +void mlx5e_reporter_rx_create(struct mlx5e_priv *priv) { struct devlink *devlink = priv_to_devlink(priv->mdev); struct devlink_health_reporter *reporter; @@ -575,10 +621,9 @@ int mlx5e_reporter_rx_create(struct mlx5e_priv *priv) if (IS_ERR(reporter)) { netdev_warn(priv->netdev, "Failed to create rx reporter, err = %ld\n", PTR_ERR(reporter)); - return PTR_ERR(reporter); + return; } priv->rx_reporter = reporter; - return 0; } void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index 9805fc085512..826584380216 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -5,7 +5,8 @@ static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq) { - unsigned long exp_time = jiffies + msecs_to_jiffies(2000); + unsigned long exp_time = jiffies + + msecs_to_jiffies(MLX5E_REPORTER_FLUSH_TIMEOUT_MSEC); while (time_before(jiffies, exp_time)) { if (sq->cc == sq->pc) @@ -82,17 +83,40 @@ out: return err; } +struct mlx5e_tx_timeout_ctx { + struct mlx5e_txqsq *sq; + signed int status; +}; + static int mlx5e_tx_reporter_timeout_recover(void *ctx) { + struct mlx5e_tx_timeout_ctx *to_ctx; + struct mlx5e_priv *priv; struct mlx5_eq_comp *eq; struct mlx5e_txqsq *sq; int err; - sq = ctx; + to_ctx = ctx; + sq = to_ctx->sq; eq = sq->cq.mcq.eq; + priv = sq->channel->priv; err = mlx5e_health_channel_eq_recover(eq, sq->channel); - if (err) - clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); + if (!err) { + to_ctx->status = 0; /* this sq recovered */ + return err; + } + + err = mlx5e_safe_reopen_channels(priv); + if (!err) { + to_ctx->status = 1; /* all channels recovered */ + return err; + } + + to_ctx->status = err; + clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); + netdev_err(priv->netdev, + 
"mlx5e_safe_reopen_channels failed recovering from a tx_timeout, err(%d).\n", + err); return err; } @@ -165,7 +189,11 @@ mlx5e_tx_reporter_build_diagnose_output(struct devlink_fmsg *fmsg, if (err) return err; - err = mlx5e_reporter_cq_diagnose(&sq->cq, fmsg); + err = mlx5e_health_cq_diag_fmsg(&sq->cq, fmsg); + if (err) + return err; + + err = mlx5e_health_eq_diag_fmsg(sq->cq.mcq.eq, fmsg); if (err) return err; @@ -194,11 +222,11 @@ static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter, sq_sz = mlx5_wq_cyc_get_size(&generic_sq->wq); sq_stride = MLX5_SEND_WQE_BB; - err = mlx5e_reporter_named_obj_nest_start(fmsg, "Common Config"); + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Common Config"); if (err) goto unlock; - err = mlx5e_reporter_named_obj_nest_start(fmsg, "SQ"); + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SQ"); if (err) goto unlock; @@ -210,15 +238,15 @@ static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter, if (err) goto unlock; - err = mlx5e_reporter_cq_common_diagnose(&generic_sq->cq, fmsg); + err = mlx5e_health_cq_common_diag_fmsg(&generic_sq->cq, fmsg); if (err) goto unlock; - err = mlx5e_reporter_named_obj_nest_end(fmsg); + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); if (err) goto unlock; - err = mlx5e_reporter_named_obj_nest_end(fmsg); + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); if (err) goto unlock; @@ -256,7 +284,7 @@ static int mlx5e_tx_reporter_dump_sq(struct mlx5e_priv *priv, struct devlink_fms if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) return 0; - err = mlx5e_reporter_named_obj_nest_start(fmsg, "SX Slice"); + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SX Slice"); if (err) return err; @@ -266,15 +294,15 @@ static int mlx5e_tx_reporter_dump_sq(struct mlx5e_priv *priv, struct devlink_fms if (err) return err; - err = mlx5e_reporter_named_obj_nest_end(fmsg); + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); if (err) return err; - err = 
mlx5e_reporter_named_obj_nest_start(fmsg, "SQ"); + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SQ"); if (err) return err; - err = mlx5e_reporter_named_obj_nest_start(fmsg, "QPC"); + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "QPC"); if (err) return err; @@ -286,11 +314,11 @@ static int mlx5e_tx_reporter_dump_sq(struct mlx5e_priv *priv, struct devlink_fms if (err) return err; - err = mlx5e_reporter_named_obj_nest_end(fmsg); + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); if (err) return err; - err = mlx5e_reporter_named_obj_nest_start(fmsg, "send_buff"); + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "send_buff"); if (err) return err; @@ -300,11 +328,11 @@ static int mlx5e_tx_reporter_dump_sq(struct mlx5e_priv *priv, struct devlink_fms if (err) return err; - err = mlx5e_reporter_named_obj_nest_end(fmsg); + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); if (err) return err; - return mlx5e_reporter_named_obj_nest_end(fmsg); + return mlx5e_health_fmsg_named_obj_nest_end(fmsg); } static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv, @@ -316,7 +344,7 @@ static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv, if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) return 0; - err = mlx5e_reporter_named_obj_nest_start(fmsg, "SX Slice"); + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SX Slice"); if (err) return err; @@ -326,7 +354,7 @@ static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv, if (err) return err; - err = mlx5e_reporter_named_obj_nest_end(fmsg); + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); if (err) return err; @@ -384,9 +412,11 @@ int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq) { struct mlx5e_priv *priv = sq->channel->priv; char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; + struct mlx5e_tx_timeout_ctx to_ctx = {}; struct mlx5e_err_ctx err_ctx = {}; - err_ctx.ctx = sq; + to_ctx.sq = sq; + err_ctx.ctx = &to_ctx; err_ctx.recover = mlx5e_tx_reporter_timeout_recover; err_ctx.dump = 
mlx5e_tx_reporter_dump_sq; snprintf(err_str, sizeof(err_str), @@ -394,7 +424,8 @@ int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq) sq->channel->ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc, jiffies_to_usecs(jiffies - sq->txq->trans_start)); - return mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx); + mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx); + return to_ctx.status; } static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = { @@ -406,7 +437,7 @@ static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = { #define MLX5_REPORTER_TX_GRACEFUL_PERIOD 500 -int mlx5e_reporter_tx_create(struct mlx5e_priv *priv) +void mlx5e_reporter_tx_create(struct mlx5e_priv *priv) { struct devlink_health_reporter *reporter; struct mlx5_core_dev *mdev = priv->mdev; @@ -421,10 +452,9 @@ int mlx5e_reporter_tx_create(struct mlx5e_priv *priv) netdev_warn(priv->netdev, "Failed to create tx reporter, err = %ld\n", PTR_ERR(reporter)); - return PTR_ERR(reporter); + return; } priv->tx_reporter = reporter; - return 0; } void mlx5e_reporter_tx_destroy(struct mlx5e_priv *priv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h index e9d4a61b6bbb..cf425a60cddc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -268,7 +268,7 @@ static inline void mlx5e_rqwq_reset(struct mlx5e_rq *rq) } } -static inline void mlx5e_dump_error_cqe(struct mlx5e_cq *cq, u32 sqn, +static inline void mlx5e_dump_error_cqe(struct mlx5e_cq *cq, u32 qn, struct mlx5_err_cqe *err_cqe) { struct mlx5_cqwq *wq = &cq->wq; @@ -277,13 +277,53 @@ static inline void mlx5e_dump_error_cqe(struct mlx5e_cq *cq, u32 sqn, ci = mlx5_cqwq_ctr2ix(wq, wq->cc - 1); netdev_err(cq->channel->netdev, - "Error cqe on cqn 0x%x, ci 0x%x, sqn 0x%x, opcode 0x%x, syndrome 0x%x, vendor syndrome 0x%x\n", - cq->mcq.cqn, ci, sqn, + "Error cqe on cqn 0x%x, ci 0x%x, 
qn 0x%x, opcode 0x%x, syndrome 0x%x, vendor syndrome 0x%x\n", + cq->mcq.cqn, ci, qn, get_cqe_opcode((struct mlx5_cqe64 *)err_cqe), err_cqe->syndrome, err_cqe->vendor_err_synd); mlx5_dump_err_cqe(cq->mdev, err_cqe); } +static inline u32 mlx5e_rqwq_get_size(struct mlx5e_rq *rq) +{ + switch (rq->wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + return mlx5_wq_ll_get_size(&rq->mpwqe.wq); + default: + return mlx5_wq_cyc_get_size(&rq->wqe.wq); + } +} + +static inline u32 mlx5e_rqwq_get_cur_sz(struct mlx5e_rq *rq) +{ + switch (rq->wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + return rq->mpwqe.wq.cur_sz; + default: + return rq->wqe.wq.cur_sz; + } +} + +static inline u16 mlx5e_rqwq_get_head(struct mlx5e_rq *rq) +{ + switch (rq->wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + return mlx5_wq_ll_get_head(&rq->mpwqe.wq); + default: + return mlx5_wq_cyc_get_head(&rq->wqe.wq); + } +} + +static inline u16 mlx5e_rqwq_get_wqe_counter(struct mlx5e_rq *rq) +{ + switch (rq->wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + return mlx5_wq_ll_get_counter(&rq->mpwqe.wq); + default: + return mlx5_wq_cyc_get_counter(&rq->wqe.wq); + } +} + /* SW parser related functions */ struct mlx5e_swp_spec { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 046cfb0ea180..b04c8572adea 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4367,8 +4367,6 @@ static void mlx5e_tx_timeout_work(struct work_struct *work) { struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, tx_timeout_work); - bool report_failed = false; - int err; int i; rtnl_lock(); @@ -4386,18 +4384,10 @@ static void mlx5e_tx_timeout_work(struct work_struct *work) continue; if (mlx5e_reporter_tx_timeout(sq)) - report_failed = true; + /* break if tried to reopened channels */ + break; } - if (!report_failed) - goto unlock; - - err = 
mlx5e_safe_reopen_channels(priv); - if (err) - netdev_err(priv->netdev, - "mlx5e_safe_reopen_channels failed recovering from a tx_timeout, err(%d).\n", - err); - unlock: mutex_unlock(&priv->state_lock); rtnl_unlock(); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 8b42f729a4f7..350f9c54e508 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -1187,8 +1187,10 @@ static void trigger_report(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) struct mlx5_err_cqe *err_cqe = (struct mlx5_err_cqe *)cqe; if (cqe_syndrome_needs_recover(err_cqe->syndrome) && - !test_and_set_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state)) + !test_and_set_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state)) { + mlx5e_dump_error_cqe(&rq->cq, rq->rqn, err_cqe); queue_work(rq->channel->priv->wq, &rq->recover_work); + } } void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h index 4cadc336593f..e5c4dcd1425e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h @@ -172,6 +172,11 @@ static inline int mlx5_wq_cyc_cc_bigger(u16 cc1, u16 cc2) return !equal && !smaller; } +static inline u16 mlx5_wq_cyc_get_counter(struct mlx5_wq_cyc *wq) +{ + return wq->wqe_ctr; +} + static inline u32 mlx5_cqwq_get_size(struct mlx5_cqwq *wq) { return wq->fbc.sz_m1 + 1; @@ -290,4 +295,14 @@ static inline void mlx5_wq_ll_update_db_record(struct mlx5_wq_ll *wq) *wq->db = cpu_to_be32(wq->wqe_ctr); } +static inline u16 mlx5_wq_ll_get_head(struct mlx5_wq_ll *wq) +{ + return wq->head; +} + +static inline u16 mlx5_wq_ll_get_counter(struct mlx5_wq_ll *wq) +{ + return wq->wqe_ctr; +} + #endif /* __MLX5_WQ_H__ */ |