diff options
author | Danielle Ratson <danieller@nvidia.com> | 2021-12-20 12:56:14 +0200 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2021-12-20 11:32:21 +0000 |
commit | 239cdd3f4cb0fe87da95e9a64a41644ee5a744ec (patch) | |
tree | 3bb6ee75c85762ab8b2557d834abf3421e5c6acb | |
parent | e25c060c5f24b19d2596445c6d0cf40da6f13cc1 (diff) |
mlxsw: core: Extend devlink health reporter with new events and parameters
Extend the devlink health reporter registered by mlxsw to report new
health events and their related parameters. These are meant to aid in
debugging of hardware / firmware issues.
Beside the test event ('MLXSW_REG_MFDE_EVENT_ID_TEST') that is triggered
following the devlink health 'test' sub-command, the new events are used
to report the triggering of asserts in firmware code
('MLXSW_REG_MFDE_EVENT_ID_FW_ASSERT') and hardware issues
('MLXSW_REG_MFDE_EVENT_ID_FATAL_CAUSE').
Each event is accompanied with a severity parameter and per-event
parameters that are meant to help root cause the detected issue.
Signed-off-by: Danielle Ratson <danieller@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | drivers/net/ethernet/mellanox/mlxsw/core.c | 131 |
1 files changed, 131 insertions, 0 deletions
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index d9f12d9cd0ff..866b9357939b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -1709,11 +1709,92 @@ static const struct mlxsw_listener mlxsw_core_health_listener = MLXSW_EVENTL(mlxsw_core_health_listener_func, MFDE, MFDE); static int +mlxsw_core_health_fw_fatal_dump_fatal_cause(const char *mfde_pl, + struct devlink_fmsg *fmsg) +{ + u32 val, tile_v; + int err; + + val = mlxsw_reg_mfde_fatal_cause_id_get(mfde_pl); + err = devlink_fmsg_u32_pair_put(fmsg, "cause_id", val); + if (err) + return err; + tile_v = mlxsw_reg_mfde_fatal_cause_tile_v_get(mfde_pl); + if (tile_v) { + val = mlxsw_reg_mfde_fatal_cause_tile_index_get(mfde_pl); + err = devlink_fmsg_u8_pair_put(fmsg, "tile_index", val); + if (err) + return err; + } + + return 0; +} + +static int +mlxsw_core_health_fw_fatal_dump_fw_assert(const char *mfde_pl, + struct devlink_fmsg *fmsg) +{ + u32 val, tile_v; + int err; + + val = mlxsw_reg_mfde_fw_assert_var0_get(mfde_pl); + err = devlink_fmsg_u32_pair_put(fmsg, "var0", val); + if (err) + return err; + val = mlxsw_reg_mfde_fw_assert_var1_get(mfde_pl); + err = devlink_fmsg_u32_pair_put(fmsg, "var1", val); + if (err) + return err; + val = mlxsw_reg_mfde_fw_assert_var2_get(mfde_pl); + err = devlink_fmsg_u32_pair_put(fmsg, "var2", val); + if (err) + return err; + val = mlxsw_reg_mfde_fw_assert_var3_get(mfde_pl); + err = devlink_fmsg_u32_pair_put(fmsg, "var3", val); + if (err) + return err; + val = mlxsw_reg_mfde_fw_assert_var4_get(mfde_pl); + err = devlink_fmsg_u32_pair_put(fmsg, "var4", val); + if (err) + return err; + val = mlxsw_reg_mfde_fw_assert_existptr_get(mfde_pl); + err = devlink_fmsg_u32_pair_put(fmsg, "existptr", val); + if (err) + return err; + val = mlxsw_reg_mfde_fw_assert_callra_get(mfde_pl); + err = devlink_fmsg_u32_pair_put(fmsg, "callra", val); + if (err) + return err; + val = mlxsw_reg_mfde_fw_assert_oe_get(mfde_pl); + err = devlink_fmsg_bool_pair_put(fmsg, "old_event", val); + if (err) + return err; + tile_v = mlxsw_reg_mfde_fw_assert_tile_v_get(mfde_pl); + if (tile_v) { + val = mlxsw_reg_mfde_fw_assert_tile_index_get(mfde_pl); + err = devlink_fmsg_u8_pair_put(fmsg, "tile_index", val); + if (err) + return err; + } + val = mlxsw_reg_mfde_fw_assert_ext_synd_get(mfde_pl); + err = devlink_fmsg_u32_pair_put(fmsg, "ext_synd", val); + if (err) + return err; + + return 0; +} + +static int mlxsw_core_health_fw_fatal_dump_kvd_im_stop(const char *mfde_pl, struct devlink_fmsg *fmsg) { u32 val; + int err; + val = mlxsw_reg_mfde_kvd_im_stop_oe_get(mfde_pl); + err = devlink_fmsg_bool_pair_put(fmsg, "old_event", val); + if (err) + return err; val = mlxsw_reg_mfde_kvd_im_stop_pipes_mask_get(mfde_pl); return devlink_fmsg_u32_pair_put(fmsg, "pipes_mask", val); } @@ -1729,6 +1810,10 @@ mlxsw_core_health_fw_fatal_dump_crspace_to(const char *mfde_pl, err = devlink_fmsg_u32_pair_put(fmsg, "log_address", val); if (err) return err; + val = mlxsw_reg_mfde_crspace_to_oe_get(mfde_pl); + err = devlink_fmsg_bool_pair_put(fmsg, "old_event", val); + if (err) + return err; val = mlxsw_reg_mfde_crspace_to_log_id_get(mfde_pl); err = devlink_fmsg_u8_pair_put(fmsg, "log_irisc_id", val); if (err) @@ -1774,6 +1859,15 @@ static int mlxsw_core_health_fw_fatal_dump(struct devlink_health_reporter *repor case MLXSW_REG_MFDE_EVENT_ID_KVD_IM_STOP: val_str = "KVD insertion machine stopped"; break; + case MLXSW_REG_MFDE_EVENT_ID_TEST: + val_str = "Test"; + break; + case MLXSW_REG_MFDE_EVENT_ID_FW_ASSERT: + val_str = "FW assert"; + break; + case MLXSW_REG_MFDE_EVENT_ID_FATAL_CAUSE: + val_str = "Fatal cause"; + break; default: val_str = NULL; } @@ -1782,6 +1876,38 @@ static int mlxsw_core_health_fw_fatal_dump(struct devlink_health_reporter *repor if (err) return err; } + + err = devlink_fmsg_arr_pair_nest_end(fmsg); + if (err) + return err; + + err = devlink_fmsg_arr_pair_nest_start(fmsg, "severity"); + if (err) + return err; + + val = mlxsw_reg_mfde_severity_get(mfde_pl); + err = devlink_fmsg_u8_pair_put(fmsg, "id", val); + if (err) + return err; + switch (val) { + case MLXSW_REG_MFDE_SEVERITY_FATL: + val_str = "Fatal"; + break; + case MLXSW_REG_MFDE_SEVERITY_NRML: + val_str = "Normal"; + break; + case MLXSW_REG_MFDE_SEVERITY_INTR: + val_str = "Debug"; + break; + default: + val_str = NULL; + } + if (val_str) { + err = devlink_fmsg_string_pair_put(fmsg, "desc", val_str); + if (err) + return err; + } + err = devlink_fmsg_arr_pair_nest_end(fmsg); if (err) return err; @@ -1840,6 +1966,11 @@ static int mlxsw_core_health_fw_fatal_dump(struct devlink_health_reporter *repor case MLXSW_REG_MFDE_EVENT_ID_KVD_IM_STOP: return mlxsw_core_health_fw_fatal_dump_kvd_im_stop(mfde_pl, fmsg); + case MLXSW_REG_MFDE_EVENT_ID_FW_ASSERT: + return mlxsw_core_health_fw_fatal_dump_fw_assert(mfde_pl, fmsg); + case MLXSW_REG_MFDE_EVENT_ID_FATAL_CAUSE: + return mlxsw_core_health_fw_fatal_dump_fatal_cause(mfde_pl, + fmsg); } return 0; |