summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTao Zhou <tao.zhou1@amd.com>2021-12-10 17:01:15 +0800
committerAlex Deucher <alexander.deucher@amd.com>2021-12-28 16:02:32 -0500
commitf4409ee84658ff95d92daace9f094fa6bf80b0e3 (patch)
tree036ed91e20a1cc0cf234087266cccf9cf0d5b173
parentd764fb2af6cd92eee080978856f24b0cc6d555ad (diff)
drm/amdgpu: add gpu reset control for umc page retirement
Add a reset parameter for umc page retirement, let user decide whether call gpu reset in umc page retirement. Signed-off-by: Tao Zhou <tao.zhou1@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Acked-by: Felix Kuehling <Felix.Kuehling@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c15
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h5
2 files changed, 15 insertions, 5 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
index 6e4bea012ea4..0c33f367a4e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
@@ -23,6 +23,13 @@
#include "amdgpu_ras.h"
+static int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,
+ void *ras_error_status,
+ struct amdgpu_iv_entry *entry)
+{
+ return amdgpu_umc_do_page_retirement(adev, ras_error_status, entry, true);
+}
+
int amdgpu_umc_ras_late_init(struct amdgpu_device *adev)
{
int r;
@@ -88,9 +95,10 @@ void amdgpu_umc_ras_fini(struct amdgpu_device *adev)
}
}
-int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,
+int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev,
void *ras_error_status,
- struct amdgpu_iv_entry *entry)
+ struct amdgpu_iv_entry *entry,
+ bool reset)
{
struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
@@ -164,7 +172,8 @@ int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,
adev->smu.ppt_funcs->send_hbm_bad_pages_num(&adev->smu, con->eeprom_control.ras_num_recs);
}
- amdgpu_ras_reset_gpu(adev);
+ if (reset)
+ amdgpu_ras_reset_gpu(adev);
}
kfree(err_data->err_addr);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
index 9e40bade0a68..8d18d5121f66 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
@@ -78,9 +78,10 @@ struct amdgpu_umc {
int amdgpu_umc_ras_late_init(struct amdgpu_device *adev);
void amdgpu_umc_ras_fini(struct amdgpu_device *adev);
-int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,
+int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev,
void *ras_error_status,
- struct amdgpu_iv_entry *entry);
+ struct amdgpu_iv_entry *entry,
+ bool reset);
int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry);