Merge tag 'v5.14-rc4' into media_tree

Linux 5.14-rc4 * tag 'v5.14-rc4': (948 commits) Linux 5.14-rc4 pipe: make pipe writes always wake up readers Revert "perf map: Fix dso->nsinfo refcounting" mm/memcg: fix NULL pointer dereference in memcg_slab_free_hook() slub: fix unreclaimable slab stat for bulk free mm/migrate: fix NR_ISOLATED corruption on 64-bit mm: memcontrol: fix blocking rstat function called from atomic cgroup1 thresholding code ocfs2: issue zeroout to EOF blocks ocfs2: fix zero out valid data lib/test_string.c: move string selftest in the Runtime Testing menu gve: Update MAINTAINERS list arch: Kconfig: clean up obsolete use of HAVE_IDE can: esd_usb2: fix memory leak can: ems_usb: fix memory leak can: usb_8dev: fix memory leak can: mcba_usb_start(): add missing urb->transfer_dma initialization can: hi311x: fix a signedness bug in hi3110_cmd() MAINTAINERS: add Yasushi SHOJI as reviewer for the Microchip CAN BUS Analyzer Tool driver scsi: fas216: Fix fall-through warning for Clang scsi: acornscsi: Fix fall-through warning for clang ...
author: Mauro Carvalho Chehab <mchehab+huawei@kernel.org> 2021-08-05 16:28:43 +0200
committer: Mauro Carvalho Chehab <mchehab+huawei@kernel.org> 2021-08-05 16:28:43 +0200
commit: 9c3a0f285248899dfa81585bc5d5bc9ebdb8fead (patch)
tree: 908decd51fbb0e1c774444b002e3b89dee3fae26 /drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
parent: bfee75f73c37a2f46a6326eaa06f5db701f76f01 (diff)
parent: c500bee1c5b2f1d59b1081ac879d73268ab0ff17 (diff)
1 files changed, 35 insertions, 14 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index c13b02caf8c3..fc66aca28594 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -809,7 +809,7 @@ static int amdgpu_ras_enable_all_features(struct amdgpu_device *adev,
 
 /* query/inject/cure begin */
 int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
-	struct ras_query_if *info)
+				  struct ras_query_if *info)
 {
 	struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
 	struct ras_err_data err_data = {0, 0, 0, NULL};
@@ -1043,17 +1043,32 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
 	return ret;
 }
 
-/* get the total error counts on all IPs */
-void amdgpu_ras_query_error_count(struct amdgpu_device *adev,
-				  unsigned long *ce_count,
-				  unsigned long *ue_count)
+/**
+ * amdgpu_ras_query_error_count -- Get error counts of all IPs
+ * adev: pointer to AMD GPU device
+ * ce_count: pointer to an integer to be set to the count of correctible errors.
+ * ue_count: pointer to an integer to be set to the count of uncorrectible
+ * errors.
+ *
+ * If set, @ce_count or @ue_count, count and return the corresponding
+ * error counts in those integer pointers. Return 0 if the device
+ * supports RAS. Return -EOPNOTSUPP if the device doesn't support RAS.
+ */
+int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
+				 unsigned long *ce_count,
+				 unsigned long *ue_count)
 {
 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
 	struct ras_manager *obj;
 	unsigned long ce, ue;
 
 	if (!adev->ras_enabled || !con)
-		return;
+		return -EOPNOTSUPP;
+
+	/* Don't count since no reporting.
+	 */
+	if (!ce_count && !ue_count)
+		return 0;
 
 	ce = 0;
 	ue = 0;
@@ -1061,9 +1076,11 @@ void amdgpu_ras_query_error_count(struct amdgpu_device *adev,
 		struct ras_query_if info = {
 			.head = obj->head,
 		};
+		int res;
 
-		if (amdgpu_ras_query_error_status(adev, &info))
-			return;
+		res = amdgpu_ras_query_error_status(adev, &info);
+		if (res)
+			return res;
 
 		ce += info.ce_count;
 		ue += info.ue_count;
@@ -1074,6 +1091,8 @@ void amdgpu_ras_query_error_count(struct amdgpu_device *adev,
 
 	if (ue_count)
 		*ue_count = ue;
+
+	return 0;
 }
 /* query/inject/cure end */
 
@@ -2137,9 +2156,10 @@ static void amdgpu_ras_counte_dw(struct work_struct *work)
 
 	/* Cache new values.
 	 */
-	amdgpu_ras_query_error_count(adev, &ce_count, &ue_count);
-	atomic_set(&con->ras_ce_count, ce_count);
-	atomic_set(&con->ras_ue_count, ue_count);
+	if (amdgpu_ras_query_error_count(adev, &ce_count, &ue_count) == 0) {
+		atomic_set(&con->ras_ce_count, ce_count);
+		atomic_set(&con->ras_ue_count, ue_count);
+	}
 
 	pm_runtime_mark_last_busy(dev->dev);
 Out:
@@ -2312,9 +2332,10 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev,
 
 	/* Those are the cached values at init.
 	 */
-	amdgpu_ras_query_error_count(adev, &ce_count, &ue_count);
-	atomic_set(&con->ras_ce_count, ce_count);
-	atomic_set(&con->ras_ue_count, ue_count);
+	if (amdgpu_ras_query_error_count(adev, &ce_count, &ue_count) == 0) {
+		atomic_set(&con->ras_ce_count, ce_count);
+		atomic_set(&con->ras_ue_count, ue_count);
+	}
 
 	return 0;
 cleanup:
author	Mauro Carvalho Chehab <mchehab+huawei@kernel.org>	2021-08-05 16:28:43 +0200
committer	Mauro Carvalho Chehab <mchehab+huawei@kernel.org>	2021-08-05 16:28:43 +0200
commit	9c3a0f285248899dfa81585bc5d5bc9ebdb8fead (patch)
tree	908decd51fbb0e1c774444b002e3b89dee3fae26 /drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
parent	bfee75f73c37a2f46a6326eaa06f5db701f76f01 (diff)
parent	c500bee1c5b2f1d59b1081ac879d73268ab0ff17 (diff)