drm/i915: Only record active and pending requests upon a GPU hang

There is no other state pertaining to the completed requests in the hang, other than gleamed through the ringbuffer, so including the expired requests in the list of outstanding requests simply adds noise. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Reviewed-by: Matthew Auld <matthew.auld@intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/1471254551-25805-31-git-send-email-chris@chris-wilson.co.uk
author: Chris Wilson <chris@chris-wilson.co.uk> 2016-08-15 10:49:10 +0100
committer: Chris Wilson <chris@chris-wilson.co.uk> 2016-08-15 11:01:18 +0100
commit: 57bc699d43390aab7a21f3f2cee1f13cd31fd0fd (patch)
tree: 503742686260547c2f94a35a149958ed8192117c /drivers/gpu/drm/i915/i915_gpu_error.c
parent: 03382dfb633c1c1ac85b3d81ddd2256dcbc8f353 (diff)
1 files changed, 61 insertions, 48 deletions
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 0f0b65214ef1..776818b86c0c 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1060,12 +1060,68 @@ static void error_record_engine_registers(struct drm_i915_error_state *error,
 	}
 }
 
+static void engine_record_requests(struct intel_engine_cs *engine,
+				   struct drm_i915_gem_request *first,
+				   struct drm_i915_error_engine *ee)
+{
+	struct drm_i915_gem_request *request;
+	int count;
+
+	count = 0;
+	request = first;
+	list_for_each_entry_from(request, &engine->request_list, link)
+		count++;
+	if (!count)
+		return;
+
+	ee->requests = kcalloc(count, sizeof(*ee->requests), GFP_ATOMIC);
+	if (!ee->requests)
+		return;
+
+	ee->num_requests = count;
+
+	count = 0;
+	request = first;
+	list_for_each_entry_from(request, &engine->request_list, link) {
+		struct drm_i915_error_request *erq;
+
+		if (count >= ee->num_requests) {
+			/*
+			 * If the ring request list was changed in
+			 * between the point where the error request
+			 * list was created and dimensioned and this
+			 * point then just exit early to avoid crashes.
+			 *
+			 * We don't need to communicate that the
+			 * request list changed state during error
+			 * state capture and that the error state is
+			 * slightly incorrect as a consequence since we
+			 * are typically only interested in the request
+			 * list state at the point of error state
+			 * capture, not in any changes happening during
+			 * the capture.
+			 */
+			break;
+		}
+
+		erq = &ee->requests[count++];
+		erq->seqno = request->fence.seqno;
+		erq->jiffies = request->emitted_jiffies;
+		erq->head = request->head;
+		erq->tail = request->tail;
+
+		rcu_read_lock();
+		erq->pid = request->ctx->pid ? pid_nr(request->ctx->pid) : 0;
+		rcu_read_unlock();
+	}
+	ee->num_requests = count;
+}
+
 static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
 				  struct drm_i915_error_state *error)
 {
 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
-	struct drm_i915_gem_request *request;
-	int i, count;
+	int i;
 
 	error->semaphore =
 		i915_error_object_create(dev_priv, dev_priv->semaphore);
@@ -1073,6 +1129,7 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
 	for (i = 0; i < I915_NUM_ENGINES; i++) {
 		struct intel_engine_cs *engine = &dev_priv->engine[i];
 		struct drm_i915_error_engine *ee = &error->engine[i];
+		struct drm_i915_gem_request *request;
 
 		ee->pid = -1;
 		ee->engine_id = -1;
@@ -1131,6 +1188,8 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
 			ee->cpu_ring_tail = ring->tail;
 			ee->ringbuffer =
 				i915_error_object_create(dev_priv, ring->vma);
+
+			engine_record_requests(engine, request, ee);
 		}
 
 		ee->hws_page =
@@ -1139,52 +1198,6 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
 
 		ee->wa_ctx =
 			i915_error_object_create(dev_priv, engine->wa_ctx.vma);
-
-		count = 0;
-		list_for_each_entry(request, &engine->request_list, link)
-			count++;
-
-		ee->num_requests = count;
-		ee->requests =
-			kcalloc(count, sizeof(*ee->requests), GFP_ATOMIC);
-		if (!ee->requests) {
-			ee->num_requests = 0;
-			continue;
-		}
-
-		count = 0;
-		list_for_each_entry(request, &engine->request_list, link) {
-			struct drm_i915_error_request *erq;
-
-			if (count >= ee->num_requests) {
-				/*
-				 * If the ring request list was changed in
-				 * between the point where the error request
-				 * list was created and dimensioned and this
-				 * point then just exit early to avoid crashes.
-				 *
-				 * We don't need to communicate that the
-				 * request list changed state during error
-				 * state capture and that the error state is
-				 * slightly incorrect as a consequence since we
-				 * are typically only interested in the request
-				 * list state at the point of error state
-				 * capture, not in any changes happening during
-				 * the capture.
-				 */
-				break;
-			}
-
-			erq = &ee->requests[count++];
-			erq->seqno = request->fence.seqno;
-			erq->jiffies = request->emitted_jiffies;
-			erq->head = request->head;
-			erq->tail = request->tail;
-
-			rcu_read_lock();
-			erq->pid = request->ctx->pid ? pid_nr(request->ctx->pid) : 0;
-			rcu_read_unlock();
-		}
 	}
 }
author	Chris Wilson <chris@chris-wilson.co.uk>	2016-08-15 10:49:10 +0100
committer	Chris Wilson <chris@chris-wilson.co.uk>	2016-08-15 11:01:18 +0100
commit	57bc699d43390aab7a21f3f2cee1f13cd31fd0fd (patch)
tree	503742686260547c2f94a35a149958ed8192117c /drivers/gpu/drm/i915/i915_gpu_error.c
parent	03382dfb633c1c1ac85b3d81ddd2256dcbc8f353 (diff)