diff options
| author | Chris Wilson <chris@chris-wilson.co.uk> | 2012-02-20 22:53:26 +0000 |
|---|---|---|
| committer | Daniel Vetter <daniel.vetter@ffwll.ch> | 2012-02-22 10:48:03 +0100 |
| commit | fa6c2757feb18b17120310fe315ed32594ff326c (patch) | |
| tree | 57d8ef3c70a368369291c74288c2f7e396d983e6 /tests | |
| parent | 5cf555868214ac6d55d8102d3198813aadc37853 (diff) | |
tests/gem_ringfill: Exercise all rings
On SandyBridge, the BLT commands were split from the RENDER commands as
well as the BSD split inherited from Ironlake. So we need to make sure
we do exercise each ring, and in order to do so we also need to make
sure each batch takes longer to execute than it takes for us to
submit it.
v2: Exercise each ring sequentially.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Diffstat (limited to 'tests')
-rw-r--r-- | tests/gem_ringfill.c | 185 |
1 file changed, 130 insertions, 55 deletions
diff --git a/tests/gem_ringfill.c b/tests/gem_ringfill.c index 685a010b..2d00f065 100644 --- a/tests/gem_ringfill.c +++ b/tests/gem_ringfill.c @@ -41,50 +41,98 @@ #include <errno.h> #include <sys/stat.h> #include <sys/time.h> + #include "drm.h" #include "i915_drm.h" #include "drmtest.h" #include "intel_bufmgr.h" #include "intel_batchbuffer.h" #include "intel_gpu_tools.h" +#include "rendercopy.h" + +struct bo { + const char *ring; + drm_intel_bo *src, *dst, *tmp; +}; -static drm_intel_bufmgr *bufmgr; -struct intel_batchbuffer *batch; static const int width = 512, height = 512; -int main(int argc, char **argv) +static void create_bo(drm_intel_bufmgr *bufmgr, + struct bo *b, + const char *ring) { - int fd; - int i; - drm_intel_bo *src_bo, *dst_bo; + int size = 4 * width * height, i; uint32_t *map; - int fails = 0; - int pitch = width * 4; - int size = pitch * height; - int blits; - fd = drm_open_any(); - - bufmgr = drm_intel_bufmgr_gem_init(fd, 4096); - drm_intel_bufmgr_gem_enable_reuse(bufmgr); - batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd)); - - src_bo = drm_intel_bo_alloc(bufmgr, "src bo", size, 4096); - dst_bo = drm_intel_bo_alloc(bufmgr, "src bo", size, 4096); + b->ring = ring; + b->src = drm_intel_bo_alloc(bufmgr, "src", size, 4096); + b->dst = drm_intel_bo_alloc(bufmgr, "dst", size, 4096); + b->tmp = drm_intel_bo_alloc(bufmgr, "tmp", size, 4096); /* Fill the src with indexes of the pixels */ - drm_intel_bo_map(src_bo, true); - map = src_bo->virtual; + drm_intel_bo_map(b->src, true); + map = b->src->virtual; for (i = 0; i < width * height; i++) map[i] = i; - drm_intel_bo_unmap(src_bo); + drm_intel_bo_unmap(b->src); /* Fill the dst with garbage. 
*/ - drm_intel_bo_map(dst_bo, true); - map = dst_bo->virtual; + drm_intel_bo_map(b->dst, true); + map = b->dst->virtual; for (i = 0; i < width * height; i++) map[i] = 0xd0d0d0d0; - drm_intel_bo_unmap(dst_bo); + drm_intel_bo_unmap(b->dst); +} + +static int check_bo(struct bo *b) +{ + const uint32_t *map; + int i, fails = 0; + + drm_intel_bo_map(b->dst, false); + map = b->dst->virtual; + for (i = 0; i < width*height; i++) { + if (map[i] != i && ++fails <= 9) { + int x = i % width; + int y = i / width; + + printf("%s: copy #%d at %d,%d failed: read 0x%08x\n", + b->ring, i, x, y, map[i]); + } + } + drm_intel_bo_unmap(b->dst); + + return fails; +} + +static void destroy_bo(struct bo *b) +{ + drm_intel_bo_unreference(b->src); + drm_intel_bo_unreference(b->tmp); + drm_intel_bo_unreference(b->dst); +} + +static int check_ring(drm_intel_bufmgr *bufmgr, + struct intel_batchbuffer *batch, + const char *ring, + render_copyfunc_t copy) +{ + struct scratch_buf src, tmp, dst; + struct bo bo; + int i; + + create_bo(bufmgr, &bo, ring); + + src.stride = 4 * width; + src.tiling = 0; + src.data = src.cpu_mapping = NULL; + src.size = 4 * width * height; + src.num_tiles = 4 * width * height; + dst = tmp = src; + + src.bo = bo.src; + tmp.bo = bo.tmp; + dst.bo = bo.dst; /* The ring we've been using is 128k, and each rendering op * will use at least 8 dwords: @@ -101,48 +149,75 @@ int main(int argc, char **argv) * So iterate just a little more than that -- if we don't fill the ring * doing this, we aren't likely to with this test. 
*/ - blits = width * height; - for (i = 0; i < blits; i++) { + for (i = 0; i < width * height; i++) { int x = i % width; int y = i / width; assert(y < height); - BEGIN_BATCH(8); - OUT_BATCH(XY_SRC_COPY_BLT_CMD | - XY_SRC_COPY_BLT_WRITE_ALPHA | - XY_SRC_COPY_BLT_WRITE_RGB); - OUT_BATCH((3 << 24) | /* 32 bits */ - (0xcc << 16) | /* copy ROP */ - pitch); - OUT_BATCH((y << 16) | x); /* dst x1,y1 */ - OUT_BATCH(((y + 1) << 16) | (x + 1)); /* dst x2,y2 */ - OUT_RELOC(dst_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0); - OUT_BATCH((y << 16) | x); /* src x1,y1 */ - OUT_BATCH(pitch); - OUT_RELOC(src_bo, I915_GEM_DOMAIN_RENDER, 0, 0); - ADVANCE_BATCH(); - - intel_batchbuffer_flush(batch); + /* Dummy load to fill the ring */ + copy(batch, &src, 0, 0, width, height, &tmp, 0, 0); + /* And copy the src into dst, pixel by pixel */ + copy(batch, &src, x, y, 1, 1, &dst, x, y); } /* verify */ - drm_intel_bo_map(dst_bo, false); - map = dst_bo->virtual; - for (i = 0; i < blits; i++) { - int x = i % width; - int y = i / width; + i = check_bo(&bo); + destroy_bo(&bo); - if (map[i] != i) { + return i; +} - printf("Copy #%d at %d,%d failed: read 0x%08x\n", - i, x, y, map[i]); +static void blt_copy(struct intel_batchbuffer *batch, + struct scratch_buf *src, unsigned src_x, unsigned src_y, + unsigned width, unsigned height, + struct scratch_buf *dst, unsigned dst_x, unsigned dst_y) +{ + BEGIN_BATCH(8); + OUT_BATCH(XY_SRC_COPY_BLT_CMD | + XY_SRC_COPY_BLT_WRITE_ALPHA | + XY_SRC_COPY_BLT_WRITE_RGB); + OUT_BATCH((3 << 24) | /* 32 bits */ + (0xcc << 16) | /* copy ROP */ + dst->stride); + OUT_BATCH((dst_y << 16) | dst_x); /* dst x1,y1 */ + OUT_BATCH(((dst_y + width) << 16) | (dst_x + width)); /* dst x2,y2 */ + OUT_RELOC(dst->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0); + OUT_BATCH((src_y << 16) | src_x); /* src x1,y1 */ + OUT_BATCH(src->stride); + OUT_RELOC(src->bo, I915_GEM_DOMAIN_RENDER, 0, 0); + ADVANCE_BATCH(); + + intel_batchbuffer_flush(batch); +} - if (fails++ > 9) - 
exit(1); - } - } - drm_intel_bo_unmap(dst_bo); +int main(int argc, char **argv) +{ + drm_intel_bufmgr *bufmgr; + struct intel_batchbuffer *batch; + render_copyfunc_t copy; + int fd, fails = 0; + + fd = drm_open_any(); + + bufmgr = drm_intel_bufmgr_gem_init(fd, 4096); + drm_intel_bufmgr_gem_enable_reuse(bufmgr); + batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd)); + + fails += check_ring(bufmgr, batch, "blt", blt_copy); + + /* Strictly only required on architectures with a separate BLT ring, + * but lets stress everybody. + */ + copy = NULL; + if (IS_GEN2(batch->devid)) + copy = gen2_render_copyfunc; + else if (IS_GEN3(batch->devid)) + copy = gen3_render_copyfunc; + else if (IS_GEN6(batch->devid)) + copy = gen6_render_copyfunc; + if (copy) + fails += check_ring(bufmgr, batch, "render", copy); intel_batchbuffer_free(batch); drm_intel_bufmgr_destroy(bufmgr); |