diff options
-rw-r--r--  lib/intel_gpu_tools.h |   5
-rw-r--r--  lib/rendercopy.h      |   5
-rw-r--r--  tests/gem_ringfill.c  | 185
3 files changed, 140 insertions(+), 55 deletions(-)
diff --git a/lib/intel_gpu_tools.h b/lib/intel_gpu_tools.h index f46abfa8..eb21a165 100644 --- a/lib/intel_gpu_tools.h +++ b/lib/intel_gpu_tools.h @@ -25,6 +25,9 @@ * */ +#ifndef INTEL_GPU_TOOLS_H +#define INTEL_GPU_TOOLS_H + #include <stdint.h> #include <sys/types.h> #include <pciaccess.h> @@ -94,3 +97,5 @@ extern enum pch_type pch; void intel_check_pch(void); #define HAS_CPT (pch == PCH_CPT) + +#endif /* INTEL_GPU_TOOLS_H */ diff --git a/lib/rendercopy.h b/lib/rendercopy.h index 7547ac42..4fcc8170 100644 --- a/lib/rendercopy.h +++ b/lib/rendercopy.h @@ -58,6 +58,11 @@ static inline unsigned buf_height(struct scratch_buf *buf) return buf->size/buf->stride; } +typedef void (*render_copyfunc_t)(struct intel_batchbuffer *batch, + struct scratch_buf *src, unsigned src_x, unsigned src_y, + unsigned width, unsigned height, + struct scratch_buf *dst, unsigned dst_x, unsigned dst_y); + void gen6_render_copyfunc(struct intel_batchbuffer *batch, struct scratch_buf *src, unsigned src_x, unsigned src_y, unsigned width, unsigned height, diff --git a/tests/gem_ringfill.c b/tests/gem_ringfill.c index 685a010b..2d00f065 100644 --- a/tests/gem_ringfill.c +++ b/tests/gem_ringfill.c @@ -41,50 +41,98 @@ #include <errno.h> #include <sys/stat.h> #include <sys/time.h> + #include "drm.h" #include "i915_drm.h" #include "drmtest.h" #include "intel_bufmgr.h" #include "intel_batchbuffer.h" #include "intel_gpu_tools.h" +#include "rendercopy.h" + +struct bo { + const char *ring; + drm_intel_bo *src, *dst, *tmp; +}; -static drm_intel_bufmgr *bufmgr; -struct intel_batchbuffer *batch; static const int width = 512, height = 512; -int main(int argc, char **argv) +static void create_bo(drm_intel_bufmgr *bufmgr, + struct bo *b, + const char *ring) { - int fd; - int i; - drm_intel_bo *src_bo, *dst_bo; + int size = 4 * width * height, i; uint32_t *map; - int fails = 0; - int pitch = width * 4; - int size = pitch * height; - int blits; - fd = drm_open_any(); - - bufmgr = drm_intel_bufmgr_gem_init(fd, 
4096); - drm_intel_bufmgr_gem_enable_reuse(bufmgr); - batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd)); - - src_bo = drm_intel_bo_alloc(bufmgr, "src bo", size, 4096); - dst_bo = drm_intel_bo_alloc(bufmgr, "src bo", size, 4096); + b->ring = ring; + b->src = drm_intel_bo_alloc(bufmgr, "src", size, 4096); + b->dst = drm_intel_bo_alloc(bufmgr, "dst", size, 4096); + b->tmp = drm_intel_bo_alloc(bufmgr, "tmp", size, 4096); /* Fill the src with indexes of the pixels */ - drm_intel_bo_map(src_bo, true); - map = src_bo->virtual; + drm_intel_bo_map(b->src, true); + map = b->src->virtual; for (i = 0; i < width * height; i++) map[i] = i; - drm_intel_bo_unmap(src_bo); + drm_intel_bo_unmap(b->src); /* Fill the dst with garbage. */ - drm_intel_bo_map(dst_bo, true); - map = dst_bo->virtual; + drm_intel_bo_map(b->dst, true); + map = b->dst->virtual; for (i = 0; i < width * height; i++) map[i] = 0xd0d0d0d0; - drm_intel_bo_unmap(dst_bo); + drm_intel_bo_unmap(b->dst); +} + +static int check_bo(struct bo *b) +{ + const uint32_t *map; + int i, fails = 0; + + drm_intel_bo_map(b->dst, false); + map = b->dst->virtual; + for (i = 0; i < width*height; i++) { + if (map[i] != i && ++fails <= 9) { + int x = i % width; + int y = i / width; + + printf("%s: copy #%d at %d,%d failed: read 0x%08x\n", + b->ring, i, x, y, map[i]); + } + } + drm_intel_bo_unmap(b->dst); + + return fails; +} + +static void destroy_bo(struct bo *b) +{ + drm_intel_bo_unreference(b->src); + drm_intel_bo_unreference(b->tmp); + drm_intel_bo_unreference(b->dst); +} + +static int check_ring(drm_intel_bufmgr *bufmgr, + struct intel_batchbuffer *batch, + const char *ring, + render_copyfunc_t copy) +{ + struct scratch_buf src, tmp, dst; + struct bo bo; + int i; + + create_bo(bufmgr, &bo, ring); + + src.stride = 4 * width; + src.tiling = 0; + src.data = src.cpu_mapping = NULL; + src.size = 4 * width * height; + src.num_tiles = 4 * width * height; + dst = tmp = src; + + src.bo = bo.src; + tmp.bo = bo.tmp; + dst.bo = 
bo.dst; /* The ring we've been using is 128k, and each rendering op * will use at least 8 dwords: @@ -101,48 +149,75 @@ int main(int argc, char **argv) * So iterate just a little more than that -- if we don't fill the ring * doing this, we aren't likely to with this test. */ - blits = width * height; - for (i = 0; i < blits; i++) { + for (i = 0; i < width * height; i++) { int x = i % width; int y = i / width; assert(y < height); - BEGIN_BATCH(8); - OUT_BATCH(XY_SRC_COPY_BLT_CMD | - XY_SRC_COPY_BLT_WRITE_ALPHA | - XY_SRC_COPY_BLT_WRITE_RGB); - OUT_BATCH((3 << 24) | /* 32 bits */ - (0xcc << 16) | /* copy ROP */ - pitch); - OUT_BATCH((y << 16) | x); /* dst x1,y1 */ - OUT_BATCH(((y + 1) << 16) | (x + 1)); /* dst x2,y2 */ - OUT_RELOC(dst_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0); - OUT_BATCH((y << 16) | x); /* src x1,y1 */ - OUT_BATCH(pitch); - OUT_RELOC(src_bo, I915_GEM_DOMAIN_RENDER, 0, 0); - ADVANCE_BATCH(); - - intel_batchbuffer_flush(batch); + /* Dummy load to fill the ring */ + copy(batch, &src, 0, 0, width, height, &tmp, 0, 0); + /* And copy the src into dst, pixel by pixel */ + copy(batch, &src, x, y, 1, 1, &dst, x, y); } /* verify */ - drm_intel_bo_map(dst_bo, false); - map = dst_bo->virtual; - for (i = 0; i < blits; i++) { - int x = i % width; - int y = i / width; + i = check_bo(&bo); + destroy_bo(&bo); - if (map[i] != i) { + return i; +} - printf("Copy #%d at %d,%d failed: read 0x%08x\n", - i, x, y, map[i]); +static void blt_copy(struct intel_batchbuffer *batch, + struct scratch_buf *src, unsigned src_x, unsigned src_y, + unsigned width, unsigned height, + struct scratch_buf *dst, unsigned dst_x, unsigned dst_y) +{ + BEGIN_BATCH(8); + OUT_BATCH(XY_SRC_COPY_BLT_CMD | + XY_SRC_COPY_BLT_WRITE_ALPHA | + XY_SRC_COPY_BLT_WRITE_RGB); + OUT_BATCH((3 << 24) | /* 32 bits */ + (0xcc << 16) | /* copy ROP */ + dst->stride); + OUT_BATCH((dst_y << 16) | dst_x); /* dst x1,y1 */ + OUT_BATCH(((dst_y + width) << 16) | (dst_x + width)); /* dst x2,y2 */ + 
OUT_RELOC(dst->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0); + OUT_BATCH((src_y << 16) | src_x); /* src x1,y1 */ + OUT_BATCH(src->stride); + OUT_RELOC(src->bo, I915_GEM_DOMAIN_RENDER, 0, 0); + ADVANCE_BATCH(); + + intel_batchbuffer_flush(batch); +} - if (fails++ > 9) - exit(1); - } - } - drm_intel_bo_unmap(dst_bo); +int main(int argc, char **argv) +{ + drm_intel_bufmgr *bufmgr; + struct intel_batchbuffer *batch; + render_copyfunc_t copy; + int fd, fails = 0; + + fd = drm_open_any(); + + bufmgr = drm_intel_bufmgr_gem_init(fd, 4096); + drm_intel_bufmgr_gem_enable_reuse(bufmgr); + batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd)); + + fails += check_ring(bufmgr, batch, "blt", blt_copy); + + /* Strictly only required on architectures with a separate BLT ring, + * but lets stress everybody. + */ + copy = NULL; + if (IS_GEN2(batch->devid)) + copy = gen2_render_copyfunc; + else if (IS_GEN3(batch->devid)) + copy = gen3_render_copyfunc; + else if (IS_GEN6(batch->devid)) + copy = gen6_render_copyfunc; + if (copy) + fails += check_ring(bufmgr, batch, "render", copy); intel_batchbuffer_free(batch); drm_intel_bufmgr_destroy(bufmgr); |