diff options
| author | Chris Wilson <chris@chris-wilson.co.uk> | 2012-02-20 22:53:26 +0000 |
|---|---|---|
| committer | Daniel Vetter <daniel.vetter@ffwll.ch> | 2012-02-22 10:48:03 +0100 |
| commit | fa6c2757feb18b17120310fe315ed32594ff326c (patch) | |
| tree | 57d8ef3c70a368369291c74288c2f7e396d983e6 /tests | |
| parent | 5cf555868214ac6d55d8102d3198813aadc37853 (diff) | |
tests/gem_ringfill: Exercise all rings
On SandyBridge, the BLT commands were split from the RENDER commands as
well as the BSD split inherited from Ironlake. So we need to make sure
we do exercise each ring, and in order to do so we also need to make
sure each batch takes longer to execute than it takes for us to
submit it.
v2: Exercise each ring sequentially.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Diffstat (limited to 'tests')
-rw-r--r-- | tests/gem_ringfill.c | 185 |
1 file changed, 130 insertions, 55 deletions
diff --git a/tests/gem_ringfill.c b/tests/gem_ringfill.c index 685a010b..2d00f065 100644 --- a/tests/gem_ringfill.c +++ b/tests/gem_ringfill.c @@ -41,50 +41,98 @@ #include <errno.h> #include <sys/stat.h> #include <sys/time.h> + #include "drm.h" #include "i915_drm.h" #include "drmtest.h" #include "intel_bufmgr.h" #include "intel_batchbuffer.h" #include "intel_gpu_tools.h" +#include "rendercopy.h" + +struct bo { + const char *ring; + drm_intel_bo *src, *dst, *tmp; +}; -static drm_intel_bufmgr *bufmgr; -struct intel_batchbuffer *batch; static const int width = 512, height = 512; -int main(int argc, char **argv) +static void create_bo(drm_intel_bufmgr *bufmgr, + struct bo *b, + const char *ring) { - int fd; - int i; - drm_intel_bo *src_bo, *dst_bo; + int size = 4 * width * height, i; uint32_t *map; - int fails = 0; - int pitch = width * 4; - int size = pitch * height; - int blits; - fd = drm_open_any(); - - bufmgr = drm_intel_bufmgr_gem_init(fd, 4096); - drm_intel_bufmgr_gem_enable_reuse(bufmgr); - batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd)); - - src_bo = drm_intel_bo_alloc(bufmgr, "src bo", size, 4096); - dst_bo = drm_intel_bo_alloc(bufmgr, "src bo", size, 4096); + b->ring = ring; + b->src = drm_intel_bo_alloc(bufmgr, "src", size, 4096); + b->dst = drm_intel_bo_alloc(bufmgr, "dst", size, 4096); + b->tmp = drm_intel_bo_alloc(bufmgr, "tmp", size, 4096); /* Fill the src with indexes of the pixels */ - drm_intel_bo_map(src_bo, true); - map = src_bo->virtual; + drm_intel_bo_map(b->src, true); + map = b->src->virtual; for (i = 0; i < width * height; i++) map[i] = i; - drm_intel_bo_unmap(src_bo); + drm_intel_bo_unmap(b->src); /* Fill the dst with garbage. 
*/ - drm_intel_bo_map(dst_bo, true); - map = dst_bo->virtual; + drm_intel_bo_map(b->dst, true); + map = b->dst->virtual; for (i = 0; i < width * height; i++) map[i] = 0xd0d0d0d0; - drm_intel_bo_unmap(dst_bo); + drm_intel_bo_unmap(b->dst); +} + +static int check_bo(struct bo *b) +{ + const uint32_t *map; + int i, fails = 0; + + drm_intel_bo_map(b->dst, false); + map = b->dst->virtual; + for (i = 0; i < width*height; i++) { + if (map[i] != i && ++fails <= 9) { + int x = i % width; + int y = i / width; + + printf("%s: copy #%d at %d,%d failed: read 0x%08x\n", + b->ring, i, x, y, map[i]); + } + } + drm_intel_bo_unmap(b->dst); + + return fails; +} + +static void destroy_bo(struct bo *b) +{ + drm_intel_bo_unreference(b->src); + drm_intel_bo_unreference(b->tmp); + drm_intel_bo_unreference(b->dst); +} + +static int check_ring(drm_intel_bufmgr *bufmgr, + struct intel_batchbuffer *batch, + const char *ring, + render_copyfunc_t copy) +{ + struct scratch_buf src, tmp, dst; + struct bo bo; + int i; + + create_bo(bufmgr, &bo, ring); + + src.stride = 4 * width; + src.tiling = 0; + src.data = src.cpu_mapping = NULL; + src.size = 4 * width * height; + src.num_tiles = 4 * width * height; + dst = tmp = src; + + src.bo = bo.src; + tmp.bo = bo.tmp; + dst.bo = bo.dst; /* The ring we've been using is 128k, and each rendering op * will use at least 8 dwords: @@ -101,48 +149,75 @@ int main(int argc, char **argv) * So iterate just a little more than that -- if we don't fill the ring * doing this, we aren't likely to with this test. 
*/ - blits = width * height; - for (i = 0; i < blits; i++) { + for (i = 0; i < width * height; i++) { int x = i % width; int y = i / width; assert(y < height); - BEGIN_BATCH(8); - OUT_BATCH(XY_SRC_COPY_BLT_CMD | - XY_SRC_COPY_BLT_WRITE_ALPHA | - XY_SRC_COPY_BLT_WRITE_RGB); - OUT_BATCH((3 << 24) | /* 32 bits */ - (0xcc << 16) | /* copy ROP */ - pitch); - OUT_BATCH((y << 16) | x); /* dst x1,y1 */ - OUT_BATCH(((y + 1) << 16) | (x + 1)); /* dst x2,y2 */ - OUT_RELOC(dst_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0); - OUT_BATCH((y << 16) | x); /* src x1,y1 */ - OUT_BATCH(pitch); - OUT_RELOC(src_bo, I915_GEM_DOMAIN_RENDER, 0, 0); - ADVANCE_BATCH(); - - intel_batchbuffer_flush(batch); + /* Dummy load to fill the ring */ + copy(batch, &src, 0, 0, width, height, &tmp, 0, 0); + /* And copy the src into dst, pixel by pixel */ + copy(batch, &src, x, y, 1, 1, &dst, x, y); } /* verify */ - drm_intel_bo_map(dst_bo, false); - map = dst_bo->virtual; - for (i = 0; i < blits; i++) { - int x = i % width; - int y = i / width; + i = check_bo(&bo); + destroy_bo(&bo); - if (map[i] != i) { + return i; +} - printf("Copy #%d at %d,%d failed: read 0x%08x\n", - i, x, y, map[i]); +static void blt_copy(struct intel_batchbuffer *batch, + struct scratch_buf *src, unsigned src_x, unsigned src_y, + unsigned width, unsigned height, + struct scratch_buf *dst, unsigned dst_x, unsigned dst_y) +{ + BEGIN_BATCH(8); + OUT_BATCH(XY_SRC_COPY_BLT_CMD | + XY_SRC_COPY_BLT_WRITE_ALPHA | + XY_SRC_COPY_BLT_WRITE_RGB); + OUT_BATCH((3 << 24) | /* 32 bits */ + (0xcc << 16) | /* copy ROP */ + dst->stride); + OUT_BATCH((dst_y << 16) | dst_x); /* dst x1,y1 */ + OUT_BATCH(((dst_y + width) << 16) | (dst_x + width)); /* dst x2,y2 */ + OUT_RELOC(dst->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0); + OUT_BATCH((src_y << 16) | src_x); /* src x1,y1 */ + OUT_BATCH(src->stride); + OUT_RELOC(src->bo, I915_GEM_DOMAIN_RENDER, 0, 0); + ADVANCE_BATCH(); + + intel_batchbuffer_flush(batch); +} - if (fails++ > 9) - 
exit(1); - } - } - drm_intel_bo_unmap(dst_bo); +int main(int argc, char **argv) +{ + drm_intel_bufmgr *bufmgr; + struct intel_batchbuffer *batch; + render_copyfunc_t copy; + int fd, fails = 0; + + fd = drm_open_any(); + + bufmgr = drm_intel_bufmgr_gem_init(fd, 4096); + drm_intel_bufmgr_gem_enable_reuse(bufmgr); + batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd)); + + fails += check_ring(bufmgr, batch, "blt", blt_copy); + + /* Strictly only required on architectures with a separate BLT ring, + * but lets stress everybody. + */ + copy = NULL; + if (IS_GEN2(batch->devid)) + copy = gen2_render_copyfunc; + else if (IS_GEN3(batch->devid)) + copy = gen3_render_copyfunc; + else if (IS_GEN6(batch->devid)) + copy = gen6_render_copyfunc; + if (copy) + fails += check_ring(bufmgr, batch, "render", copy); intel_batchbuffer_free(batch); drm_intel_bufmgr_destroy(bufmgr); |