Diffstat (limited to 'tests/i915/gem_stress.c')
-rw-r--r--  tests/i915/gem_stress.c  914
1 file changed, 914 insertions, 0 deletions
diff --git a/tests/i915/gem_stress.c b/tests/i915/gem_stress.c
new file mode 100644
index 00000000..225f283e
--- /dev/null
+++ b/tests/i915/gem_stress.c
@@ -0,0 +1,914 @@
+/*
+ * Copyright © 2011 Daniel Vetter
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Daniel Vetter <daniel.vetter@ffwll.ch>
+ *
+ * Partially based upon gem_tiled_fence_blits.c
+ */
+
+/** @file gem_stress.c
+ *
+ * This is a general gem coherency test. It's designed to eventually replicate
+ * any possible sequence of access patterns. It works by copying a set of tiles
+ * between two sets of backing buffer objects, randomly permuting the assigned
+ * position on each copy operation.
+ *
+ * The copy operations are done in tiny portions (to reduce any race windows
+ * for corruptions, hence increasing the chances of observing one) and are
+ * constantly switched between all available means of copying (fenced blitter,
+ * unfenced render, mmap, pwrite/read).
+ *
+ * After every complete move of a set, the tiling parameters of a buffer are
+ * randomly changed to simulate the effects of libdrm caching.
+ *
+ * Buffers are 1 MB big to nicely fit into fences on gen2/3. A few are further
+ * split up to test relaxed fencing. Using this to push the average working set
+ * size over the available GTT space forces objects to be mapped as unfenceable
+ * (and as a side-effect tests GTT map/unmap coherency).
+ *
+ * In short: designed for maximum evilness.
+ */
+
+#include "igt.h"
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+
+#include <drm.h>
+
+#include "intel_bufmgr.h"
+
+IGT_TEST_DESCRIPTION("General gem coherency test.");
+
+#define CMD_POLY_STIPPLE_OFFSET 0x7906
+
+#define DUCTAPE 0xdead0001
+#define TILESZ 0xdead0002
+#define CHCK_RENDER 0xdead0003
+
+/** TODO:
+ * - beat on relaxed fencing (i.e. mappable/fenceable tracking in the kernel)
+ * - render copy (to check fence tracking and cache coherency management by the
+ * kernel)
+ * - multi-threading: probably just a wrapper script to launch multiple
+ * instances + an option to accordingly reduce the working set
+ * - gen6 inter-ring coherency (needs render copy, first)
+ * - variable buffer size
+ * - add an option to fork a second process that randomly sends signals to the
+ * first one (to check consistency of the kernel recovery paths)
+ */
+
+drm_intel_bufmgr *bufmgr;
+struct intel_batchbuffer *batch;
+int drm_fd;
+int devid;
+int num_fences;
+
+drm_intel_bo *busy_bo;
+
+struct option_struct {
+ unsigned scratch_buf_size;
+ unsigned max_dimension;
+ unsigned num_buffers;
+ int trace_tile;
+ int no_hw;
+ int gpu_busy_load;
+ int use_render;
+ int use_blt;
+ int forced_tiling;
+ int use_cpu_maps;
+ int total_rounds;
+ int fail;
+ int tiles_per_buf;
+ int ducttape;
+ int tile_size;
+ int check_render_cpyfn;
+ int use_signal_helper;
+};
+
+struct option_struct options;
+
+#define MAX_BUFS 4096
+#define SCRATCH_BUF_SIZE (1024*1024)
+#define BUSY_BUF_SIZE (256*4096)
+#define TILE_BYTES(size) ((size)*(size)*sizeof(uint32_t))
+
+static struct igt_buf buffers[2][MAX_BUFS];
+/* tile i is at logical position tile_permutation[i] */
+static unsigned *tile_permutation;
+static unsigned num_buffers = 0;
+static unsigned current_set = 0;
+static unsigned target_set = 0;
+static unsigned num_total_tiles = 0;
+
+int fence_storm = 0;
+static int gpu_busy_load = 10;
+
+struct {
+ unsigned num_failed;
+ unsigned max_failed_reads;
+} stats;
+
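+/* Convert a tile index within a buffer into its (x, y) position, both in
+ * units of uint32_t. Tiles are packed row-major: each buffer row holds
+ * stride/sizeof(uint32_t)/tile_size tiles, each tile_size scanlines tall. */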
+static void tile2xy(struct igt_buf *buf, unsigned tile, unsigned *x, unsigned *y)
+{
+ igt_assert(tile < buf->num_tiles);
+ *x = (tile*options.tile_size) % (buf->stride/sizeof(uint32_t));
+ *y = ((tile*options.tile_size) / (buf->stride/sizeof(uint32_t))) * options.tile_size;
+}
+
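+/* Emit an XY_SRC_COPY blit of a w x h rectangle of 32bpp pixels. On gen4+
+ * the pitch of tiled surfaces is programmed in dwords (hence the /4) and the
+ * corresponding TILED bit must be set in the command. */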
+static void emit_blt(drm_intel_bo *src_bo, uint32_t src_tiling, unsigned src_pitch,
+ unsigned src_x, unsigned src_y, unsigned w, unsigned h,
+ drm_intel_bo *dst_bo, uint32_t dst_tiling, unsigned dst_pitch,
+ unsigned dst_x, unsigned dst_y)
+{
+ uint32_t cmd_bits = 0;
+
+ if (IS_965(devid) && src_tiling) {
+ src_pitch /= 4;
+ cmd_bits |= XY_SRC_COPY_BLT_SRC_TILED;
+ }
+
+ if (IS_965(devid) && dst_tiling) {
+ dst_pitch /= 4;
+ cmd_bits |= XY_SRC_COPY_BLT_DST_TILED;
+ }
+
+ /* XY_SRC_COPY_BLT: dst rect and reloc first, then src */
+ BLIT_COPY_BATCH_START(cmd_bits);
+ OUT_BATCH((3 << 24) | /* 32 bits */
+ (0xcc << 16) | /* copy ROP */
+ dst_pitch);
+ OUT_BATCH(dst_y << 16 | dst_x);
+ OUT_BATCH((dst_y+h) << 16 | (dst_x+w));
+ OUT_RELOC_FENCED(dst_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
+ OUT_BATCH(src_y << 16 | src_x);
+ OUT_BATCH(src_pitch);
+ OUT_RELOC_FENCED(src_bo, I915_GEM_DOMAIN_RENDER, 0, 0);
+ ADVANCE_BATCH();
+
+ if (batch->gen >= 6) {
+ BEGIN_BATCH(3, 0);
+ OUT_BATCH(XY_SETUP_CLIP_BLT_CMD);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+ }
+}
+
+/* All this gem thrashing wastes too much cpu time, so give the gpu something
+ * to do to increase the chances for races. */
+static void keep_gpu_busy(void)
+{
+ int tmp;
+
+ tmp = 1 << gpu_busy_load;
+ igt_assert_lte(tmp, 1024);
+
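+ /* blit a tmp-pixel-wide, 128-row strip from the top half of busy_bo
+ * onto its bottom half */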
+ emit_blt(busy_bo, 0, 4096, 0, 0, tmp, 128,
+ busy_bo, 0, 4096, 0, 128);
+}
+
+static void set_to_cpu_domain(struct igt_buf *buf, int writing)
+{
+ gem_set_domain(drm_fd, buf->bo->handle, I915_GEM_DOMAIN_CPU,
+ writing ? I915_GEM_DOMAIN_CPU : 0);
+}
+
+static unsigned int copyfunc_seq = 0;
+static void (*copyfunc)(struct igt_buf *src, unsigned src_x, unsigned src_y,
+ struct igt_buf *dst, unsigned dst_x, unsigned dst_y,
+ unsigned logical_tile_no);
+
+/* stride, x, y in units of uint32_t! */
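+/* Copy one tile with the cpu, verifying that each dword holds its expected
+ * sequence value; mismatches are logged and, unless we fail hard, corrected
+ * in the destination. */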
+static void cpucpy2d(uint32_t *src, unsigned src_stride, unsigned src_x, unsigned src_y,
+ uint32_t *dst, unsigned dst_stride, unsigned dst_x, unsigned dst_y,
+ unsigned logical_tile_no)
+{
+ int i, j;
+ int failed = 0;
+
+ for (i = 0; i < options.tile_size; i++) {
+ for (j = 0; j < options.tile_size; j++) {
+ unsigned dst_ofs = dst_x + j + dst_stride * (dst_y + i);
+ unsigned src_ofs = src_x + j + src_stride * (src_y + i);
+ unsigned expect = logical_tile_no*options.tile_size*options.tile_size
+ + i*options.tile_size + j;
+ uint32_t tmp = src[src_ofs];
+ if (tmp != expect) {
+ igt_info("mismatch at tile %i pos %i, read %i, expected %i, diff %i\n", logical_tile_no, i * options.tile_size + j, tmp, expect, (int)tmp - expect);
+ igt_fail_on(options.trace_tile >= 0 && options.fail);
+ failed++;
+ }
+ /* when not aborting, correct any errors */
+ dst[dst_ofs] = expect;
+ }
+ }
+ igt_fail_on(failed && options.fail);
+
+ if (failed > stats.max_failed_reads)
+ stats.max_failed_reads = failed;
+ if (failed)
+ stats.num_failed++;
+}
+
+static void cpu_copyfunc(struct igt_buf *src, unsigned src_x, unsigned src_y,
+ struct igt_buf *dst, unsigned dst_x, unsigned dst_y,
+ unsigned logical_tile_no)
+{
+ igt_assert(batch->ptr == batch->buffer);
+
+ if (options.ducttape)
+ drm_intel_bo_wait_rendering(dst->bo);
+
+ if (options.use_cpu_maps) {
+ set_to_cpu_domain(src, 0);
+ set_to_cpu_domain(dst, 1);
+ }
+
+ cpucpy2d(src->data, src->stride/sizeof(uint32_t), src_x, src_y,
+ dst->data, dst->stride/sizeof(uint32_t), dst_x, dst_y,
+ logical_tile_no);
+}
+
+static void prw_copyfunc(struct igt_buf *src, unsigned src_x, unsigned src_y,
+ struct igt_buf *dst, unsigned dst_x, unsigned dst_y,
+ unsigned logical_tile_no)
+{
+ uint32_t tmp_tile[options.tile_size*options.tile_size];
+ int i;
+
+ igt_assert(batch->ptr == batch->buffer);
+
+ if (options.ducttape)
+ drm_intel_bo_wait_rendering(dst->bo);
+
+ if (src->tiling == I915_TILING_NONE) {
+ for (i = 0; i < options.tile_size; i++) {
+ unsigned ofs = src_x*sizeof(uint32_t) + src->stride*(src_y + i);
+ drm_intel_bo_get_subdata(src->bo, ofs,
+ options.tile_size*sizeof(uint32_t),
+ tmp_tile + options.tile_size*i);
+ }
+ } else {
+ if (options.use_cpu_maps)
+ set_to_cpu_domain(src, 0);
+
+ cpucpy2d(src->data, src->stride/sizeof(uint32_t), src_x, src_y,
+ tmp_tile, options.tile_size, 0, 0, logical_tile_no);
+ }
+
+ if (dst->tiling == I915_TILING_NONE) {
+ for (i = 0; i < options.tile_size; i++) {
+ unsigned ofs = dst_x*sizeof(uint32_t) + dst->stride*(dst_y + i);
+ drm_intel_bo_subdata(dst->bo, ofs,
+ options.tile_size*sizeof(uint32_t),
+ tmp_tile + options.tile_size*i);
+ }
+ } else {
+ if (options.use_cpu_maps)
+ set_to_cpu_domain(dst, 1);
+
+ cpucpy2d(tmp_tile, options.tile_size, 0, 0,
+ dst->data, dst->stride/sizeof(uint32_t), dst_x, dst_y,
+ logical_tile_no);
+ }
+}
+
+static void blitter_copyfunc(struct igt_buf *src, unsigned src_x, unsigned src_y,
+ struct igt_buf *dst, unsigned dst_x, unsigned dst_y,
+ unsigned logical_tile_no)
+{
+ static unsigned keep_gpu_busy_counter = 0;
+
+ /* check both edges of the fence usage */
+ if (keep_gpu_busy_counter & 1 && !fence_storm)
+ keep_gpu_busy();
+
+ emit_blt(src->bo, src->tiling, src->stride, src_x, src_y,
+ options.tile_size, options.tile_size,
+ dst->bo, dst->tiling, dst->stride, dst_x, dst_y);
+
+ if (!(keep_gpu_busy_counter & 1) && !fence_storm)
+ keep_gpu_busy();
+
+ keep_gpu_busy_counter++;
+
+ if (src->tiling)
+ fence_storm--;
+ if (dst->tiling)
+ fence_storm--;
+
+ if (fence_storm <= 1) {
+ fence_storm = 0;
+ intel_batchbuffer_flush(batch);
+ }
+}
+
+static void render_copyfunc(struct igt_buf *src, unsigned src_x, unsigned src_y,
+ struct igt_buf *dst, unsigned dst_x, unsigned dst_y,
+ unsigned logical_tile_no)
+{
+ static unsigned keep_gpu_busy_counter = 0;
+ igt_render_copyfunc_t rendercopy = igt_get_render_copyfunc(devid);
+
+ /* check both edges of the fence usage */
+ if (keep_gpu_busy_counter & 1)
+ keep_gpu_busy();
+
+ if (rendercopy) {
+ /*
+ * Flush outstanding blts so that they don't end up on
+ * the render ring when that's not allowed (gen6+).
+ */
+ intel_batchbuffer_flush(batch);
+ rendercopy(batch, NULL, src, src_x, src_y,
+ options.tile_size, options.tile_size,
+ dst, dst_x, dst_y);
+ } else
+ blitter_copyfunc(src, src_x, src_y,
+ dst, dst_x, dst_y,
+ logical_tile_no);
+ if (!(keep_gpu_busy_counter & 1))
+ keep_gpu_busy();
+
+ keep_gpu_busy_counter++;
+ intel_batchbuffer_flush(batch);
+}
+
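+/* Pick the copy method for the next tile. The moduli (61, 17, 19, 3) are
+ * pairwise coprime, so the sequence counter deterministically cycles through
+ * fence storms, cpu, pwrite/pread and render copies, with the blitter (or
+ * whatever is still enabled) as the fallback. */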
+static void next_copyfunc(int tile)
+{
+ if (fence_storm) {
+ if (tile == options.trace_tile)
+ igt_info(" using fence storm\n");
+ return;
+ }
+
+ if (copyfunc_seq % 61 == 0
+ && options.forced_tiling != I915_TILING_NONE) {
+ if (tile == options.trace_tile)
+ igt_info(" using fence storm\n");
+ fence_storm = num_fences;
+ copyfunc = blitter_copyfunc;
+ } else if (copyfunc_seq % 17 == 0) {
+ if (tile == options.trace_tile)
+ igt_info(" using cpu\n");
+ copyfunc = cpu_copyfunc;
+ } else if (copyfunc_seq % 19 == 0) {
+ if (tile == options.trace_tile)
+ igt_info(" using prw\n");
+ copyfunc = prw_copyfunc;
+ } else if (copyfunc_seq % 3 == 0 && options.use_render) {
+ if (tile == options.trace_tile)
+ igt_info(" using render\n");
+ copyfunc = render_copyfunc;
+ } else if (options.use_blt) {
+ if (tile == options.trace_tile)
+ igt_info(" using blitter\n");
+ copyfunc = blitter_copyfunc;
+ } else if (options.use_render) {
+ if (tile == options.trace_tile)
+ igt_info(" using render\n");
+ copyfunc = render_copyfunc;
+ } else {
+ copyfunc = cpu_copyfunc;
+ }
+
+ copyfunc_seq++;
+}
+
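+/* Seed all tiles of the current set with their sequential test pattern and
+ * reset tile_permutation to the identity. */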
+static void fan_out(void)
+{
+ uint32_t tmp_tile[options.tile_size*options.tile_size];
+ uint32_t seq = 0;
+ int i, k;
+ unsigned tile, buf_idx, x, y;
+
+ for (i = 0; i < num_total_tiles; i++) {
+ tile = i;
+ buf_idx = tile / options.tiles_per_buf;
+ tile %= options.tiles_per_buf;
+
+ tile2xy(&buffers[current_set][buf_idx], tile, &x, &y);
+
+ for (k = 0; k < options.tile_size*options.tile_size; k++)
+ tmp_tile[k] = seq++;
+
+ if (options.use_cpu_maps)
+ set_to_cpu_domain(&buffers[current_set][buf_idx], 1);
+
+ cpucpy2d(tmp_tile, options.tile_size, 0, 0,
+ buffers[current_set][buf_idx].data,
+ buffers[current_set][buf_idx].stride / sizeof(uint32_t),
+ x, y, i);
+ }
+
+ for (i = 0; i < num_total_tiles; i++)
+ tile_permutation[i] = i;
+}
+
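+/* Gather every tile (tile_permutation[i] gives the current position of
+ * logical tile i) and let cpucpy2d verify its pattern. */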
+static void fan_in_and_check(void)
+{
+ uint32_t tmp_tile[options.tile_size*options.tile_size];
+ unsigned tile, buf_idx, x, y;
+ int i;
+ for (i = 0; i < num_total_tiles; i++) {
+ tile = tile_permutation[i];
+ buf_idx = tile / options.tiles_per_buf;
+ tile %= options.tiles_per_buf;
+
+ tile2xy(&buffers[current_set][buf_idx], tile, &x, &y);
+
+ if (options.use_cpu_maps)
+ set_to_cpu_domain(&buffers[current_set][buf_idx], 0);
+
+ cpucpy2d(buffers[current_set][buf_idx].data,
+ buffers[current_set][buf_idx].stride / sizeof(uint32_t),
+ x, y,
+ tmp_tile, options.tile_size, 0, 0,
+ i);
+ }
+}
+
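+/* Clamp the stride so the buffer stays within max_dimension and still fits
+ * at least one full tile in both dimensions. */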
+static void sanitize_stride(struct igt_buf *buf)
+{
+ if (igt_buf_height(buf) > options.max_dimension)
+ buf->stride = buf->size / options.max_dimension;
+
+ if (igt_buf_height(buf) < options.tile_size)
+ buf->stride = buf->size / options.tile_size;
+
+ if (igt_buf_width(buf) < options.tile_size)
+ buf->stride = options.tile_size * sizeof(uint32_t);
+
+ igt_assert(buf->stride <= 8192);
+ igt_assert(igt_buf_width(buf) <= options.max_dimension);
+ igt_assert(igt_buf_height(buf) <= options.max_dimension);
+
+ igt_assert(igt_buf_width(buf) >= options.tile_size);
+ igt_assert(igt_buf_height(buf) >= options.tile_size);
+}
+
+static void init_buffer(struct igt_buf *buf, unsigned size)
+{
+ memset(buf, 0, sizeof(*buf));
+
+ buf->bo = drm_intel_bo_alloc(bufmgr, "tiled bo", size, 4096);
+ buf->size = size;
+ igt_assert(buf->bo);
+ buf->tiling = I915_TILING_NONE;
+ buf->stride = 4096;
+
+ sanitize_stride(buf);
+
+ if (options.no_hw)
+ buf->data = malloc(size);
+ else {
+ if (options.use_cpu_maps)
+ drm_intel_bo_map(buf->bo, 1);
+ else
+ drm_intel_gem_bo_map_gtt(buf->bo);
+ buf->data = buf->bo->virtual;
+ }
+
+ buf->num_tiles = options.tiles_per_buf;
+}
+
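+/* swap callback for igt_permute_array() */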
+static void exchange_buf(void *array, unsigned i, unsigned j)
+{
+ struct igt_buf *buf_arr, tmp;
+ buf_arr = array;
+
+ memcpy(&tmp, &buf_arr[i], sizeof(struct igt_buf));
+ memcpy(&buf_arr[i], &buf_arr[j], sizeof(struct igt_buf));
+ memcpy(&buf_arr[j], &tmp, sizeof(struct igt_buf));
+}
+
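+/* Randomize a buffer set: shuffle the buffers and pick new tiling modes and
+ * strides to simulate the effects of libdrm buffer reuse. */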
+static void init_set(unsigned set)
+{
+ long int r;
+ int i;
+
+ igt_permute_array(buffers[set], num_buffers, exchange_buf);
+
+ if (current_set == 1 && options.gpu_busy_load == 0) {
+ gpu_busy_load++;
+ if (gpu_busy_load > 10)
+ gpu_busy_load = 6;
+ }
+
+ for (i = 0; i < num_buffers; i++) {
+ r = random();
+ if ((r & 3) != 0)
+ continue;
+ r >>= 2;
+
+ if ((r & 3) != 0)
+ buffers[set][i].tiling = I915_TILING_X;
+ else
+ buffers[set][i].tiling = I915_TILING_NONE;
+ r >>= 2;
+ if (options.forced_tiling >= 0)
+ buffers[set][i].tiling = options.forced_tiling;
+
+ if (buffers[set][i].tiling == I915_TILING_NONE) {
+ /* min 64 byte stride */
+ r %= 8;
+ buffers[set][i].stride = 64 * (1 << r);
+ } else if (IS_GEN2(devid)) {
+ /* min 128 byte stride */
+ r %= 7;
+ buffers[set][i].stride = 128 * (1 << r);
+ } else {
+ /* min 512 byte stride */
+ r %= 5;
+ buffers[set][i].stride = 512 * (1 << r);
+ }
+
+ sanitize_stride(&buffers[set][i]);
+
+ gem_set_tiling(drm_fd, buffers[set][i].bo->handle,
+ buffers[set][i].tiling,
+ buffers[set][i].stride);
+
+ if (options.trace_tile != -1 && i == options.trace_tile/options.tiles_per_buf)
+ igt_info("changing buffer %i containing tile %i: tiling %i, stride %i\n", i, options.trace_tile, buffers[set][i].tiling, buffers[set][i].stride);
+ }
+}
+
+static void exchange_uint(void *array, unsigned i, unsigned j)
+{
+ unsigned *i_arr = array;
+
+ igt_swap(i_arr[i], i_arr[j]);
+}
+
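+/* Copy each logical tile i from its current position tile_permutation[i] in
+ * current_set to position permutation[tile_permutation[i]] in target_set. */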
+static void copy_tiles(unsigned *permutation)
+{
+ unsigned src_tile, src_buf_idx, src_x, src_y;
+ unsigned dst_tile, dst_buf_idx, dst_x, dst_y;
+ struct igt_buf *src_buf, *dst_buf;
+ int i, idx;
+ for (i = 0; i < num_total_tiles; i++) {
+ /* tile_permutation is independent of current_permutation, so
+ * abuse it to randomize the order of the src bos */
+ idx = tile_permutation[i];
+ src_buf_idx = idx / options.tiles_per_buf;
+ src_tile = idx % options.tiles_per_buf;
+ src_buf = &buffers[current_set][src_buf_idx];
+
+ tile2xy(src_buf, src_tile, &src_x, &src_y);
+
+ dst_buf_idx = permutation[idx] / options.tiles_per_buf;
+ dst_tile = permutation[idx] % options.tiles_per_buf;
+ dst_buf = &buffers[target_set][dst_buf_idx];
+
+ tile2xy(dst_buf, dst_tile, &dst_x, &dst_y);
+
+ if (options.trace_tile == i)
+ igt_info("copying tile %i from %i (%i, %i) to %i (%i, %i)", i, tile_permutation[i], src_buf_idx, src_tile, permutation[idx], dst_buf_idx, dst_tile);
+
+ if (options.no_hw) {
+ cpucpy2d(src_buf->data,
+ src_buf->stride / sizeof(uint32_t),
+ src_x, src_y,
+ dst_buf->data,
+ dst_buf->stride / sizeof(uint32_t),
+ dst_x, dst_y,
+ i);
+ } else {
+ next_copyfunc(i);
+
+ copyfunc(src_buf, src_x, src_y, dst_buf, dst_x, dst_y,
+ i);
+ }
+ }
+
+ intel_batchbuffer_flush(batch);
+}
+
+static void sanitize_tiles_per_buf(void)
+{
+ if (options.tiles_per_buf > options.scratch_buf_size / TILE_BYTES(options.tile_size))
+ options.tiles_per_buf = options.scratch_buf_size / TILE_BYTES(options.tile_size);
+}
+
+static int parse_options(int opt, int opt_index, void *data)
+{
+ int tmp;
+
+ switch(opt) {
+ case 'd':
+ options.no_hw = 1;
+ igt_info("no-hw debug mode\n");
+ break;
+ case 'S':
+ options.use_signal_helper = 0;
+ igt_info("disabling that pesky nuisance who keeps interrupting us\n");
+ break;
+ case 's':
+ tmp = atoi(optarg);
+ if (tmp < options.tile_size*8192)
+ igt_info("scratch buffer size needs to be at least %i\n", options.tile_size * 8192);
+ else if (tmp & (tmp - 1)) {
+ igt_info("scratch buffer size needs to be a power-of-two\n");
+ } else {
+ igt_info("fixed scratch buffer size to %u\n", tmp);
+ options.scratch_buf_size = tmp;
+ sanitize_tiles_per_buf();
+ }
+ break;
+ case 'g':
+ tmp = atoi(optarg);
+ if (tmp < 0 || tmp > 10)
+ igt_info("gpu busy load needs to be between 0 and 10\n");
+ else {
+ igt_info("gpu busy load factor set to %i\n", tmp);
+ gpu_busy_load = options.gpu_busy_load = tmp;
+ }
+ break;
+ case 'c':
+ options.num_buffers = atoi(optarg);
+ igt_info("buffer count set to %i\n", options.num_buffers);
+ break;
+ case 't':
+ options.trace_tile = atoi(optarg);
+ igt_info("tracing tile %i\n", options.trace_tile);
+ break;
+ case 'r':
+ options.use_render = 0;
+ igt_info("disabling render copy\n");
+ break;
+ case 'b':
+ options.use_blt = 0;
+ igt_info("disabling blt copy\n");
+ break;
+ case 'u':
+ options.forced_tiling = I915_TILING_NONE;
+ igt_info("disabling tiling\n");
+ break;
+ case 'x':
+ if (options.use_cpu_maps) {
+ igt_info("tiling not possible with cpu maps\n");
+ } else {
+ options.forced_tiling = I915_TILING_X;
+ igt_info("using only X-tiling\n");
+ }
+ break;
+ case 'm':
+ options.use_cpu_maps = 1;
+ options.forced_tiling = I915_TILING_NONE;
+ igt_info("using cpu maps, disabling tiling\n");
+ break;
+ case 'o':
+ options.total_rounds = atoi(optarg);
+ igt_info("total rounds %i\n", options.total_rounds);
+ break;
+ case 'f':
+ options.fail = 0;
+ igt_info("not failing when detecting errors\n");
+ break;
+ case 'p':
+ options.tiles_per_buf = atoi(optarg);
+ igt_info("tiles per buffer %i\n", options.tiles_per_buf);
+ break;
+ case DUCTAPE:
+ options.ducttape = 0;
+ igt_info("removing duct-tape\n");
+ break;
+ case TILESZ:
+ options.tile_size = atoi(optarg);
+ sanitize_tiles_per_buf();
+ igt_info("tile size %i\n", options.tile_size);
+ break;
+ case CHCK_RENDER:
+ options.check_render_cpyfn = 1;
+ igt_info("checking render copy function\n");
+ break;
+ }
+
+ /* actually 32767, according to docs, but that kills our nice power-of-two calculations. */
+ options.max_dimension = 16*1024;
+ if (options.use_render) {
+ if (IS_GEN2(devid) || IS_GEN3(devid))
+ options.max_dimension = 2048;
+ else
+ options.max_dimension = 8192;
+ }
+ igt_info("Limiting buffer to %dx%d\n", options.max_dimension, options.max_dimension);
+
+ return 0;
+}
+
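+/* Set up bufmgr, batch and the busy bo, then allocate both buffer sets. When
+ * no buffer count is given, size the sets so that together they fill roughly
+ * two thirds of the (256MB-capped) aperture. */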
+static void init(void)
+{
+ int i;
+ unsigned tmp;
+
+ if (options.num_buffers == 0) {
+ tmp = gem_aperture_size(drm_fd);
+ tmp = min(256 * (1024 * 1024), tmp);
+ num_buffers = 2 * tmp / options.scratch_buf_size / 3;
+ num_buffers /= 2;
+ igt_info("using %u buffers\n", num_buffers);
+ } else
+ num_buffers = options.num_buffers;
+
+ bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096);
+ drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+ drm_intel_bufmgr_gem_enable_fenced_relocs(bufmgr);
+ num_fences = gem_available_fences(drm_fd);
+ igt_assert_lt(4, num_fences);
+ batch = intel_batchbuffer_alloc(bufmgr, devid);
+
+ busy_bo = drm_intel_bo_alloc(bufmgr, "tiled bo", BUSY_BUF_SIZE, 4096);
+ if (options.forced_tiling >= 0)
+ gem_set_tiling(drm_fd, busy_bo->handle, options.forced_tiling, 4096);
+
+ for (i = 0; i < num_buffers; i++) {
+ init_buffer(&buffers[0][i], options.scratch_buf_size);
+ init_buffer(&buffers[1][i], options.scratch_buf_size);
+
+ num_total_tiles += buffers[0][i].num_tiles;
+ }
+ current_set = 0;
+
+ /* just in case it helps reproducibility */
+ srandom(0xdeadbeef);
+}
+
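+/* Quick self-test of the render copy path: write a known pattern at a random
+ * offset in src, render-copy one tile to a random offset in dst and compare. */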
+static void check_render_copyfunc(void)
+{
+ struct igt_buf src, dst;
+ uint32_t *ptr;
+ int i, j, pass;
+
+ if (!options.check_render_cpyfn)
+ return;
+
+ init_buffer(&src, options.scratch_buf_size);
+ init_buffer(&dst, options.scratch_buf_size);
+
+ for (pass = 0; pass < 16; pass++) {
+ int sx = random() % (igt_buf_width(&src)-options.tile_size);
+ int sy = random() % (igt_buf_height(&src)-options.tile_size);
+ int dx = random() % (igt_buf_width(&dst)-options.tile_size);
+ int dy = random() % (igt_buf_height(&dst)-options.tile_size);
+
+ if (options.use_cpu_maps)
+ set_to_cpu_domain(&src, 1);
+
+ memset(src.data, 0xff, options.scratch_buf_size);
+ for (j = 0; j < options.tile_size; j++) {
+ ptr = (uint32_t*)((char *)src.data + sx*4 + (sy+j) * src.stride);
+ for (i = 0; i < options.tile_size; i++)
+ ptr[i] = j * options.tile_size + i;
+ }
+
+ render_copyfunc(&src, sx, sy, &dst, dx, dy, 0);
+
+ if (options.use_cpu_maps)
+ set_to_cpu_domain(&dst, 0);
+
+ for (j = 0; j < options.tile_size; j++) {
+ ptr = (uint32_t*)((char *)dst.data + dx*4 + (dy+j) * dst.stride);
+ for (i = 0; i < options.tile_size; i++)
+ if (ptr[i] != j * options.tile_size + i) {
+ igt_info("render copyfunc mismatch at (%d, %d): found %d, expected %d\n", i, j, ptr[i], j * options.tile_size + i);
+ }
+ }
+ }
+}
+
+int main(int argc, char **argv)
+{
+ int i, j;
+ unsigned *current_permutation, *tmp_permutation;
+ static struct option long_options[] = {
+ {"no-hw", 0, 0, 'd'},
+ {"buf-size", 1, 0, 's'},
+ {"gpu-busy-load", 1, 0, 'g'},
+ {"no-signals", 0, 0, 'S'},
+ {"buffer-count", 1, 0, 'c'},
+ {"trace-tile", 1, 0, 't'},
+ {"disable-blt", 0, 0, 'b'},
+ {"disable-render", 0, 0, 'r'},
+ {"untiled", 0, 0, 'u'},
+ {"x-tiled", 0, 0, 'x'},
+ {"use-cpu-maps", 0, 0, 'm'},
+ {"rounds", 1, 0, 'o'},
+ {"no-fail", 0, 0, 'f'},
+ {"tiles-per-buf", 1, 0, 'p'},
+ {"remove-duct-tape", 0, 0, DUCTAPE},
+ {"tile-size", 1, 0, TILESZ},
+ {"check-render-cpyfn", 0, 0, CHCK_RENDER},
+ {NULL, 0, 0, 0},
+ };
+
+ options.scratch_buf_size = 256*4096;
+ options.no_hw = 0;
+ options.use_signal_helper = 1;
+ options.gpu_busy_load = 0;
+ options.num_buffers = 0;
+ options.trace_tile = -1;
+ options.use_render = 1;
+ options.use_blt = 1;
+ options.forced_tiling = -1;
+ options.use_cpu_maps = 0;
+ options.total_rounds = 512;
+ options.fail = 1;
+ options.ducttape = 1;
+ options.tile_size = 16;
+ options.tiles_per_buf = options.scratch_buf_size / TILE_BYTES(options.tile_size);
+ options.check_render_cpyfn = 0;
+
+ igt_simple_init_parse_opts(&argc, argv, "ds:g:c:t:rbuxmo:fp:",
+ long_options, NULL, parse_options, NULL);
+
+ drm_fd = drm_open_driver(DRIVER_INTEL);
+ devid = intel_get_drm_devid(drm_fd);
+
+ /* start our little helper early before too many allocations occur */
+ if (options.use_signal_helper)
+ igt_fork_signal_helper();
+
+ init();
+
+ check_render_copyfunc();
+
+ tile_permutation = malloc(num_total_tiles*sizeof(uint32_t));
+ current_permutation = malloc(num_total_tiles*sizeof(uint32_t));
+ tmp_permutation = malloc(num_total_tiles*sizeof(uint32_t));
+ igt_assert(tile_permutation);
+ igt_assert(current_permutation);
+ igt_assert(tmp_permutation);
+
+ fan_out();
+
+ for (i = 0; i < options.total_rounds; i++) {
+ igt_info("round %i\n", i);
+ if (i % 64 == 63) {
+ fan_in_and_check();
+ igt_info("everything correct after %i rounds\n", i + 1);
+ }
+
+ target_set = (current_set + 1) & 1;
+ init_set(target_set);
+
+ for (j = 0; j < num_total_tiles; j++)
+ current_permutation[j] = j;
+ igt_permute_array(current_permutation, num_total_tiles, exchange_uint);
+
+ copy_tiles(current_permutation);
+
+ memcpy(tmp_permutation, tile_permutation, sizeof(unsigned)*num_total_tiles);
+
+ /* accumulate the permutations */
+ for (j = 0; j < num_total_tiles; j++)
+ tile_permutation[j] = current_permutation[tmp_permutation[j]];
+
+ current_set = target_set;
+ }
+
+ fan_in_and_check();
+
+ igt_info("num failed tiles %u, max incoherent bytes %zu\n", stats.num_failed, stats.max_failed_reads * sizeof(uint32_t));
+
+ intel_batchbuffer_free(batch);
+ drm_intel_bufmgr_destroy(bufmgr);
+
+ close(drm_fd);
+
+ igt_stop_signal_helper();
+
+ igt_exit();
+}