summaryrefslogtreecommitdiff
path: root/tests
diff options
context:
space:
mode:
authorDaniel Vetter <daniel.vetter@ffwll.ch>2011-03-27 21:33:29 +0200
committerDaniel Vetter <daniel.vetter@ffwll.ch>2011-03-29 22:52:50 +0200
commitcd640cca6c0eb5936eaf48e43ebc5b3bf7278f6d (patch)
tree61508ac7a13b25ed96e87c409802e1ab0fdc6a01 /tests
parent8ab88c9af062512dee6dc2c43d400a71b165c284 (diff)
gem_stress: render copy on gen3
Headers copied over from xf86-video-intel, code built after the Xrender support. Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Diffstat (limited to 'tests')
-rw-r--r--tests/gem_stress.c250
1 files changed, 246 insertions, 4 deletions
diff --git a/tests/gem_stress.c b/tests/gem_stress.c
index 3fe1248a..aaa57c52 100644
--- a/tests/gem_stress.c
+++ b/tests/gem_stress.c
@@ -66,6 +66,8 @@
#include "intel_bufmgr.h"
#include "intel_batchbuffer.h"
#include "intel_gpu_tools.h"
+#include "i915_reg.h"
+#include "i915_3d.h"
#define CMD_POLY_STIPPLE_OFFSET 0x7906
@@ -202,7 +204,7 @@ static void cpucpy2d(uint32_t *src, unsigned src_stride, unsigned src_x, unsigne
unsigned src_ofs = src_x + j + src_stride * (src_y + i);
unsigned expect = logical_tile_no*TILE_SIZE*TILE_SIZE
+ i*TILE_SIZE + j;
- uint32_t tmp = src[src_ofs];
+ uint32_t tmp = src[src_ofs];
if (tmp != expect) {
printf("mismatch at tile %i pos %i, read %i, expected %i, diff %i\n",
logical_tile_no, i*TILE_SIZE + j, tmp, expect, (int) tmp - expect);
@@ -293,10 +295,10 @@ static void blitter_copyfunc(struct scratch_buf *src, unsigned src_x, unsigned s
dst_pitch);
OUT_BATCH(dst_y << 16 | dst_x);
OUT_BATCH((dst_y+TILE_SIZE) << 16 | (dst_x+TILE_SIZE));
- OUT_RELOC(dst->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
+ OUT_RELOC_FENCED(dst->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
OUT_BATCH(src_y << 16 | src_x);
OUT_BATCH(src_pitch);
- OUT_RELOC(src->bo, I915_GEM_DOMAIN_RENDER, 0, 0);
+ OUT_RELOC_FENCED(src->bo, I915_GEM_DOMAIN_RENDER, 0, 0);
ADVANCE_BATCH();
if (!(keep_gpu_busy_counter & 1) && !fence_storm)
@@ -315,6 +317,241 @@ static void blitter_copyfunc(struct scratch_buf *src, unsigned src_x, unsigned s
}
}
+/* Width of a scratch buffer in 32-bit elements, computed from its stride. */
+static unsigned buf_width(struct scratch_buf *buf)
+{
+	unsigned elems_per_row = buf->stride / sizeof(uint32_t);
+
+	return elems_per_row;
+}
+
+/* Number of rows in a scratch buffer: configured buffer size over its stride. */
+static unsigned buf_height(struct scratch_buf *buf)
+{
+	unsigned rows = options.scratch_buf_size / buf->stride;
+
+	return rows;
+}
+
+/* Emit one float into the batch as its raw 32-bit pattern. */
+static void emit_vertex(float f)
+{
+	/* Reinterpret the float's bits through a union; no numeric conversion. */
+	union { float f; uint32_t ui; } bits = { .f = f };
+
+	OUT_BATCH(bits.ui);
+}
+
+/* Emit f scaled by 1/total into the batch as a raw 32-bit float. */
+static void emit_vertex_normalized(float f, float total)
+{
+	emit_vertex(f / total);
+}
+
+/*
+ * Copy one TILE_SIZE x TILE_SIZE tile from (src_x, src_y) in src to
+ * (dst_x, dst_y) in dst by drawing a textured rectangle: src is bound as
+ * texture 0, dst as the colour buffer, and a three-vertex RECTLIST primitive
+ * is emitted whose texture coordinates are normalized by the source buffer's
+ * width and height.
+ *
+ * logical_tile_no is not used by this function.
+ *
+ * Unless fence_storm is set, keep_gpu_busy() is called before the copy on odd
+ * invocations and after it on even ones. The batch is flushed before
+ * returning.
+ */
+static void gen3_render_copyfunc(struct scratch_buf *src, unsigned src_x, unsigned src_y,
+				 struct scratch_buf *dst, unsigned dst_x, unsigned dst_y,
+				 unsigned logical_tile_no)
+{
+	/* Invocation counter; its parity selects where keep_gpu_busy() runs. */
+	static unsigned keep_gpu_busy_counter = 0;
+
+	/* check both edges of the fence usage */
+	if (keep_gpu_busy_counter & 1 && !fence_storm)
+		keep_gpu_busy();
+
+	/* invariant state */
+	{
+		OUT_BATCH(_3DSTATE_AA_CMD |
+			  AA_LINE_ECAAR_WIDTH_ENABLE |
+			  AA_LINE_ECAAR_WIDTH_1_0 |
+			  AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0);
+		OUT_BATCH(_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD |
+			  IAB_MODIFY_ENABLE |
+			  IAB_MODIFY_FUNC | (BLENDFUNC_ADD << IAB_FUNC_SHIFT) |
+			  IAB_MODIFY_SRC_FACTOR | (BLENDFACT_ONE <<
+						   IAB_SRC_FACTOR_SHIFT) |
+			  IAB_MODIFY_DST_FACTOR | (BLENDFACT_ZERO <<
+						   IAB_DST_FACTOR_SHIFT));
+		OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD);
+		OUT_BATCH(0);
+		OUT_BATCH(_3DSTATE_DFLT_SPEC_CMD);
+		OUT_BATCH(0);
+		OUT_BATCH(_3DSTATE_DFLT_Z_CMD);
+		OUT_BATCH(0);
+		OUT_BATCH(_3DSTATE_COORD_SET_BINDINGS |
+			  CSB_TCB(0, 0) |
+			  CSB_TCB(1, 1) |
+			  CSB_TCB(2, 2) |
+			  CSB_TCB(3, 3) |
+			  CSB_TCB(4, 4) |
+			  CSB_TCB(5, 5) | CSB_TCB(6, 6) | CSB_TCB(7, 7));
+		OUT_BATCH(_3DSTATE_RASTER_RULES_CMD |
+			  ENABLE_POINT_RASTER_RULE |
+			  OGL_POINT_RASTER_RULE |
+			  ENABLE_LINE_STRIP_PROVOKE_VRTX |
+			  ENABLE_TRI_FAN_PROVOKE_VRTX |
+			  LINE_STRIP_PROVOKE_VRTX(1) |
+			  TRI_FAN_PROVOKE_VRTX(2) | ENABLE_TEXKILL_3D_4D | TEXKILL_4D);
+		OUT_BATCH(_3DSTATE_MODES_4_CMD |
+			  ENABLE_LOGIC_OP_FUNC | LOGIC_OP_FUNC(LOGICOP_COPY) |
+			  ENABLE_STENCIL_WRITE_MASK | STENCIL_WRITE_MASK(0xff) |
+			  ENABLE_STENCIL_TEST_MASK | STENCIL_TEST_MASK(0xff));
+		OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | I1_LOAD_S(4) | I1_LOAD_S(5) | 2);
+		OUT_BATCH(0x00000000);	/* Disable texture coordinate wrap-shortest */
+		OUT_BATCH((1 << S4_POINT_WIDTH_SHIFT) |
+			  S4_LINE_WIDTH_ONE |
+			  S4_CULLMODE_NONE |
+			  S4_VFMT_XY);
+		OUT_BATCH(0x00000000);	/* Stencil. */
+		OUT_BATCH(_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT);
+		OUT_BATCH(_3DSTATE_SCISSOR_RECT_0_CMD);
+		OUT_BATCH(0);
+		OUT_BATCH(0);
+		OUT_BATCH(_3DSTATE_DEPTH_SUBRECT_DISABLE);
+		OUT_BATCH(_3DSTATE_LOAD_INDIRECT | 0);	/* disable indirect state */
+		OUT_BATCH(0);
+		OUT_BATCH(_3DSTATE_STIPPLE);
+		OUT_BATCH(0x00000000);
+		OUT_BATCH(_3DSTATE_BACKFACE_STENCIL_OPS | BFO_ENABLE_STENCIL_TWO_SIDE | 0);
+	}
+
+	/* sampler state */
+	{
+#define TEX_COUNT 1
+		/* Mirror the source buffer's tiling mode in the map state. */
+		uint32_t tiling_bits = 0;
+		if (src->tiling != I915_TILING_NONE)
+			tiling_bits = MS3_TILED_SURFACE;
+		if (src->tiling == I915_TILING_Y)
+			tiling_bits |= MS3_TILE_WALK;
+
+		OUT_BATCH(_3DSTATE_MAP_STATE | (3 * TEX_COUNT));
+		OUT_BATCH((1 << TEX_COUNT) - 1);
+		OUT_RELOC(src->bo, I915_GEM_DOMAIN_SAMPLER, 0, 0);
+		OUT_BATCH(MAPSURF_32BIT | MT_32BIT_ARGB8888 |
+			  tiling_bits |
+			  (buf_height(src) - 1) << MS3_HEIGHT_SHIFT |
+			  (buf_width(src) - 1) << MS3_WIDTH_SHIFT);
+		OUT_BATCH((src->stride/4-1) << MS4_PITCH_SHIFT);
+
+		OUT_BATCH(_3DSTATE_SAMPLER_STATE | (3 * TEX_COUNT));
+		OUT_BATCH((1 << TEX_COUNT) - 1);
+		OUT_BATCH(MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT |
+			  FILTER_NEAREST << SS2_MAG_FILTER_SHIFT |
+			  FILTER_NEAREST << SS2_MIN_FILTER_SHIFT);
+		OUT_BATCH(SS3_NORMALIZED_COORDS |
+			  TEXCOORDMODE_WRAP << SS3_TCX_ADDR_MODE_SHIFT |
+			  TEXCOORDMODE_WRAP << SS3_TCY_ADDR_MODE_SHIFT |
+			  0 << SS3_TEXTUREMAP_INDEX_SHIFT);
+		OUT_BATCH(0x00000000);
+	}
+
+	/* render target state */
+	{
+		/* Mirror the destination buffer's tiling mode in the buffer info. */
+		uint32_t tiling_bits = 0;
+		if (dst->tiling != I915_TILING_NONE)
+			tiling_bits = BUF_3D_TILED_SURFACE;
+		if (dst->tiling == I915_TILING_Y)
+			tiling_bits |= BUF_3D_TILE_WALK_Y;
+
+		OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
+		OUT_BATCH(BUF_3D_ID_COLOR_BACK | tiling_bits |
+			  BUF_3D_PITCH(dst->stride));
+		OUT_RELOC(dst->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
+
+		OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD);
+		OUT_BATCH(COLR_BUF_ARGB8888 |
+			  DSTORG_HORT_BIAS(0x8) |
+			  DSTORG_VERT_BIAS(0x8));
+
+		/* draw rect is unconditional */
+		OUT_BATCH(_3DSTATE_DRAW_RECT_CMD);
+		OUT_BATCH(0x00000000);
+		OUT_BATCH(0x00000000);	/* ymin, xmin */
+		OUT_BATCH(DRAW_YMAX(buf_height(dst) - 1) |
+			  DRAW_XMAX(buf_width(dst) - 1));
+		/* yorig, xorig (relate to color buffer?) */
+		OUT_BATCH(0x00000000);
+	}
+
+	/* texfmt */
+	{
+		/* All coordinate sets start as all-ones; set 0 is then switched to 2D. */
+		uint32_t ss2 = ~0;
+		ss2 &= ~S2_TEXCOORD_FMT(0, TEXCOORDFMT_NOT_PRESENT);
+		ss2 |= S2_TEXCOORD_FMT(0, TEXCOORDFMT_2D);
+		OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(2) | I1_LOAD_S(6) | 1);
+		OUT_BATCH(ss2);
+		OUT_BATCH(S6_CBUF_BLEND_ENABLE | S6_COLOR_WRITE_ENABLE |
+			  BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT |
+			  BLENDFACT_ONE << S6_CBUF_SRC_BLEND_FACT_SHIFT |
+			  BLENDFACT_ZERO << S6_CBUF_DST_BLEND_FACT_SHIFT);
+		OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
+			  I1_LOAD_S(0) | I1_LOAD_S(1) | 1);
+		OUT_BATCH(0);	/* no vbo */
+		OUT_BATCH((4 << S1_VERTEX_WIDTH_SHIFT) |
+			  (4 << S1_VERTEX_PITCH_SHIFT));
+	}
+
+	/* fragment shader */
+	{
+		OUT_BATCH(_3DSTATE_PIXEL_SHADER_PROGRAM | (1 + 3*3 - 2));
+		/* decl FS_T0 */
+		OUT_BATCH(D0_DCL |
+			  REG_TYPE(FS_T0) << D0_TYPE_SHIFT |
+			  REG_NR(FS_T0) << D0_NR_SHIFT |
+			  ((REG_TYPE(FS_T0) != REG_TYPE_S) ? D0_CHANNEL_ALL : 0));
+		OUT_BATCH(0);
+		OUT_BATCH(0);
+		/* decl FS_S0 */
+		OUT_BATCH(D0_DCL |
+			  (REG_TYPE(FS_S0) << D0_TYPE_SHIFT) |
+			  (REG_NR(FS_S0) << D0_NR_SHIFT) |
+			  ((REG_TYPE(FS_S0) != REG_TYPE_S) ? D0_CHANNEL_ALL : 0));
+		OUT_BATCH(0);
+		OUT_BATCH(0);
+		/* texld(FS_OC, FS_S0, FS_T0) */
+		OUT_BATCH(T0_TEXLD |
+			  (REG_TYPE(FS_OC) << T0_DEST_TYPE_SHIFT) |
+			  (REG_NR(FS_OC) << T0_DEST_NR_SHIFT) |
+			  (REG_NR(FS_S0) << T0_SAMPLER_NR_SHIFT));
+		OUT_BATCH((REG_TYPE(FS_T0) << T1_ADDRESS_REG_TYPE_SHIFT) |
+			  (REG_NR(FS_T0) << T1_ADDRESS_REG_NR_SHIFT));
+		OUT_BATCH(0);
+	}
+
+	/*
+	 * Three vertices of four dwords each (dst x, dst y, src u, src v):
+	 * (x+T, y+T), (x, y+T), (x, y) with T = TILE_SIZE.
+	 */
+	OUT_BATCH(PRIM3D_RECTLIST | (3*4 - 1));
+	emit_vertex(dst_x + TILE_SIZE);
+	emit_vertex(dst_y + TILE_SIZE);
+	emit_vertex_normalized(src_x + TILE_SIZE, buf_width(src));
+	emit_vertex_normalized(src_y + TILE_SIZE, buf_height(src));
+
+	emit_vertex(dst_x);
+	emit_vertex(dst_y + TILE_SIZE);
+	emit_vertex_normalized(src_x, buf_width(src));
+	emit_vertex_normalized(src_y + TILE_SIZE, buf_height(src));
+
+	emit_vertex(dst_x);
+	emit_vertex(dst_y);
+	emit_vertex_normalized(src_x, buf_width(src));
+	emit_vertex_normalized(src_y, buf_height(src));
+
+	if (!(keep_gpu_busy_counter & 1) && !fence_storm)
+		keep_gpu_busy();
+
+	keep_gpu_busy_counter++;
+
+	intel_batchbuffer_flush(batch);
+}
+
+/*
+ * Copy a tile with the gen3 render path when devid is a gen3 device;
+ * otherwise hand the same arguments to the blitter copy.
+ */
+static void render_copyfunc(struct scratch_buf *src, unsigned src_x, unsigned src_y,
+			    struct scratch_buf *dst, unsigned dst_x, unsigned dst_y,
+			    unsigned logical_tile_no)
+{
+	if (!IS_GEN3(devid)) {
+		blitter_copyfunc(src, src_x, src_y,
+				 dst, dst_x, dst_y,
+				 logical_tile_no);
+		return;
+	}
+
+	gen3_render_copyfunc(src, src_x, src_y,
+			     dst, dst_x, dst_y,
+			     logical_tile_no);
+}
+
static void next_copyfunc(int tile)
{
if (fence_storm) {
@@ -337,6 +574,10 @@ static void next_copyfunc(int tile)
if (tile == options.trace_tile)
printf(" using prw\n");
copyfunc = prw_copyfunc;
+ } else if (copyfunc_seq % 3 == 0) {
+ if (tile == options.trace_tile)
+ printf(" using render\n");
+ copyfunc = render_copyfunc;
} else {
if (tile == options.trace_tile)
printf(" using blitter\n");
@@ -433,6 +674,7 @@ static void exchange_buf(void *array, unsigned i, unsigned j)
memcpy(&buf_arr[j], &tmp, sizeof(struct scratch_buf));
}
+
/* libdrm is to clever and prevents us from changin tiling of buffers already
* used in relocations. */
static void set_tiling(drm_intel_bo *bo, unsigned *tiling, unsigned stride)
@@ -472,7 +714,6 @@ static void init_set(unsigned set)
gpu_busy_load = 6;
}
-
for (i = 0; i < num_buffers; i++) {
r = random();
if ((r & 3) != 0)
@@ -664,6 +905,7 @@ static void init(void)
bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096);
drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+ drm_intel_bufmgr_gem_enable_fenced_relocs(bufmgr);
devid = intel_get_drm_devid(drm_fd);
num_fences = get_num_fences();
batch = intel_batchbuffer_alloc(bufmgr, devid);