diff options
author | Daniel Vetter <daniel.vetter@ffwll.ch> | 2011-03-27 21:33:29 +0200 |
---|---|---|
committer | Daniel Vetter <daniel.vetter@ffwll.ch> | 2011-03-29 22:52:50 +0200 |
commit | cd640cca6c0eb5936eaf48e43ebc5b3bf7278f6d (patch) | |
tree | 61508ac7a13b25ed96e87c409802e1ab0fdc6a01 /tests | |
parent | 8ab88c9af062512dee6dc2c43d400a71b165c284 (diff) |
gem_stress: render copy on gen3
Headers copied over from xf86-video-intel; the code is modeled after the Xrender
support.
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Diffstat (limited to 'tests')
-rw-r--r-- | tests/gem_stress.c | 250 |
1 file changed, 246 insertions(+), 4 deletions(-)
diff --git a/tests/gem_stress.c b/tests/gem_stress.c index 3fe1248a..aaa57c52 100644 --- a/tests/gem_stress.c +++ b/tests/gem_stress.c @@ -66,6 +66,8 @@ #include "intel_bufmgr.h" #include "intel_batchbuffer.h" #include "intel_gpu_tools.h" +#include "i915_reg.h" +#include "i915_3d.h" #define CMD_POLY_STIPPLE_OFFSET 0x7906 @@ -202,7 +204,7 @@ static void cpucpy2d(uint32_t *src, unsigned src_stride, unsigned src_x, unsigne unsigned src_ofs = src_x + j + src_stride * (src_y + i); unsigned expect = logical_tile_no*TILE_SIZE*TILE_SIZE + i*TILE_SIZE + j; - uint32_t tmp = src[src_ofs]; + uint32_t tmp = src[src_ofs]; if (tmp != expect) { printf("mismatch at tile %i pos %i, read %i, expected %i, diff %i\n", logical_tile_no, i*TILE_SIZE + j, tmp, expect, (int) tmp - expect); @@ -293,10 +295,10 @@ static void blitter_copyfunc(struct scratch_buf *src, unsigned src_x, unsigned s dst_pitch); OUT_BATCH(dst_y << 16 | dst_x); OUT_BATCH((dst_y+TILE_SIZE) << 16 | (dst_x+TILE_SIZE)); - OUT_RELOC(dst->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0); + OUT_RELOC_FENCED(dst->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0); OUT_BATCH(src_y << 16 | src_x); OUT_BATCH(src_pitch); - OUT_RELOC(src->bo, I915_GEM_DOMAIN_RENDER, 0, 0); + OUT_RELOC_FENCED(src->bo, I915_GEM_DOMAIN_RENDER, 0, 0); ADVANCE_BATCH(); if (!(keep_gpu_busy_counter & 1) && !fence_storm) @@ -315,6 +317,241 @@ static void blitter_copyfunc(struct scratch_buf *src, unsigned src_x, unsigned s } } +static unsigned buf_width(struct scratch_buf *buf) +{ + return buf->stride/sizeof(uint32_t); +} + +static unsigned buf_height(struct scratch_buf *buf) +{ + return options.scratch_buf_size/buf->stride; +} + +static void emit_vertex(float f) +{ + union { float f; uint32_t ui; } u; + u.f = f; + OUT_BATCH(u.ui); +} + +static void emit_vertex_normalized(float f, float total) +{ + union { float f; uint32_t ui; } u; + u.f = f / total; + OUT_BATCH(u.ui); +} + +static void gen3_render_copyfunc(struct scratch_buf *src, unsigned 
src_x, unsigned src_y, + struct scratch_buf *dst, unsigned dst_x, unsigned dst_y, + unsigned logical_tile_no) +{ + uint32_t src_pitch, dst_pitch, cmd_bits; + src_pitch = src->stride; + dst_pitch = dst->stride; + cmd_bits = 0; + static unsigned keep_gpu_busy_counter = 0; + + /* check both edges of the fence usage */ + if (keep_gpu_busy_counter & 1 && !fence_storm) + keep_gpu_busy(); + + /* invariant state */ + { + OUT_BATCH(_3DSTATE_AA_CMD | + AA_LINE_ECAAR_WIDTH_ENABLE | + AA_LINE_ECAAR_WIDTH_1_0 | + AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0); + OUT_BATCH(_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD | + IAB_MODIFY_ENABLE | + IAB_MODIFY_FUNC | (BLENDFUNC_ADD << IAB_FUNC_SHIFT) | + IAB_MODIFY_SRC_FACTOR | (BLENDFACT_ONE << + IAB_SRC_FACTOR_SHIFT) | + IAB_MODIFY_DST_FACTOR | (BLENDFACT_ZERO << + IAB_DST_FACTOR_SHIFT)); + OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD); + OUT_BATCH(0); + OUT_BATCH(_3DSTATE_DFLT_SPEC_CMD); + OUT_BATCH(0); + OUT_BATCH(_3DSTATE_DFLT_Z_CMD); + OUT_BATCH(0); + OUT_BATCH(_3DSTATE_COORD_SET_BINDINGS | + CSB_TCB(0, 0) | + CSB_TCB(1, 1) | + CSB_TCB(2, 2) | + CSB_TCB(3, 3) | + CSB_TCB(4, 4) | + CSB_TCB(5, 5) | CSB_TCB(6, 6) | CSB_TCB(7, 7)); + OUT_BATCH(_3DSTATE_RASTER_RULES_CMD | + ENABLE_POINT_RASTER_RULE | + OGL_POINT_RASTER_RULE | + ENABLE_LINE_STRIP_PROVOKE_VRTX | + ENABLE_TRI_FAN_PROVOKE_VRTX | + LINE_STRIP_PROVOKE_VRTX(1) | + TRI_FAN_PROVOKE_VRTX(2) | ENABLE_TEXKILL_3D_4D | TEXKILL_4D); + OUT_BATCH(_3DSTATE_MODES_4_CMD | + ENABLE_LOGIC_OP_FUNC | LOGIC_OP_FUNC(LOGICOP_COPY) | + ENABLE_STENCIL_WRITE_MASK | STENCIL_WRITE_MASK(0xff) | + ENABLE_STENCIL_TEST_MASK | STENCIL_TEST_MASK(0xff)); + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | I1_LOAD_S(4) | I1_LOAD_S(5) | 2); + OUT_BATCH(0x00000000); /* Disable texture coordinate wrap-shortest */ + OUT_BATCH((1 << S4_POINT_WIDTH_SHIFT) | + S4_LINE_WIDTH_ONE | + S4_CULLMODE_NONE | + S4_VFMT_XY); + OUT_BATCH(0x00000000); /* Stencil. 
*/ + OUT_BATCH(_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT); + OUT_BATCH(_3DSTATE_SCISSOR_RECT_0_CMD); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(_3DSTATE_DEPTH_SUBRECT_DISABLE); + OUT_BATCH(_3DSTATE_LOAD_INDIRECT | 0); /* disable indirect state */ + OUT_BATCH(0); + OUT_BATCH(_3DSTATE_STIPPLE); + OUT_BATCH(0x00000000); + OUT_BATCH(_3DSTATE_BACKFACE_STENCIL_OPS | BFO_ENABLE_STENCIL_TWO_SIDE | 0); + } + + /* samler state */ + { +#define TEX_COUNT 1 + uint32_t tiling_bits = 0; + if (src->tiling != I915_TILING_NONE) + tiling_bits = MS3_TILED_SURFACE; + if (src->tiling == I915_TILING_Y) + tiling_bits |= MS3_TILE_WALK; + + OUT_BATCH(_3DSTATE_MAP_STATE | (3 * TEX_COUNT)); + OUT_BATCH((1 << TEX_COUNT) - 1); + OUT_RELOC(src->bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); + OUT_BATCH(MAPSURF_32BIT | MT_32BIT_ARGB8888 | + tiling_bits | + (buf_height(src) - 1) << MS3_HEIGHT_SHIFT | + (buf_width(src) - 1) << MS3_WIDTH_SHIFT); + OUT_BATCH((src->stride/4-1) << MS4_PITCH_SHIFT); + + OUT_BATCH(_3DSTATE_SAMPLER_STATE | (3 * TEX_COUNT)); + OUT_BATCH((1 << TEX_COUNT) - 1); + OUT_BATCH(MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT | + FILTER_NEAREST << SS2_MAG_FILTER_SHIFT | + FILTER_NEAREST << SS2_MIN_FILTER_SHIFT); + OUT_BATCH(SS3_NORMALIZED_COORDS | + TEXCOORDMODE_WRAP << SS3_TCX_ADDR_MODE_SHIFT | + TEXCOORDMODE_WRAP << SS3_TCY_ADDR_MODE_SHIFT | + 0 << SS3_TEXTUREMAP_INDEX_SHIFT); + OUT_BATCH(0x00000000); + } + + /* render target state */ + { + uint32_t tiling_bits = 0; + if (dst->tiling != I915_TILING_NONE) + tiling_bits = BUF_3D_TILED_SURFACE; + if (dst->tiling == I915_TILING_Y) + tiling_bits |= BUF_3D_TILE_WALK_Y; + + OUT_BATCH(_3DSTATE_BUF_INFO_CMD); + OUT_BATCH(BUF_3D_ID_COLOR_BACK | tiling_bits | + BUF_3D_PITCH(dst->stride)); + OUT_RELOC(dst->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0); + + OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD); + OUT_BATCH(COLR_BUF_ARGB8888 | + DSTORG_HORT_BIAS(0x8) | + DSTORG_VERT_BIAS(0x8)); + + /* draw rect is unconditional */ + 
OUT_BATCH(_3DSTATE_DRAW_RECT_CMD); + OUT_BATCH(0x00000000); + OUT_BATCH(0x00000000); /* ymin, xmin */ + OUT_BATCH(DRAW_YMAX(buf_height(dst) - 1) | + DRAW_XMAX(buf_width(dst) - 1)); + /* yorig, xorig (relate to color buffer?) */ + OUT_BATCH(0x00000000); + } + + /* texfmt */ + { + uint32_t ss2 = ~0; + ss2 &= ~S2_TEXCOORD_FMT(0, TEXCOORDFMT_NOT_PRESENT); + ss2 |= S2_TEXCOORD_FMT(0, TEXCOORDFMT_2D); + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(2) | I1_LOAD_S(6) | 1); + OUT_BATCH(ss2); + OUT_BATCH(S6_CBUF_BLEND_ENABLE | S6_COLOR_WRITE_ENABLE | + BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT | + BLENDFACT_ONE << S6_CBUF_SRC_BLEND_FACT_SHIFT | + BLENDFACT_ZERO << S6_CBUF_DST_BLEND_FACT_SHIFT); + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | + I1_LOAD_S(0) | I1_LOAD_S(1) | 1); + OUT_BATCH(0); /* no vbo */ + OUT_BATCH((4 << S1_VERTEX_WIDTH_SHIFT) | + (4 << S1_VERTEX_PITCH_SHIFT)); + } + + /* frage shader */ + { + OUT_BATCH(_3DSTATE_PIXEL_SHADER_PROGRAM | (1 + 3*3 - 2)); + /* decl FS_T0 */ + OUT_BATCH(D0_DCL | + REG_TYPE(FS_T0) << D0_TYPE_SHIFT | + REG_NR(FS_T0) << D0_NR_SHIFT | + ((REG_TYPE(FS_T0) != REG_TYPE_S) ? D0_CHANNEL_ALL : 0)); + OUT_BATCH(0); + OUT_BATCH(0); + /* decl FS_S0 */ + OUT_BATCH(D0_DCL | + (REG_TYPE(FS_S0) << D0_TYPE_SHIFT) | + (REG_NR(FS_S0) << D0_NR_SHIFT) | + ((REG_TYPE(FS_S0) != REG_TYPE_S) ? 
D0_CHANNEL_ALL : 0)); + OUT_BATCH(0); + OUT_BATCH(0); + /* texld(FS_OC, FS_S0, FS_T0 */ + OUT_BATCH(T0_TEXLD | + (REG_TYPE(FS_OC) << T0_DEST_TYPE_SHIFT) | + (REG_NR(FS_OC) << T0_DEST_NR_SHIFT) | + (REG_NR(FS_S0) << T0_SAMPLER_NR_SHIFT)); + OUT_BATCH((REG_TYPE(FS_T0) << T1_ADDRESS_REG_TYPE_SHIFT) | + (REG_NR(FS_T0) << T1_ADDRESS_REG_NR_SHIFT)); + OUT_BATCH(0); + } + + OUT_BATCH(PRIM3D_RECTLIST | (3*4 - 1)); + emit_vertex(dst_x + TILE_SIZE); + emit_vertex(dst_y + TILE_SIZE); + emit_vertex_normalized(src_x + TILE_SIZE, buf_width(src)); + emit_vertex_normalized(src_y + TILE_SIZE, buf_height(src)); + + emit_vertex(dst_x); + emit_vertex(dst_y + TILE_SIZE); + emit_vertex_normalized(src_x, buf_width(src)); + emit_vertex_normalized(src_y + TILE_SIZE, buf_height(src)); + + emit_vertex(dst_x); + emit_vertex(dst_y); + emit_vertex_normalized(src_x, buf_width(src)); + emit_vertex_normalized(src_y, buf_height(src)); + + if (!(keep_gpu_busy_counter & 1) && !fence_storm) + keep_gpu_busy(); + + keep_gpu_busy_counter++; + + intel_batchbuffer_flush(batch); +} + +static void render_copyfunc(struct scratch_buf *src, unsigned src_x, unsigned src_y, + struct scratch_buf *dst, unsigned dst_x, unsigned dst_y, + unsigned logical_tile_no) +{ + if (IS_GEN3(devid)) + gen3_render_copyfunc(src, src_x, src_y, + dst, dst_x, dst_y, + logical_tile_no); + else + blitter_copyfunc(src, src_x, src_y, + dst, dst_x, dst_y, + logical_tile_no); +} + static void next_copyfunc(int tile) { if (fence_storm) { @@ -337,6 +574,10 @@ static void next_copyfunc(int tile) if (tile == options.trace_tile) printf(" using prw\n"); copyfunc = prw_copyfunc; + } else if (copyfunc_seq % 3 == 0) { + if (tile == options.trace_tile) + printf(" using render\n"); + copyfunc = render_copyfunc; } else { if (tile == options.trace_tile) printf(" using blitter\n"); @@ -433,6 +674,7 @@ static void exchange_buf(void *array, unsigned i, unsigned j) memcpy(&buf_arr[j], &tmp, sizeof(struct scratch_buf)); } + /* libdrm is to clever and 
prevents us from changing tiling of buffers already * used in relocations. */ static void set_tiling(drm_intel_bo *bo, unsigned *tiling, unsigned stride) @@ -472,7 +714,6 @@ static void init_set(unsigned set) gpu_busy_load = 6; } - for (i = 0; i < num_buffers; i++) { r = random(); if ((r & 3) != 0) @@ -664,6 +905,7 @@ static void init(void) bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096); drm_intel_bufmgr_gem_enable_reuse(bufmgr); + drm_intel_bufmgr_gem_enable_fenced_relocs(bufmgr); devid = intel_get_drm_devid(drm_fd); num_fences = get_num_fences(); batch = intel_batchbuffer_alloc(bufmgr, devid); |