From 68ff28a022dbaa26a20c8a3c0212011a006614b0 Mon Sep 17 00:00:00 2001 From: "Kalamarz, Lukasz" Date: Wed, 10 Oct 2018 12:48:37 +0200 Subject: libs: Add rendercopy support for GEN11 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch introduces a render copy shader for GEN11. The plumbing is the same as with GEN9, so we can reuse it, extracting the common parts, and wrapping it in GEN-specific helpers. v2: Added gen11 shader source path next to its binary form Signed-off-by: Lukasz Kalamarz Cc: Michał Winiarski Cc: Antonio Argenziano Cc: Lucas De Marchi Cc: Rodrigo Vivi Cc: Katarzyna Dec Reviewed-by: Katarzyna Dec --- lib/i915/shaders/ps/blit.g11a | 22 ++++++++++ lib/intel_batchbuffer.c | 2 + lib/rendercopy.h | 5 +++ lib/rendercopy_gen9.c | 98 ++++++++++++++++++++++++++++++++++--------- 4 files changed, 107 insertions(+), 20 deletions(-) create mode 100644 lib/i915/shaders/ps/blit.g11a diff --git a/lib/i915/shaders/ps/blit.g11a b/lib/i915/shaders/ps/blit.g11a new file mode 100644 index 00000000..15fe78af --- /dev/null +++ b/lib/i915/shaders/ps/blit.g11a @@ -0,0 +1,22 @@ +/* This is the same shader as for previous gens. On Gen 11 instruction pln was deleted and needs to be replaced by mad. 
+This shader was generated using IGA tool (not assembler integrated into IGT) +*/ + +(W) mad(8|M0) acc0.0<1>:nf r6.7<0;0>:f r2.0<8;1>:f r6.0<0>:f +(W) mad(8|M0) r10.0<1>:f acc0.0<8;1>:nf r3.0<8;1>:f r6.1<0>:f + +(W) mad(8|M0) acc0.0<1>:nf r6.0<0;0>:f r4.0<8;1>:f r6.0<0>:f +(W) mad(8|M0) r11.0<1>:f acc0.0<8;1>:nf r5.0<8;1>:f r6.1<0>:f + +(W) mad(8|M0) acc0.0<1>:nf r6.4<0;0>:f r2.0<8;1>:f r6.4<0>:f +(W) mad(8|M0) r12.0<1>:f acc0.0<8;1>:nf r3.0<8;1>:f r6.5<0>:f + +(W) mad(8|M0) acc0.0<1>:nf r6.0<0;0>:f r4.0<8;1>:f r6.4<0>:f +(W) mad(8|M0) r13.0<1>:f acc0.0<8;1>:nf r5.0<8;1>:f r6.5<0>:f + +(W) send(16|M0) r112:f r10:ub 0x10000002 0x08840001 // SAMPLER wr:4, rd:8, fc: 0x40001 + mov (16|M0) r113.0<1>:f r12.0<8;8,1>:f + mov (16|M0) r115.0<1>:f r14.0<8;8,1>:f + mov (16|M0) r117.0<1>:f r16.0<8;8,1>:f + mov (16|M0) r119.0<1>:f r18.0<8;8,1>:f +(W) send(16|M0) null:f r112:ub 0x10000025 0x10031000 {EOT} // DP_RC wr:8, rd:0, Render Target Write msc:16, to #0 diff --git a/lib/intel_batchbuffer.c b/lib/intel_batchbuffer.c index 387404ff..c13b1dc4 100644 --- a/lib/intel_batchbuffer.c +++ b/lib/intel_batchbuffer.c @@ -843,6 +843,8 @@ igt_render_copyfunc_t igt_get_render_copyfunc(int devid) copy = gen8_render_copyfunc; else if (IS_GEN9(devid) || IS_GEN10(devid)) copy = gen9_render_copyfunc; + else if (IS_GEN11(devid)) + copy = gen11_render_copyfunc; return copy; } diff --git a/lib/rendercopy.h b/lib/rendercopy.h index d1bb6284..35c28dd9 100644 --- a/lib/rendercopy.h +++ b/lib/rendercopy.h @@ -23,6 +23,11 @@ static inline void emit_vertex_normalized(struct intel_batchbuffer *batch, OUT_BATCH(u.ui); } +void gen11_render_copyfunc(struct intel_batchbuffer *batch, + drm_intel_context *context, + const struct igt_buf *src, unsigned src_x, unsigned src_y, + unsigned width, unsigned height, + const struct igt_buf *dst, unsigned dst_x, unsigned dst_y); void gen9_render_copyfunc(struct intel_batchbuffer *batch, drm_intel_context *context, const struct igt_buf *src, unsigned src_x, unsigned 
src_y, diff --git a/lib/rendercopy_gen9.c b/lib/rendercopy_gen9.c index f324fbd7..adbd8124 100644 --- a/lib/rendercopy_gen9.c +++ b/lib/rendercopy_gen9.c @@ -50,23 +50,54 @@ struct { } viewport; /* see lib/i915/shaders/ps/blit.g7a */ -static const uint32_t ps_kernel[][4] = { +static const uint32_t ps_kernel_gen9[][4] = { #if 1 - { 0x0080005a, 0x2f403ae8, 0x3a0000c0, 0x008d0040 }, - { 0x0080005a, 0x2f803ae8, 0x3a0000d0, 0x008d0040 }, - { 0x02800031, 0x2e203a48, 0x0e8d0f40, 0x08840001 }, - { 0x05800031, 0x20003a40, 0x0e8d0e20, 0x90031000 }, + { 0x0080005a, 0x2f403ae8, 0x3a0000c0, 0x008d0040 }, + { 0x0080005a, 0x2f803ae8, 0x3a0000d0, 0x008d0040 }, + { 0x02800031, 0x2e203a48, 0x0e8d0f40, 0x08840001 }, + { 0x05800031, 0x20003a40, 0x0e8d0e20, 0x90031000 }, #else - /* Write all -1 */ - { 0x00600001, 0x2e000608, 0x00000000, 0x3f800000 }, - { 0x00600001, 0x2e200608, 0x00000000, 0x3f800000 }, - { 0x00600001, 0x2e400608, 0x00000000, 0x3f800000 }, - { 0x00600001, 0x2e600608, 0x00000000, 0x3f800000 }, - { 0x00600001, 0x2e800608, 0x00000000, 0x3f800000 }, - { 0x00600001, 0x2ea00608, 0x00000000, 0x3f800000 }, - { 0x00600001, 0x2ec00608, 0x00000000, 0x3f800000 }, - { 0x00600001, 0x2ee00608, 0x00000000, 0x3f800000 }, - { 0x05800031, 0x200022e0, 0x0e000e00, 0x90031000 }, + /* Write all -1 */ + { 0x00600001, 0x2e000608, 0x00000000, 0x3f800000 }, + { 0x00600001, 0x2e200608, 0x00000000, 0x3f800000 }, + { 0x00600001, 0x2e400608, 0x00000000, 0x3f800000 }, + { 0x00600001, 0x2e600608, 0x00000000, 0x3f800000 }, + { 0x00600001, 0x2e800608, 0x00000000, 0x3f800000 }, + { 0x00600001, 0x2ea00608, 0x00000000, 0x3f800000 }, + { 0x00600001, 0x2ec00608, 0x00000000, 0x3f800000 }, + { 0x00600001, 0x2ee00608, 0x00000000, 0x3f800000 }, + { 0x05800031, 0x200022e0, 0x0e000e00, 0x90031000 }, +#endif +}; + +/* see lib/i915/shaders/ps/blit.g11a */ +static const uint32_t ps_kernel_gen11[][4] = { +#if 1 + { 0x0060005b, 0x2000c01c, 0x07206601, 0x01800404 }, + { 0x0060005b, 0x7100480c, 0x0722003b, 0x01880406 }, 
+ { 0x0060005b, 0x2000c01c, 0x07206601, 0x01800408 }, + { 0x0060005b, 0x7200480c, 0x0722003b, 0x0188040a }, + { 0x0060005b, 0x2000c01c, 0x07206e01, 0x01a00404 }, + { 0x0060005b, 0x7300480c, 0x0722003b, 0x01a80406 }, + { 0x0060005b, 0x2000c01c, 0x07206e01, 0x01a00408 }, + { 0x0060005b, 0x7400480c, 0x0722003b, 0x01a8040a }, + { 0x02800031, 0x21804a4c, 0x06000e20, 0x08840001 }, + { 0x00800001, 0x2e204b28, 0x008d0180, 0x00000000 }, + { 0x00800001, 0x2e604b28, 0x008d01c0, 0x00000000 }, + { 0x00800001, 0x2ea04b28, 0x008d0200, 0x00000000 }, + { 0x00800001, 0x2ee04b28, 0x008d0240, 0x00000000 }, + { 0x05800031, 0x20004a44, 0x06000e20, 0x90031000 }, +#else + /* Write all -1 */ + { 0x00600001, 0x2e000608, 0x00000000, 0x3f800000 }, + { 0x00600001, 0x2e200608, 0x00000000, 0x3f800000 }, + { 0x00600001, 0x2e400608, 0x00000000, 0x3f800000 }, + { 0x00600001, 0x2e600608, 0x00000000, 0x3f800000 }, + { 0x00600001, 0x2e800608, 0x00000000, 0x3f800000 }, + { 0x00600001, 0x2ea00608, 0x00000000, 0x3f800000 }, + { 0x00600001, 0x2ec00608, 0x00000000, 0x3f800000 }, + { 0x00600001, 0x2ee00608, 0x00000000, 0x3f800000 }, + { 0x05800031, 0x200022e0, 0x0e000e00, 0x90031000 }, #endif }; @@ -907,11 +938,14 @@ static void gen8_emit_primitive(struct intel_batchbuffer *batch, uint32_t offset #define BATCH_STATE_SPLIT 2048 -void gen9_render_copyfunc(struct intel_batchbuffer *batch, +static +void _gen9_render_copyfunc(struct intel_batchbuffer *batch, drm_intel_context *context, - const struct igt_buf *src, unsigned src_x, unsigned src_y, - unsigned width, unsigned height, - const struct igt_buf *dst, unsigned dst_x, unsigned dst_y) + const struct igt_buf *src, unsigned src_x, + unsigned src_y, unsigned width, unsigned height, + const struct igt_buf *dst, unsigned dst_x, + unsigned dst_y, const uint32_t ps_kernel[][4], + uint32_t ps_kernel_size) { uint32_t ps_sampler_state, ps_kernel_off, ps_binding_table; uint32_t scissor_state; @@ -928,7 +962,7 @@ void gen9_render_copyfunc(struct intel_batchbuffer 
*batch, ps_binding_table = gen8_bind_surfaces(batch, src, dst); ps_sampler_state = gen8_create_sampler(batch); - ps_kernel_off = gen8_fill_ps(batch, ps_kernel, sizeof(ps_kernel)); + ps_kernel_off = gen8_fill_ps(batch, ps_kernel, ps_kernel_size); vertex_buffer = gen7_fill_vertex_buffer_data(batch, src, src_x, src_y, dst_x, dst_y, @@ -1014,3 +1048,27 @@ void gen9_render_copyfunc(struct intel_batchbuffer *batch, gen6_render_flush(batch, context, batch_end); intel_batchbuffer_reset(batch); } + +void gen9_render_copyfunc(struct intel_batchbuffer *batch, + drm_intel_context *context, + const struct igt_buf *src, unsigned src_x, unsigned src_y, + unsigned width, unsigned height, + const struct igt_buf *dst, unsigned dst_x, unsigned dst_y) + +{ + _gen9_render_copyfunc(batch, context, src, src_x, src_y, + width, height, dst, dst_x, dst_y, ps_kernel_gen9, + sizeof(ps_kernel_gen9)); +} + +void gen11_render_copyfunc(struct intel_batchbuffer *batch, + drm_intel_context *context, + const struct igt_buf *src, unsigned src_x, unsigned src_y, + unsigned width, unsigned height, + const struct igt_buf *dst, unsigned dst_x, unsigned dst_y) + +{ + _gen9_render_copyfunc(batch, context, src, src_x, src_y, + width, height, dst, dst_x, dst_y, ps_kernel_gen11, + sizeof(ps_kernel_gen11)); +} -- cgit v1.2.3