diff options
| author | Mika Kuoppala <mika.kuoppala@intel.com> | 2014-09-08 10:49:59 +0300 | 
|---|---|---|
| committer | Mika Kuoppala <mika.kuoppala@intel.com> | 2014-10-09 19:47:51 +0300 | 
| commit | 0e8ac72d5d608d82a91bb5232badfb872589ac14 (patch) | |
| tree | d4a515e7787b51fb5e691508bbda2a9cba00f273 /tools | |
| parent | b69659c3f5ed285bd218350deeff52761aec3d10 (diff) | |
tools/null_state_gen: Add Gen8 golden state
Previously we didn't have a clear understanding of what is necessary
for a pipeline state to be properly initialized, so we had to improvise
and use a stripped-out render copy.
Now we have a clearer understanding, so switch out the render-copy-based
frankenstate for a state we can call golden state.
v2: - export intel_batch_state_offset
    - add 3DSTATE_RASTER (Bradley Volkin)
Cc: Volkin, Bradley D <bradley.d.volkin@intel.com>
Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
Diffstat (limited to 'tools')
| -rw-r--r-- | tools/null_state_gen/intel_batchbuffer.h | 2 | ||||
| -rw-r--r-- | tools/null_state_gen/intel_renderstate_gen8.c | 850 | 
2 files changed, 278 insertions, 574 deletions
| diff --git a/tools/null_state_gen/intel_batchbuffer.h b/tools/null_state_gen/intel_batchbuffer.h index f85f31db..8b87c020 100644 --- a/tools/null_state_gen/intel_batchbuffer.h +++ b/tools/null_state_gen/intel_batchbuffer.h @@ -84,7 +84,7 @@ uint32_t intel_batch_state_copy(struct intel_batchbuffer *batch, void *d, unsign  				const char *name);  uint32_t intel_batch_state_alloc(struct intel_batchbuffer *batch, unsigned bytes, unsigned align,  				 const char *name); - +uint32_t intel_batch_state_offset(struct intel_batchbuffer *batch, unsigned align);  unsigned intel_batch_num_cmds(struct intel_batchbuffer *batch);  struct bb_item *intel_batch_cmd_get(struct intel_batchbuffer *batch, unsigned i); diff --git a/tools/null_state_gen/intel_renderstate_gen8.c b/tools/null_state_gen/intel_renderstate_gen8.c index 73375a06..2d7a4b0e 100644 --- a/tools/null_state_gen/intel_renderstate_gen8.c +++ b/tools/null_state_gen/intel_renderstate_gen8.c @@ -29,708 +29,412 @@  #include <lib/intel_reg.h>  #include <string.h> -struct { -	uint32_t cc_state; -	uint32_t blend_state; -} cc; - -struct { -	uint32_t cc_state; -	uint32_t sf_clip_state; -} viewport; - -/* see shaders/ps/blit.g7a */ -static const uint32_t ps_kernel[][4] = { -#if 1 -   { 0x0060005a, 0x21403ae8, 0x3a0000c0, 0x008d0040 }, -   { 0x0060005a, 0x21603ae8, 0x3a0000c0, 0x008d0080 }, -   { 0x0060005a, 0x21803ae8, 0x3a0000d0, 0x008d0040 }, -   { 0x0060005a, 0x21a03ae8, 0x3a0000d0, 0x008d0080 }, -   { 0x02800031, 0x2e0022e8, 0x0e000140, 0x08840001 }, -   { 0x05800031, 0x200022e0, 0x0e000e00, 0x90031000 }, -#else -   /* Write all -1 */ -   { 0x00600001, 0x2e000608, 0x00000000, 0x3f800000 }, -   { 0x00600001, 0x2e200608, 0x00000000, 0x3f800000 }, -   { 0x00600001, 0x2e400608, 0x00000000, 0x3f800000 }, -   { 0x00600001, 0x2e600608, 0x00000000, 0x3f800000 }, -   { 0x00600001, 0x2e800608, 0x00000000, 0x3f800000 }, -   { 0x00600001, 0x2ea00608, 0x00000000, 0x3f800000 }, -   { 0x00600001, 0x2ec00608, 0x00000000, 0x3f800000 }, -   
{ 0x00600001, 0x2ee00608, 0x00000000, 0x3f800000 }, -   { 0x05800031, 0x200022e0, 0x0e000e00, 0x90031000 }, -#endif -}; - -static uint32_t -gen8_bind_buf_null(struct intel_batchbuffer *batch) +static void gen8_emit_wm(struct intel_batchbuffer *batch)  { -	struct gen8_surface_state ss; -	memset(&ss, 0, sizeof(ss)); - -	return OUT_STATE_STRUCT(ss, 64); -} - -static uint32_t -gen8_bind_surfaces(struct intel_batchbuffer *batch) -{ -	unsigned offset; - -	offset = intel_batch_state_alloc(batch, 8, 32, "bind surfaces"); - -	bb_area_emit_offset(batch->state, offset, gen8_bind_buf_null(batch), STATE_OFFSET, "bind 1"); -	bb_area_emit_offset(batch->state, offset + 4, gen8_bind_buf_null(batch), STATE_OFFSET, "bind 2"); - -	return offset; -} - -/* Mostly copy+paste from gen6, except wrap modes moved */ -static uint32_t -gen8_create_sampler(struct intel_batchbuffer *batch) { -	struct gen8_sampler_state ss; -	memset(&ss, 0, sizeof(ss)); - -	ss.ss0.min_filter = GEN6_MAPFILTER_NEAREST; -	ss.ss0.mag_filter = GEN6_MAPFILTER_NEAREST; -	ss.ss3.r_wrap_mode = GEN6_TEXCOORDMODE_CLAMP; -	ss.ss3.s_wrap_mode = GEN6_TEXCOORDMODE_CLAMP; -	ss.ss3.t_wrap_mode = GEN6_TEXCOORDMODE_CLAMP; - -	/* I've experimented with non-normalized coordinates and using the LD -	 * sampler fetch, but couldn't make it work. */ -	ss.ss3.non_normalized_coord = 0; - -	return OUT_STATE_STRUCT(ss, 64); -} - -static uint32_t -gen8_fill_ps(struct intel_batchbuffer *batch, -	     const uint32_t kernel[][4], -	     size_t size) -{ -	return intel_batch_state_copy(batch, kernel, size, 64, "ps kernel"); +	OUT_BATCH(GEN6_3DSTATE_WM | (2 - 2)); +	OUT_BATCH(GEN7_WM_LEGACY_DIAMOND_LINE_RASTERIZATION);  } -/** - * gen7_fill_vertex_buffer_data populate vertex buffer with data. - * - * The vertex buffer consists of 3 vertices to construct a RECTLIST. The 4th - * vertex is implied (automatically derived by the HW). Each element has the - * destination offset, and the normalized texture offset (src). 
The rectangle - * itself will span the entire subsurface to be copied. - * - * see gen6_emit_vertex_elements - */ -static uint32_t -gen7_fill_vertex_buffer_data(struct intel_batchbuffer *batch) +static void gen8_emit_ps(struct intel_batchbuffer *batch)  { -	uint16_t *v; - -	return intel_batch_state_alloc(batch, 2 * sizeof(*v), 8, "vertex buffer"); -} - -/** - * gen6_emit_vertex_elements - The vertex elements describe the contents of the - * vertex buffer. We pack the vertex buffer in a semi weird way, conforming to - * what gen6_rendercopy did. The most straightforward would be to store - * everything as floats. - * - * see gen7_fill_vertex_buffer_data() for where the corresponding elements are - * packed. - */ -static void -gen6_emit_vertex_elements(struct intel_batchbuffer *batch) { -	/* -	 * The VUE layout -	 *    dword 0-3: pad (0, 0, 0. 0) -	 *    dword 4-7: position (x, y, 0, 1.0), -	 *    dword 8-11: texture coordinate 0 (u0, v0, 0, 1.0) -	 */ -	OUT_BATCH(GEN6_3DSTATE_VERTEX_ELEMENTS | (3 * 2 + 1 - 2)); - -	/* Element state 0. These are 4 dwords of 0 required for the VUE format. -	 * We don't really know or care what they do. -	 */ -	OUT_BATCH(0 << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | -		  GEN6_SURFACEFORMAT_R32G32B32A32_FLOAT << VE0_FORMAT_SHIFT | -		  0 << VE0_OFFSET_SHIFT); /* we specify 0, but it's really does not exist */ -	OUT_BATCH(GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT | -		  GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT | -		  GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT | -		  GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT); - -	/* Element state 1 - Our "destination" vertices. These are passed down -	 * through the pipeline, and eventually make it to the pixel shader as -	 * the offsets in the destination surface. It's packed as the 16 -	 * signed/scaled because of gen6 rendercopy. I see no particular reason -	 * for doing this though. 
-	 */ -	OUT_BATCH(0 << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | -		  GEN6_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT | -		  0 << VE0_OFFSET_SHIFT); /* offsets vb in bytes */ -	OUT_BATCH(GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT | -		  GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT | -		  GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT | -		  GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT); - -	/* Element state 2. Last but not least we store the U,V components as -	 * normalized floats. These will be used in the pixel shader to sample -	 * from the source buffer. -	 */ -	OUT_BATCH(0 << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | -		  GEN6_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT | -		  4 << VE0_OFFSET_SHIFT);	/* offset vb in bytes */ -	OUT_BATCH(GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT | -		  GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT | -		  GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT | -		  GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT); -} - -/** - * gen7_emit_vertex_buffer emit the vertex buffers command - * - * @batch - * @offset - bytw offset within the @batch where the vertex buffer starts. 
- */ -static void gen7_emit_vertex_buffer(struct intel_batchbuffer *batch, -				    uint32_t offset) { -	OUT_BATCH(GEN6_3DSTATE_VERTEX_BUFFERS | (1 + (4 * 1) - 2)); -	OUT_BATCH(0 << VB0_BUFFER_INDEX_SHIFT | /* VB 0th index */ -		  GEN7_VB0_BUFFER_ADDR_MOD_EN | /* Address Modify Enable */ -		  VB0_NULL_VERTEX_BUFFER | -		  0 << VB0_BUFFER_PITCH_SHIFT); -	OUT_RELOC_STATE(batch, I915_GEM_DOMAIN_VERTEX, 0, offset); +	OUT_BATCH(GEN7_3DSTATE_PS | (12 - 2));  	OUT_BATCH(0); +	OUT_BATCH(0); /* kernel hi */ +	OUT_BATCH(GEN7_PS_SPF_MODE); +	OUT_BATCH(0); /* scratch space stuff */ +	OUT_BATCH(0); /* scratch hi */  	OUT_BATCH(0); +	OUT_BATCH(0); +	OUT_BATCH(0); // kernel 1 +	OUT_BATCH(0); /* kernel 1 hi */ +	OUT_BATCH(0); // kernel 2 +	OUT_BATCH(0); /* kernel 2 hi */  } -static uint32_t -gen6_create_cc_state(struct intel_batchbuffer *batch) -{ -	struct gen6_color_calc_state cc_state; -	memset(&cc_state, 0, sizeof(cc_state)); - -	return OUT_STATE_STRUCT(cc_state, 64); -} - -static uint32_t -gen8_create_blend_state(struct intel_batchbuffer *batch) -{ -	struct gen8_blend_state blend; -	int i; - -	memset(&blend, 0, sizeof(blend)); - -	for (i = 0; i < 16; i++) { -		blend.bs[i].dest_blend_factor = GEN6_BLENDFACTOR_ZERO; -		blend.bs[i].source_blend_factor = GEN6_BLENDFACTOR_ONE; -		blend.bs[i].color_blend_func = GEN6_BLENDFUNCTION_ADD; -		blend.bs[i].pre_blend_color_clamp = 1; -		blend.bs[i].color_buffer_blend = 0; -	} - -	return OUT_STATE_STRUCT(blend, 64); -} - -static uint32_t -gen6_create_cc_viewport(struct intel_batchbuffer *batch) +static void gen8_emit_sf(struct intel_batchbuffer *batch)  { -	struct gen6_cc_viewport vp; - -	memset(&vp, 0, sizeof(vp)); - -	/* XXX I don't understand this */ -	vp.min_depth = -1.e35; -	vp.max_depth = 1.e35; - -	return OUT_STATE_STRUCT(vp, 32); -} - -static uint32_t -gen7_create_sf_clip_viewport(struct intel_batchbuffer *batch) { -	/* XXX these are likely not needed */ -	struct gen7_sf_clip_viewport scv_state; - -	memset(&scv_state, 0, 
sizeof(scv_state)); - -	scv_state.guardband.xmin = 0; -	scv_state.guardband.xmax = 1.0f; -	scv_state.guardband.ymin = 0; -	scv_state.guardband.ymax = 1.0f; - -	return OUT_STATE_STRUCT(scv_state, 64); +	OUT_BATCH(GEN6_3DSTATE_SF | (4 - 2)); +	OUT_BATCH(0); +	OUT_BATCH(0); +	OUT_BATCH(1 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT | +		  1 << GEN6_3DSTATE_SF_VERTEX_SUB_PIXEL_PRECISION_SHIFT | +		  GEN7_SF_POINT_WIDTH_FROM_SOURCE | +		  8);  } -static uint32_t -gen6_create_scissor_rect(struct intel_batchbuffer *batch) +static void gen8_emit_vs(struct intel_batchbuffer *batch)  { -	struct gen6_scissor_rect scissor; - -	memset(&scissor, 0, sizeof(scissor)); - -	return OUT_STATE_STRUCT(scissor, 64); -} - -static void -gen8_emit_sip(struct intel_batchbuffer *batch) { -	OUT_BATCH(GEN6_STATE_SIP | (3 - 2)); +	OUT_BATCH(GEN6_3DSTATE_VS | (9 - 2));  	OUT_BATCH(0);  	OUT_BATCH(0); -} - -static void -gen7_emit_push_constants(struct intel_batchbuffer *batch) { -	OUT_BATCH(GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS); +	OUT_BATCH(GEN7_VS_FLOATING_POINT_MODE_ALTERNATE);  	OUT_BATCH(0); -	OUT_BATCH(GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS);  	OUT_BATCH(0); -	OUT_BATCH(GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS);  	OUT_BATCH(0); -	OUT_BATCH(GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS);  	OUT_BATCH(0); -	OUT_BATCH(GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS);  	OUT_BATCH(0);  } -static void -gen8_emit_state_base_address(struct intel_batchbuffer *batch) { -	OUT_BATCH(GEN6_STATE_BASE_ADDRESS | (16 - 2)); - -	/* general */ -	OUT_BATCH(0 | BASE_ADDRESS_MODIFY); +static void gen8_emit_hs(struct intel_batchbuffer *batch) +{ +	OUT_BATCH(GEN7_3DSTATE_HS | (9 - 2));  	OUT_BATCH(0); - -	/* stateless data port */ -	OUT_BATCH(0 | BASE_ADDRESS_MODIFY); - -	/* surface */ -	OUT_RELOC(batch, I915_GEM_DOMAIN_SAMPLER, 0, BASE_ADDRESS_MODIFY);  	OUT_BATCH(0); - -	/* dynamic */ -	OUT_RELOC(batch, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION, -		  0, BASE_ADDRESS_MODIFY);  	OUT_BATCH(0); - -	/* indirect */  	OUT_BATCH(0);  	
OUT_BATCH(0); - -	/* instruction */ -	OUT_RELOC(batch, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);  	OUT_BATCH(0); +	OUT_BATCH(1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT); +	OUT_BATCH(0); +} -	/* general state buffer size */ -	OUT_BATCH(0xfffff000 | 1); -	/* dynamic state buffer size */ -	OUT_BATCH(1 << 12 | 1); -	/* indirect object buffer size */ -	OUT_BATCH(0xfffff000 | 1); -	/* intruction buffer size */ -	OUT_BATCH(1 << 12 | 1); +static void gen8_emit_raster(struct intel_batchbuffer *batch) +{ +	OUT_BATCH(GEN8_3DSTATE_RASTER | (5 - 2)); +	OUT_BATCH(0); +	OUT_BATCH(0.0); +	OUT_BATCH(0.0); +	OUT_BATCH(0.0);  } -static void -gen7_emit_urb(struct intel_batchbuffer *batch) { -	/* XXX: Min valid values from mesa */ +static void gen8_emit_urb(struct intel_batchbuffer *batch) +{  	const int vs_entries = 64;  	const int vs_size = 2;  	const int vs_start = 4;  	OUT_BATCH(GEN7_3DSTATE_URB_VS);  	OUT_BATCH(vs_entries | ((vs_size - 1) << 16) | (vs_start << 25)); -	OUT_BATCH(GEN7_3DSTATE_URB_GS); -	OUT_BATCH(vs_start << 25); +  	OUT_BATCH(GEN7_3DSTATE_URB_HS); -	OUT_BATCH(vs_start << 25); -	OUT_BATCH(GEN7_3DSTATE_URB_DS); -	OUT_BATCH(vs_start << 25); -} +	OUT_BATCH(0x0f << 25); -static void -gen8_emit_cc(struct intel_batchbuffer *batch) { -	OUT_BATCH(GEN7_3DSTATE_BLEND_STATE_POINTERS); -	OUT_BATCH_STATE_OFFSET(cc.blend_state | 1); +	OUT_BATCH(GEN7_3DSTATE_URB_DS); +	OUT_BATCH(0x0f << 25); -	OUT_BATCH(GEN6_3DSTATE_CC_STATE_POINTERS); -	OUT_BATCH_STATE_OFFSET(cc.cc_state | 1); +	OUT_BATCH(GEN7_3DSTATE_URB_GS); +	OUT_BATCH(0x0f << 25);  } -static void -gen8_emit_multisample(struct intel_batchbuffer *batch) { -	OUT_BATCH(GEN8_3DSTATE_MULTISAMPLE); -	OUT_BATCH(0); - -	OUT_BATCH(GEN6_3DSTATE_SAMPLE_MASK); -	OUT_BATCH(1); +static void gen8_emit_vf_topology(struct intel_batchbuffer *batch) +{ +	OUT_BATCH(GEN8_3DSTATE_VF_TOPOLOGY); +	OUT_BATCH(_3DPRIM_TRILIST);  } -static void -gen8_emit_vs(struct intel_batchbuffer *batch) { -	
OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS); -	OUT_BATCH(0); - -	OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS); -	OUT_BATCH(0); +static void gen8_emit_so_decl_list(struct intel_batchbuffer *batch) +{ +	const int num_decls = 128; +	int i; -	OUT_BATCH(GEN6_3DSTATE_CONSTANT_VS | (11 - 2)); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); +	OUT_BATCH(GEN8_3DSTATE_SO_DECL_LIST | ((2 * num_decls) + 1));  	OUT_BATCH(0); +	OUT_BATCH(num_decls); -	OUT_BATCH(GEN6_3DSTATE_VS | (9-2)); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); +	for (i = 0; i < num_decls; i++) { +		OUT_BATCH(0); +		OUT_BATCH(0); +	}  } -static void -gen8_emit_hs(struct intel_batchbuffer *batch) { -	OUT_BATCH(GEN7_3DSTATE_CONSTANT_HS | (11 - 2)); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); - -	OUT_BATCH(GEN7_3DSTATE_HS | (9-2)); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); +static void gen8_emit_so_buffer(struct intel_batchbuffer *batch, const int index) +{ +	OUT_BATCH(GEN8_3DSTATE_SO_BUFFER | (8 - 2)); +	OUT_BATCH(index << 29);  	OUT_BATCH(0);  	OUT_BATCH(0);  	OUT_BATCH(0);  	OUT_BATCH(0); - -	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS);  	OUT_BATCH(0); - -	OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS);  	OUT_BATCH(0);  } -static void -gen8_emit_gs(struct intel_batchbuffer *batch) { -	OUT_BATCH(GEN7_3DSTATE_CONSTANT_GS | (11 - 2)); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); - -	OUT_BATCH(GEN7_3DSTATE_GS | (10-2)); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); - -	
OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS); -	OUT_BATCH(0); +static void gen8_emit_state_base_address(struct intel_batchbuffer *batch) { +	const unsigned offset = 0; +	OUT_BATCH(GEN6_STATE_BASE_ADDRESS | (16 - 2)); -	OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS); +	/* general */ +	OUT_RELOC(batch, 0, 0, offset | BASE_ADDRESS_MODIFY);  	OUT_BATCH(0); -} -static void -gen8_emit_ds(struct intel_batchbuffer *batch) { -	OUT_BATCH(GEN7_3DSTATE_CONSTANT_DS | (11 - 2)); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); +	/* stateless data port */  	OUT_BATCH(0); -	OUT_BATCH(GEN7_3DSTATE_DS | (9-2)); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); +	/* surface state base addess */ +	OUT_RELOC(batch, 0, 0, offset | BASE_ADDRESS_MODIFY);  	OUT_BATCH(0); -	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS); +	/* dynamic state base address */ +	OUT_RELOC(batch, 0, 0, offset | BASE_ADDRESS_MODIFY);  	OUT_BATCH(0); -	OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS); +	/* indirect */ +	OUT_BATCH(BASE_ADDRESS_MODIFY);  	OUT_BATCH(0); -} -static void -gen8_emit_wm_hz_op(struct intel_batchbuffer *batch) { -	OUT_BATCH(GEN8_3DSTATE_WM_HZ_OP | (5-2)); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); +	/* instruction */ +	OUT_RELOC(batch, 0, 0, offset | BASE_ADDRESS_MODIFY);  	OUT_BATCH(0); -} -static void -gen8_emit_null_state(struct intel_batchbuffer *batch) { -	gen8_emit_wm_hz_op(batch); -	gen8_emit_hs(batch); -	OUT_BATCH(GEN7_3DSTATE_TE | (4-2)); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); -	gen8_emit_gs(batch); -	gen8_emit_ds(batch); -	gen8_emit_vs(batch); +	/* general state buffer size */ +	OUT_BATCH(GEN8_STATE_SIZE_PAGES(1) | BUFFER_SIZE_MODIFY); +	/* dynamic state buffer size */ +	OUT_BATCH(GEN8_STATE_SIZE_PAGES(1) | BUFFER_SIZE_MODIFY); +	/* indirect object buffer size */ +	OUT_BATCH(0 | BUFFER_SIZE_MODIFY); +	
/* intruction buffer size */ +	OUT_BATCH(GEN8_STATE_SIZE_PAGES(1) | BUFFER_SIZE_MODIFY);  } -static void -gen7_emit_clip(struct intel_batchbuffer *batch) { -	OUT_BATCH(GEN6_3DSTATE_CLIP | (4 - 2)); +static void gen8_emit_chroma_key(struct intel_batchbuffer *batch, const int index) +{ +	OUT_BATCH(GEN6_3DSTATE_CHROMA_KEY | (4 - 2)); +	OUT_BATCH(index << 30);  	OUT_BATCH(0); -	OUT_BATCH(0); /*  pass-through */  	OUT_BATCH(0);  } -static void -gen8_emit_sf(struct intel_batchbuffer *batch) +static void gen8_emit_vertex_buffers(struct intel_batchbuffer *batch)  { +	const int buffers = 33;  	int i; -	OUT_BATCH(GEN7_3DSTATE_SBE | (4 - 2)); -	OUT_BATCH(1 << GEN7_SBE_NUM_OUTPUTS_SHIFT | -		  GEN8_SBE_FORCE_URB_ENTRY_READ_LENGTH | -		  GEN8_SBE_FORCE_URB_ENTRY_READ_OFFSET | -		  1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT | -		  1 << GEN8_SBE_URB_ENTRY_READ_OFFSET_SHIFT); -	OUT_BATCH(0); -	OUT_BATCH(0); +	OUT_BATCH(GEN6_3DSTATE_VERTEX_BUFFERS | ((4 * buffers) - 1)); -	OUT_BATCH(GEN8_3DSTATE_SBE_SWIZ | (11 - 2)); -	for (i = 0; i < 8; i++) +	for (i = 0; i < buffers; i++) { +		OUT_BATCH(i << VB0_BUFFER_INDEX_SHIFT | +			  GEN7_VB0_BUFFER_ADDR_MOD_EN); +		OUT_BATCH(0); /* Addr */  		OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); +		OUT_BATCH(0); +	} +} -	OUT_BATCH(GEN8_3DSTATE_RASTER | (5 - 2)); -	OUT_BATCH(GEN8_RASTER_FRONT_WINDING_CCW | GEN8_RASTER_CULL_NONE); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); +static void gen6_emit_vertex_elements(struct intel_batchbuffer *batch) +{ +	const int elements = 34; +	int i; -	OUT_BATCH(GEN6_3DSTATE_SF | (4 - 2)); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); +	OUT_BATCH(GEN6_3DSTATE_VERTEX_ELEMENTS | ((2 * elements - 1))); + +	for (i = 0; i < elements; i++) { +		if (i == 0) { +			OUT_BATCH(VE0_VALID | i); +			OUT_BATCH( +				GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT | +				GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT | +				GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT | +				GEN6_VFCOMPONENT_STORE_0 << 
VE1_VFCOMPONENT_3_SHIFT +				); +		} else { +			OUT_BATCH(0); +			OUT_BATCH(0); +		} +	}  } -static void -gen8_emit_ps(struct intel_batchbuffer *batch, uint32_t kernel) { -	const int max_threads = 63; - -	OUT_BATCH(GEN6_3DSTATE_WM | (2 - 2)); -	OUT_BATCH(/* XXX: I don't understand the BARYCENTRIC stuff, but it -		   * appears we need it to put our setup data in the place we -		   * expect (g6, see below) */ -		  GEN7_3DSTATE_PS_PERSPECTIVE_PIXEL_BARYCENTRIC); +static void gen8_emit_cc_state_pointers(struct intel_batchbuffer *batch) +{ +	union { +		float fval; +		uint32_t uval; +	} u; -	OUT_BATCH(GEN6_3DSTATE_CONSTANT_PS | (11-2)); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); +	unsigned offset; -	OUT_BATCH(GEN7_3DSTATE_PS | (12-2)); -	OUT_BATCH_STATE_OFFSET(kernel); -	OUT_BATCH(0); /* kernel hi */ -	OUT_BATCH(1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHIFT | -		  2 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT); -	OUT_BATCH(0); /* scratch space stuff */ -	OUT_BATCH(0); /* scratch hi */ -	OUT_BATCH((max_threads - 1) << GEN8_3DSTATE_PS_MAX_THREADS_SHIFT | -		  GEN6_3DSTATE_WM_16_DISPATCH_ENABLE); -	OUT_BATCH(6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT); -	OUT_BATCH(0); // kernel 1 -	OUT_BATCH(0); /* kernel 1 hi */ -	OUT_BATCH(0); // kernel 2 -	OUT_BATCH(0); /* kernel 2 hi */ +	u.fval = 1.0f; -	OUT_BATCH(GEN8_3DSTATE_PS_BLEND | (2 - 2)); -	OUT_BATCH(GEN8_PS_BLEND_HAS_WRITEABLE_RT); +	offset = intel_batch_state_offset(batch, 64); +	OUT_STATE(0); +	OUT_STATE(0);      /* Alpha reference value */ +	OUT_STATE(u.uval); /* Blend constant color RED */ +	OUT_STATE(u.uval); /* Blend constant color BLUE */ +	OUT_STATE(u.uval); /* Blend constant color GREEN */ +	OUT_STATE(u.uval); /* Blend constant color ALPHA */ -	OUT_BATCH(GEN8_3DSTATE_PS_EXTRA | (2 - 2)); -	OUT_BATCH(GEN8_PSX_PIXEL_SHADER_VALID | GEN8_PSX_ATTRIBUTE_ENABLE); +	
OUT_BATCH(GEN6_3DSTATE_CC_STATE_POINTERS); +	OUT_BATCH_STATE_OFFSET(offset | 1);  } -static void -gen8_emit_depth(struct intel_batchbuffer *batch) { -	OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER | (8-2)); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); +static void gen8_emit_blend_state_pointers(struct intel_batchbuffer *batch) +{ +	unsigned offset; +	int i; -	OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER | (5 - 2)); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); +	offset = intel_batch_state_offset(batch, 64); -	OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER | (5 - 2)); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); -	OUT_BATCH(0); -} +	for (i = 0; i < 17; i++) +		OUT_STATE(0); -static void -gen7_emit_clear(struct intel_batchbuffer *batch) { -	OUT_BATCH(GEN7_3DSTATE_CLEAR_PARAMS | (3-2)); -	OUT_BATCH(0); -	OUT_BATCH(1); // clear valid +	OUT_BATCH(GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2)); +	OUT_BATCH_STATE_OFFSET(offset | 1);  } -static void -gen6_emit_drawing_rectangle(struct intel_batchbuffer *batch) +static void gen8_emit_ps_extra(struct intel_batchbuffer *batch)  { -	OUT_BATCH(GEN6_3DSTATE_DRAWING_RECTANGLE | (4 - 2)); -	OUT_BATCH(0xffffffff); -	OUT_BATCH(0 | 0); -	OUT_BATCH(0); -} +        OUT_BATCH(GEN8_3DSTATE_PS_EXTRA | (2 - 2)); +        OUT_BATCH(GEN8_PSX_PIXEL_SHADER_VALID | +		  GEN8_PSX_ATTRIBUTE_ENABLE); -static void gen8_emit_vf_topology(struct intel_batchbuffer *batch) -{ -	OUT_BATCH(GEN8_3DSTATE_VF_TOPOLOGY); -	OUT_BATCH(_3DPRIM_RECTLIST);  } -/* Vertex elements MUST be defined before this according to spec */ -static void gen8_emit_primitive(struct intel_batchbuffer *batch) +static void gen8_emit_ps_blend(struct intel_batchbuffer *batch)  { -	OUT_BATCH(GEN8_3DSTATE_VF_INSTANCING | (3 - 2)); -	OUT_BATCH(0); -	OUT_BATCH(0); - -	OUT_BATCH(GEN6_3DPRIMITIVE | (7-2)); -	OUT_BATCH(0);	/* gen8+ ignore the topology type field */ -	OUT_BATCH(3);	/* vertex count */ -	OUT_BATCH(0);	/*  We're 
specifying this instead with offset in GEN6_3DSTATE_VERTEX_BUFFERS */ -	OUT_BATCH(1);	/* single instance */ -	OUT_BATCH(0);	/* start instance location */ -	OUT_BATCH(0);	/* index buffer offset, ignored */ +        OUT_BATCH(GEN8_3DSTATE_PS_BLEND | (2 - 2)); +        OUT_BATCH(GEN8_PS_BLEND_HAS_WRITEABLE_RT);  } -void gen8_setup_null_render_state(struct intel_batchbuffer *batch) +static void gen8_emit_viewport_state_pointers_cc(struct intel_batchbuffer *batch)  { -	uint32_t ps_sampler_state, ps_kernel_off, ps_binding_table; -	uint32_t scissor_state; -	uint32_t vertex_buffer; -	uint32_t batch_end; -	int ret; +	unsigned offset; -	ps_binding_table  = gen8_bind_surfaces(batch); -	ps_sampler_state  = gen8_create_sampler(batch); -	ps_kernel_off = gen8_fill_ps(batch, ps_kernel, sizeof(ps_kernel)); -	vertex_buffer = gen7_fill_vertex_buffer_data(batch); -	cc.cc_state = gen6_create_cc_state(batch); -	cc.blend_state = gen8_create_blend_state(batch); -	viewport.cc_state = gen6_create_cc_viewport(batch); -	viewport.sf_clip_state = gen7_create_sf_clip_viewport(batch); -	scissor_state = gen6_create_scissor_rect(batch); -	/* TODO: theree is other state which isn't setup */ - -	/* Start emitting the commands. 
The order roughly follows the mesa blorp -	 * order */ -	OUT_BATCH(GEN6_PIPELINE_SELECT | PIPELINE_SELECT_3D); +	offset = intel_batch_state_offset(batch, 32); -	gen8_emit_sip(batch); +	OUT_STATE((uint32_t)0.0f); /* Minimum depth */ +	OUT_STATE((uint32_t)0.0f); /* Maximum depth */ -	gen7_emit_push_constants(batch); +	OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2)); +	OUT_BATCH_STATE_OFFSET(offset); +} -	gen8_emit_state_base_address(batch); +static void gen8_emit_viewport_state_pointers_sf_clip(struct intel_batchbuffer *batch) +{ +	unsigned offset; +	int i; -	OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC); -	OUT_BATCH_STATE_OFFSET(viewport.cc_state); -	OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP); -	OUT_BATCH_STATE_OFFSET(viewport.sf_clip_state); +	offset = intel_batch_state_offset(batch, 64); -	gen7_emit_urb(batch); +	for (i = 0; i < 16; i++) +		OUT_STATE(0); -	gen8_emit_cc(batch); +	OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP | (2 - 2)); +	OUT_BATCH_STATE_OFFSET(offset); +} -	gen8_emit_multisample(batch); +static void gen8_emit_primitive(struct intel_batchbuffer *batch) +{ +        OUT_BATCH(GEN6_3DPRIMITIVE | (7-2)); +        OUT_BATCH(4);   /* gen8+ ignore the topology type field */ +        OUT_BATCH(1);   /* vertex count */ +        OUT_BATCH(0); +        OUT_BATCH(1);   /* single instance */ +        OUT_BATCH(0);   /* start instance location */ +        OUT_BATCH(0);   /* index buffer offset, ignored */ +} -	gen8_emit_null_state(batch); +int gen8_setup_null_render_state(struct intel_batchbuffer *batch) +{ +	int ret; +	int i; -	OUT_BATCH(GEN7_3DSTATE_STREAMOUT | (5-2)); +#define GEN8_PIPE_CONTROL_GLOBAL_GTT   (1 << 24) + +	OUT_BATCH(GEN6_PIPE_CONTROL | (6 - 2)); +	OUT_BATCH(GEN8_PIPE_CONTROL_GLOBAL_GTT);  	OUT_BATCH(0);  	OUT_BATCH(0);  	OUT_BATCH(0);  	OUT_BATCH(0); -	gen7_emit_clip(batch); +	OUT_BATCH(GEN6_PIPELINE_SELECT | PIPELINE_SELECT_3D); +	gen8_emit_wm(batch); +	gen8_emit_ps(batch);  	gen8_emit_sf(batch); -	
OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS); -	OUT_BATCH_STATE_OFFSET(ps_binding_table); +	OUT_CMD(GEN7_3DSTATE_SBE, 4); +	OUT_CMD(GEN8_3DSTATE_SBE_SWIZ, 11); -	OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS); -	OUT_BATCH_STATE_OFFSET(ps_sampler_state); - -	gen8_emit_ps(batch, ps_kernel_off); +	gen8_emit_vs(batch); +	gen8_emit_hs(batch); -	OUT_BATCH(GEN6_3DSTATE_SCISSOR_STATE_POINTERS); -	OUT_BATCH_STATE_OFFSET(scissor_state); +	OUT_CMD(GEN7_3DSTATE_GS, 10); +	OUT_CMD(GEN7_3DSTATE_STREAMOUT, 5); +	OUT_CMD(GEN7_3DSTATE_DS, 9); +	OUT_CMD(GEN6_3DSTATE_CLIP, 4); +	gen8_emit_raster(batch); +	OUT_CMD(GEN7_3DSTATE_TE, 4); +	OUT_CMD(GEN8_3DSTATE_VF, 2); +	OUT_CMD(GEN8_3DSTATE_WM_HZ_OP, 5); + +	gen8_emit_urb(batch); + +	OUT_CMD(GEN8_3DSTATE_BIND_TABLE_POOL_ALLOC, 4); +	OUT_CMD(GEN8_3DSTATE_GATHER_POOL_ALLOC, 4); +	OUT_CMD(GEN8_3DSTATE_DX9_CONSTANT_BUFFER_POOL_ALLOC, 4); +	OUT_CMD(GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS, 2); +	OUT_CMD(GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS, 2); +	OUT_CMD(GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS, 2); +	OUT_CMD(GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS, 2); +	OUT_CMD(GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS, 2); +	OUT_CMD(GEN6_3DSTATE_CONSTANT_VS, 11); +	OUT_CMD(GEN7_3DSTATE_CONSTANT_HS, 11); +	OUT_CMD(GEN7_3DSTATE_CONSTANT_DS, 11); +	OUT_CMD(GEN7_3DSTATE_CONSTANT_GS, 11); +	OUT_CMD(GEN7_3DSTATE_CONSTANT_PS, 11); +	OUT_CMD(GEN8_3DSTATE_VF_INSTANCING, 3); +	OUT_CMD(GEN8_3DSTATE_VF_SGVS, 2); -	gen8_emit_depth(batch); +	gen8_emit_vf_topology(batch); +	gen8_emit_so_decl_list(batch); -	gen7_emit_clear(batch); +	gen8_emit_so_buffer(batch, 0); +	gen8_emit_so_buffer(batch, 1); +	gen8_emit_so_buffer(batch, 2); +	gen8_emit_so_buffer(batch, 3); -	gen6_emit_drawing_rectangle(batch); +	gen8_emit_state_base_address(batch); -	gen7_emit_vertex_buffer(batch, vertex_buffer); +	OUT_CMD(GEN6_STATE_SIP, 3); +	OUT_CMD(GEN6_3DSTATE_DRAWING_RECTANGLE, 4); +	OUT_CMD(GEN7_3DSTATE_DEPTH_BUFFER, 8); + +	gen8_emit_chroma_key(batch, 0); +	gen8_emit_chroma_key(batch, 1); +	
gen8_emit_chroma_key(batch, 2); +	gen8_emit_chroma_key(batch, 3); + +	OUT_CMD(GEN6_3DSTATE_LINE_STIPPLE, 3); +	OUT_CMD(GEN6_3DSTATE_AA_LINE_PARAMS, 3); +	OUT_CMD(GEN7_3DSTATE_STENCIL_BUFFER, 5); +	OUT_CMD(GEN7_3DSTATE_HIER_DEPTH_BUFFER, 5); +	OUT_CMD(GEN7_3DSTATE_CLEAR_PARAMS, 3); +	OUT_CMD(GEN6_3DSTATE_MONOFILTER_SIZE, 2); +	OUT_CMD(GEN8_3DSTATE_MULTISAMPLE, 2); +	OUT_CMD(GEN8_3DSTATE_POLY_STIPPLE_OFFSET, 2); +	OUT_CMD(GEN8_3DSTATE_POLY_STIPPLE_PATTERN, 33); +	OUT_CMD(GEN8_3DSTATE_SAMPLER_PALETTE_LOAD0, 16 + 1); +	OUT_CMD(GEN8_3DSTATE_SAMPLER_PALETTE_LOAD1, 16 + 1); +	OUT_CMD(GEN6_3DSTATE_INDEX_BUFFER, 5); + +	gen8_emit_vertex_buffers(batch);  	gen6_emit_vertex_elements(batch); -	gen8_emit_vf_topology(batch); +	OUT_BATCH(GEN6_3DSTATE_VF_STATISTICS | 1); /* Enable */ + +	OUT_CMD(GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS, 2); +	OUT_CMD(GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS, 2); +	OUT_CMD(GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS, 2); +	OUT_CMD(GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS, 2); +	OUT_CMD(GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS, 2); + +	gen8_emit_cc_state_pointers(batch); +	gen8_emit_blend_state_pointers(batch); + +	gen8_emit_ps_extra(batch); +	gen8_emit_ps_blend(batch); + +	OUT_CMD(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS, 2); +	OUT_CMD(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS, 2); +	OUT_CMD(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS, 2); +	OUT_CMD(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS, 2); +	OUT_CMD(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS, 2); + +	OUT_CMD(GEN6_3DSTATE_SCISSOR_STATE_POINTERS, 2); + +	gen8_emit_viewport_state_pointers_cc(batch); +	gen8_emit_viewport_state_pointers_sf_clip(batch); +  	gen8_emit_primitive(batch);  	OUT_BATCH(MI_BATCH_BUFFER_END); | 
