#include "rendercopy.h"
#include "intel_chipset.h"
#include "gen4_render.h"
#include "surfaceformat.h"

#include <assert.h>

#define VERTEX_SIZE (3*4)

#define URB_VS_ENTRY_SIZE	1
#define URB_GS_ENTRY_SIZE	0
#define URB_CL_ENTRY_SIZE	0
#define URB_SF_ENTRY_SIZE	2
#define URB_CS_ENTRY_SIZE	1

#define GEN4_GRF_BLOCKS(nreg) (((nreg) + 15) / 16 - 1)
#define SF_KERNEL_NUM_GRF 16
#define PS_KERNEL_NUM_GRF 32

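/*
 * Pre-assembled EU kernels: each row below is one 128-bit Gen4/Gen5 EU
 * instruction encoded as four dwords.  Broadly (a reading of the
 * encodings, not authoritative): the SF kernel emits the setup data for
 * interpolating the texture coordinates across the rectangle, and the PS
 * kernel interpolates those coordinates, samples the source surface and
 * writes the result to the render target.  The Gen5 variants differ only
 * in their send-message encodings, which changed on Ironlake.
 */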
static const uint32_t gen4_sf_kernel_nomask[][4] = {
	{ 0x00400031, 0x20c01fbd, 0x0069002c, 0x01110001 },
	{ 0x00600001, 0x206003be, 0x00690060, 0x00000000 },
	{ 0x00600040, 0x20e077bd, 0x00690080, 0x006940a0 },
	{ 0x00600041, 0x202077be, 0x008d00e0, 0x000000c0 },
	{ 0x00600040, 0x20e077bd, 0x006900a0, 0x00694060 },
	{ 0x00600041, 0x204077be, 0x008d00e0, 0x000000c8 },
	{ 0x00600031, 0x20001fbc, 0x008d0000, 0x8640c800 },
};

static const uint32_t gen5_sf_kernel_nomask[][4] = {
	{ 0x00400031, 0x20c01fbd, 0x1069002c, 0x02100001 },
	{ 0x00600001, 0x206003be, 0x00690060, 0x00000000 },
	{ 0x00600040, 0x20e077bd, 0x00690080, 0x006940a0 },
	{ 0x00600041, 0x202077be, 0x008d00e0, 0x000000c0 },
	{ 0x00600040, 0x20e077bd, 0x006900a0, 0x00694060 },
	{ 0x00600041, 0x204077be, 0x008d00e0, 0x000000c8 },
	{ 0x00600031, 0x20001fbc, 0x648d0000, 0x8808c800 },
};

static const uint32_t gen4_ps_kernel_nomask_affine[][4] = {
	{ 0x00800040, 0x23c06d29, 0x00480028, 0x10101010 },
	{ 0x00800040, 0x23806d29, 0x0048002a, 0x11001100 },
	{ 0x00802040, 0x2100753d, 0x008d03c0, 0x00004020 },
	{ 0x00802040, 0x2140753d, 0x008d0380, 0x00004024 },
	{ 0x00802059, 0x200077bc, 0x00000060, 0x008d0100 },
	{ 0x00802048, 0x204077be, 0x00000064, 0x008d0140 },
	{ 0x00802059, 0x200077bc, 0x00000070, 0x008d0100 },
	{ 0x00802048, 0x208077be, 0x00000074, 0x008d0140 },
	{ 0x00600201, 0x20200022, 0x008d0000, 0x00000000 },
	{ 0x00000201, 0x20280062, 0x00000000, 0x00000000 },
	{ 0x01800031, 0x21801d09, 0x008d0000, 0x02580001 },
	{ 0x00600001, 0x204003be, 0x008d0180, 0x00000000 },
	{ 0x00601001, 0x20c003be, 0x008d01a0, 0x00000000 },
	{ 0x00600001, 0x206003be, 0x008d01c0, 0x00000000 },
	{ 0x00601001, 0x20e003be, 0x008d01e0, 0x00000000 },
	{ 0x00600001, 0x208003be, 0x008d0200, 0x00000000 },
	{ 0x00601001, 0x210003be, 0x008d0220, 0x00000000 },
	{ 0x00600001, 0x20a003be, 0x008d0240, 0x00000000 },
	{ 0x00601001, 0x212003be, 0x008d0260, 0x00000000 },
	{ 0x00600201, 0x202003be, 0x008d0020, 0x00000000 },
	{ 0x00800031, 0x20001d28, 0x008d0000, 0x85a04800 },
};

static const uint32_t gen5_ps_kernel_nomask_affine[][4] = {
	{ 0x00800040, 0x23c06d29, 0x00480028, 0x10101010 },
	{ 0x00800040, 0x23806d29, 0x0048002a, 0x11001100 },
	{ 0x00802040, 0x2100753d, 0x008d03c0, 0x00004020 },
	{ 0x00802040, 0x2140753d, 0x008d0380, 0x00004024 },
	{ 0x00802059, 0x200077bc, 0x00000060, 0x008d0100 },
	{ 0x00802048, 0x204077be, 0x00000064, 0x008d0140 },
	{ 0x00802059, 0x200077bc, 0x00000070, 0x008d0100 },
	{ 0x00802048, 0x208077be, 0x00000074, 0x008d0140 },
	{ 0x01800031, 0x21801fa9, 0x208d0000, 0x0a8a0001 },
	{ 0x00802001, 0x304003be, 0x008d0180, 0x00000000 },
	{ 0x00802001, 0x306003be, 0x008d01c0, 0x00000000 },
	{ 0x00802001, 0x308003be, 0x008d0200, 0x00000000 },
	{ 0x00802001, 0x30a003be, 0x008d0240, 0x00000000 },
	{ 0x00600201, 0x202003be, 0x008d0020, 0x00000000 },
	{ 0x00800031, 0x20001d28, 0x548d0000, 0x94084800 },
};

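/*
 * Bookkeeping helpers for the batchbuffer write pointer: batch_used()
 * reports how many bytes have been emitted so far, and batch_round_upto()
 * bumps the write pointer up to the next multiple of @divisor, returning
 * the aligned offset.
 */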
static uint32_t
batch_used(struct intel_batchbuffer *batch)
{
	return batch->ptr - batch->buffer;
}

static uint32_t
batch_round_upto(struct intel_batchbuffer *batch, uint32_t divisor)
{
	uint32_t offset = batch_used(batch);

	offset = (offset + divisor - 1) / divisor * divisor;
	batch->ptr = batch->buffer + offset;
	return offset;
}

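/*
 * Per-generation hardware limits.  Ironlake (Gen5) has a larger URB and
 * more fixed-function threads than the original Gen4 parts; G4X falls in
 * between on URB size and WM thread count.
 */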
static int gen4_max_vs_nr_urb_entries(uint32_t devid)
{
	return IS_GEN5(devid) ? 256 : 32;
}

static int gen4_max_sf_nr_urb_entries(uint32_t devid)
{
	return IS_GEN5(devid) ? 128 : 64;
}

static int gen4_urb_size(uint32_t devid)
{
	return IS_GEN5(devid) ? 1024 : IS_G4X(devid) ? 384 : 256;
}

static int gen4_max_sf_threads(uint32_t devid)
{
	return IS_GEN5(devid) ? 48 : 24;
}

static int gen4_max_wm_threads(uint32_t devid)
{
	return IS_GEN5(devid) ? 72 : IS_G4X(devid) ? 50 : 32;
}

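/* Upload the assembled batch to its bo and submit it on @context. */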
static void
gen4_render_flush(struct intel_batchbuffer *batch,
		  drm_intel_context *context, uint32_t batch_end)
{
	int ret;

	ret = drm_intel_bo_subdata(batch->bo, 0, 4096, batch->buffer);
	if (ret == 0)
		ret = drm_intel_gem_bo_context_exec(batch->bo, context,
						    batch_end, 0);
	assert(ret == 0);
}

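/*
 * Build a SURFACE_STATE for @buf inside the batch and return its offset.
 * The destination is bound through the render cache for writing, the
 * source through the sampler cache for reading; width, height and pitch
 * are programmed "minus one", as the hardware expects.
 */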
static uint32_t
gen4_bind_buf(struct intel_batchbuffer *batch,
	      const struct igt_buf *buf,
	      int is_dst)
{
	struct gen4_surface_state *ss;
	uint32_t write_domain, read_domain;
	int ret;

	igt_assert_lte(buf->stride, 128*1024);
	igt_assert_lte(igt_buf_width(buf), 8192);
	igt_assert_lte(igt_buf_height(buf), 8192);

	if (is_dst) {
		write_domain = read_domain = I915_GEM_DOMAIN_RENDER;
	} else {
		write_domain = 0;
		read_domain = I915_GEM_DOMAIN_SAMPLER;
	}

	ss = intel_batchbuffer_subdata_alloc(batch, sizeof(*ss), 32);

	ss->ss0.surface_type = SURFACE_2D;
	switch (buf->bpp) {
		case 8: ss->ss0.surface_format = SURFACEFORMAT_R8_UNORM; break;
		case 16: ss->ss0.surface_format = SURFACEFORMAT_R8G8_UNORM; break;
		case 32: ss->ss0.surface_format = SURFACEFORMAT_B8G8R8A8_UNORM; break;
		case 64: ss->ss0.surface_format = SURFACEFORMAT_R16G16B16A16_FLOAT; break;
		default: igt_assert(0);
	}

	ss->ss0.data_return_format = SURFACERETURNFORMAT_FLOAT32;
	ss->ss0.color_blend = 1;
	ss->ss1.base_addr = buf->bo->offset;

	ret = drm_intel_bo_emit_reloc(batch->bo,
				      intel_batchbuffer_subdata_offset(batch, ss) + 4,
				      buf->bo, 0,
				      read_domain, write_domain);
	assert(ret == 0);

	ss->ss2.height = igt_buf_height(buf) - 1;
	ss->ss2.width  = igt_buf_width(buf) - 1;
	ss->ss3.pitch  = buf->stride - 1;
	ss->ss3.tiled_surface = buf->tiling != I915_TILING_NONE;
	ss->ss3.tile_walk     = buf->tiling == I915_TILING_Y;

	return intel_batchbuffer_subdata_offset(batch, ss);
}

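/* Binding table layout: entry 0 is the render target (dst), entry 1 the
 * source texture. */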
static uint32_t
gen4_bind_surfaces(struct intel_batchbuffer *batch,
		   const struct igt_buf *src,
		   const struct igt_buf *dst)
{
	uint32_t *binding_table;

	binding_table = intel_batchbuffer_subdata_alloc(batch, 32, 32);

	binding_table[0] = gen4_bind_buf(batch, dst, 1);
	binding_table[1] = gen4_bind_buf(batch, src, 0);

	return intel_batchbuffer_subdata_offset(batch, binding_table);
}

static void
gen4_emit_sip(struct intel_batchbuffer *batch)
{
	OUT_BATCH(GEN4_STATE_SIP | (2 - 2));
	OUT_BATCH(0);
}

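/*
 * All indirect state lives inside the batchbuffer itself, so the general,
 * surface and (on Gen5) instruction base addresses all point at batch->bo;
 * the offsets returned by intel_batchbuffer_subdata_offset() can then be
 * used directly as state pointers.
 */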
static void
gen4_emit_state_base_address(struct intel_batchbuffer *batch)
{
	if (IS_GEN5(batch->devid)) {
		OUT_BATCH(GEN4_STATE_BASE_ADDRESS | (8 - 2));
		OUT_RELOC(batch->bo, /* general */
			  I915_GEM_DOMAIN_INSTRUCTION, 0,
			  BASE_ADDRESS_MODIFY);
		OUT_RELOC(batch->bo, /* surface */
			  I915_GEM_DOMAIN_INSTRUCTION, 0,
			  BASE_ADDRESS_MODIFY);
		OUT_BATCH(0); /* media */
		OUT_RELOC(batch->bo, /* instruction */
			  I915_GEM_DOMAIN_INSTRUCTION, 0,
			  BASE_ADDRESS_MODIFY);

		/* upper bounds, disable */
		OUT_BATCH(BASE_ADDRESS_MODIFY); /* general */
		OUT_BATCH(0); /* media */
		OUT_BATCH(BASE_ADDRESS_MODIFY); /* instruction */
	} else {
		OUT_BATCH(GEN4_STATE_BASE_ADDRESS | (6 - 2));
		OUT_RELOC(batch->bo, /* general */
			  I915_GEM_DOMAIN_INSTRUCTION, 0,
			  BASE_ADDRESS_MODIFY);
		OUT_RELOC(batch->bo, /* surface */
			  I915_GEM_DOMAIN_INSTRUCTION, 0,
			  BASE_ADDRESS_MODIFY);
		OUT_BATCH(0); /* media */

		/* upper bounds, disable */
		OUT_BATCH(BASE_ADDRESS_MODIFY); /* general */
		OUT_BATCH(0); /* media */
	}
}

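/* Point each fixed-function unit at its state; GS and CLIP stay disabled. */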
static void
gen4_emit_pipelined_pointers(struct intel_batchbuffer *batch,
			     uint32_t vs, uint32_t sf,
			     uint32_t wm, uint32_t cc)
{
	OUT_BATCH(GEN4_3DSTATE_PIPELINED_POINTERS | (7 - 2));
	OUT_BATCH(vs);
	OUT_BATCH(GEN4_GS_DISABLE);
	OUT_BATCH(GEN4_CLIP_DISABLE);
	OUT_BATCH(sf);
	OUT_BATCH(wm);
	OUT_BATCH(cc);
}

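/*
 * Partition the URB between the fixed-function stages.  Each fence is the
 * end offset, in URB rows, of that stage's region; the disabled GS and
 * CLIP stages get zero-sized regions.  On original Gen4, for instance,
 * this works out to 32*1 = 32 rows for the VS plus 64*2 = 128 rows for
 * the SF, comfortably inside the 256-row URB.
 */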
static void
gen4_emit_urb(struct intel_batchbuffer *batch)
{
	int vs_entries = gen4_max_vs_nr_urb_entries(batch->devid);
	int gs_entries = 0;
	int cl_entries = 0;
	int sf_entries = gen4_max_sf_nr_urb_entries(batch->devid);
	int cs_entries = 0;

	int urb_vs_end =              vs_entries * URB_VS_ENTRY_SIZE;
	int urb_gs_end = urb_vs_end + gs_entries * URB_GS_ENTRY_SIZE;
	int urb_cl_end = urb_gs_end + cl_entries * URB_CL_ENTRY_SIZE;
	int urb_sf_end = urb_cl_end + sf_entries * URB_SF_ENTRY_SIZE;
	int urb_cs_end = urb_sf_end + cs_entries * URB_CS_ENTRY_SIZE;

	assert(urb_cs_end <= gen4_urb_size(batch->devid));

	intel_batchbuffer_align(batch, 16);

	OUT_BATCH(GEN4_URB_FENCE |
		  UF0_CS_REALLOC |
		  UF0_SF_REALLOC |
		  UF0_CLIP_REALLOC |
		  UF0_GS_REALLOC |
		  UF0_VS_REALLOC |
		  (3 - 2));
	OUT_BATCH(urb_cl_end << UF1_CLIP_FENCE_SHIFT |
		  urb_gs_end << UF1_GS_FENCE_SHIFT |
		  urb_vs_end << UF1_VS_FENCE_SHIFT);
	OUT_BATCH(urb_cs_end << UF2_CS_FENCE_SHIFT |
		  urb_sf_end << UF2_SF_FENCE_SHIFT);

	OUT_BATCH(GEN4_CS_URB_STATE | (2 - 2));
	OUT_BATCH((URB_CS_ENTRY_SIZE - 1) << 4 | cs_entries << 0);
}

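/*
 * No depth testing is needed for a blit, so bind a null depth buffer.
 * G4X/Gen5 take an extra dword in 3DSTATE_DEPTH_BUFFER, and Gen5
 * additionally expects 3DSTATE_CLEAR_PARAMS.
 */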
static void
gen4_emit_null_depth_buffer(struct intel_batchbuffer *batch)
{
	if (IS_G4X(batch->devid) || IS_GEN5(batch->devid)) {
		OUT_BATCH(GEN4_3DSTATE_DEPTH_BUFFER | (6 - 2));
		OUT_BATCH(SURFACE_NULL << GEN4_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT |
			  GEN4_DEPTHFORMAT_D32_FLOAT << GEN4_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT);
		OUT_BATCH(0);
		OUT_BATCH(0);
		OUT_BATCH(0);
		OUT_BATCH(0);
	} else {
		OUT_BATCH(GEN4_3DSTATE_DEPTH_BUFFER | (5 - 2));
		OUT_BATCH(SURFACE_NULL << GEN4_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT |
			  GEN4_DEPTHFORMAT_D32_FLOAT << GEN4_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT);
		OUT_BATCH(0);
		OUT_BATCH(0);
		OUT_BATCH(0);
	}

	if (IS_GEN5(batch->devid)) {
		OUT_BATCH(GEN4_3DSTATE_CLEAR_PARAMS | (2 - 2));
		OUT_BATCH(0);
	}
}

static void
gen4_emit_invariant(struct intel_batchbuffer *batch)
{
	OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);

	if (IS_GEN5(batch->devid) || IS_G4X(batch->devid))
		OUT_BATCH(G4X_PIPELINE_SELECT | PIPELINE_SELECT_3D);
	else
		OUT_BATCH(GEN4_PIPELINE_SELECT | PIPELINE_SELECT_3D);
}

static uint32_t
gen4_create_vs_state(struct intel_batchbuffer *batch)
{
	struct gen4_vs_state *vs;
	int nr_urb_entries;

	vs = intel_batchbuffer_subdata_alloc(batch, sizeof(*vs), 32);

	/* Set up the vertex shader to be disabled (passthrough) */
	nr_urb_entries = gen4_max_vs_nr_urb_entries(batch->devid);
	if (IS_GEN5(batch->devid))
		nr_urb_entries >>= 2;
	vs->vs4.nr_urb_entries = nr_urb_entries;
	vs->vs4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
	vs->vs6.vs_enable = 0;
	vs->vs6.vert_cache_disable = 1;

	return intel_batchbuffer_subdata_offset(batch, vs);
}

static uint32_t
gen4_create_sf_state(struct intel_batchbuffer *batch,
		     uint32_t kernel)
{
	struct gen4_sf_state *sf;

	sf = intel_batchbuffer_subdata_alloc(batch, sizeof(*sf), 32);

	sf->sf0.grf_reg_count = GEN4_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
	sf->sf0.kernel_start_pointer = kernel >> 6;

	sf->sf3.urb_entry_read_length = 1;  /* 1 URB per vertex */
	/* don't smash vertex header, read start from dw8 */
	sf->sf3.urb_entry_read_offset = 1;
	sf->sf3.dispatch_grf_start_reg = 3;

	sf->sf4.max_threads = gen4_max_sf_threads(batch->devid) - 1;
	sf->sf4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
	sf->sf4.nr_urb_entries = gen4_max_sf_nr_urb_entries(batch->devid);

	sf->sf6.cull_mode = GEN4_CULLMODE_NONE;
	sf->sf6.dest_org_vbias = 0x8;
	sf->sf6.dest_org_hbias = 0x8;

	return intel_batchbuffer_subdata_offset(batch, sf);
}

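/*
 * WM (pixel shader) state: 16-pixel dispatch mode with the sampler state
 * attached.  The kernel and sampler pointers are stored pre-shifted since
 * the hardware requires 64- and 32-byte alignment respectively.
 */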
static uint32_t
gen4_create_wm_state(struct intel_batchbuffer *batch,
		     uint32_t kernel,
		     uint32_t sampler)
{
	struct gen4_wm_state *wm;

	wm = intel_batchbuffer_subdata_alloc(batch, sizeof(*wm), 32);

	assert((kernel & 63) == 0);
	wm->wm0.kernel_start_pointer = kernel >> 6;
	wm->wm0.grf_reg_count = GEN4_GRF_BLOCKS(PS_KERNEL_NUM_GRF);

	wm->wm3.urb_entry_read_offset = 0;
	wm->wm3.dispatch_grf_start_reg = 3;

	assert((sampler & 31) == 0);
	wm->wm4.sampler_state_pointer = sampler >> 5;
	wm->wm4.sampler_count = 1;

	wm->wm5.max_threads = gen4_max_wm_threads(batch->devid);
	wm->wm5.thread_dispatch_enable = 1;
	wm->wm5.enable_16_pix = 1;
	wm->wm5.early_depth_test = 1;

	if (IS_GEN5(batch->devid))
		wm->wm1.binding_table_entry_count = 0;
	else
		wm->wm1.binding_table_entry_count = 2;
	wm->wm3.urb_entry_read_length = 2;

	return intel_batchbuffer_subdata_offset(batch, wm);
}

static void
gen4_emit_binding_table(struct intel_batchbuffer *batch,
			uint32_t wm_table)
{
	OUT_BATCH(GEN4_3DSTATE_BINDING_TABLE_POINTERS | (6 - 2));
	OUT_BATCH(0);		/* vs */
	OUT_BATCH(0);		/* gs */
	OUT_BATCH(0);		/* clip */
	OUT_BATCH(0);		/* sf */
	OUT_BATCH(wm_table);    /* ps */
}

static void
gen4_emit_drawing_rectangle(struct intel_batchbuffer *batch,
			    const struct igt_buf *dst)
{
	OUT_BATCH(GEN4_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH((igt_buf_height(dst) - 1) << 16 |
		  (igt_buf_width(dst) - 1));
	OUT_BATCH(0);
}

static void
gen4_emit_vertex_elements(struct intel_batchbuffer *batch)
{

	if (IS_GEN5(batch->devid)) {
		/* The VUE layout
		 *    dword 0-3: pad (0.0, 0.0, 0.0, 0.0),
		 *    dword 4-7: position (x, y, 1.0, 1.0),
		 *    dword 8-11: texture coordinate 0 (u0, v0, 0, 0)
		 *
		 * dword 4-11 are fetched from vertex buffer
		 */
		OUT_BATCH(GEN4_3DSTATE_VERTEX_ELEMENTS | (3 * 2 + 1 - 2));

		/* pad */
		OUT_BATCH(0 << GEN4_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN4_VE0_VALID |
			  SURFACEFORMAT_R32G32B32A32_FLOAT << VE0_FORMAT_SHIFT |
			  0 << VE0_OFFSET_SHIFT);
		OUT_BATCH(GEN4_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT |
			  GEN4_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT |
			  GEN4_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
			  GEN4_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT);

		/* x,y */
		OUT_BATCH(0 << GEN4_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN4_VE0_VALID |
			  SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
			  0 << VE0_OFFSET_SHIFT);
		OUT_BATCH(GEN4_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
			  GEN4_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
			  GEN4_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT |
			  GEN4_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT);

		/* u0, v0 */
		OUT_BATCH(0 << GEN4_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN4_VE0_VALID |
			  SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT |
			  4 << VE0_OFFSET_SHIFT);
		OUT_BATCH(GEN4_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
			  GEN4_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
			  GEN4_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
			  GEN4_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT);
	} else {
		/* The VUE layout
		 *    dword 0-3: position (x, y, 1.0, 1.0),
		 *    dword 4-7: texture coordinate 0 (u0, v0, 0, 0)
		 *
		 * dword 0-7 are fetched from vertex buffer
		 */
		OUT_BATCH(GEN4_3DSTATE_VERTEX_ELEMENTS | (2 * 2 + 1 - 2));

		/* x,y */
		OUT_BATCH(0 << GEN4_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN4_VE0_VALID |
			  SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
			  0 << VE0_OFFSET_SHIFT);
		OUT_BATCH(GEN4_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
			  GEN4_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
			  GEN4_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT |
			  GEN4_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT |
			  4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);

		/* u0, v0 */
		OUT_BATCH(0 << GEN4_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN4_VE0_VALID |
			  SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT |
			  4 << VE0_OFFSET_SHIFT);
		OUT_BATCH(GEN4_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
			  GEN4_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
			  GEN4_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
			  GEN4_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT |
			  8 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);
	}
}

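/* An effectively unbounded depth range; depth output is unused here. */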
static uint32_t
gen4_create_cc_viewport(struct intel_batchbuffer *batch)
{
	struct gen4_cc_viewport *vp;

	vp = intel_batchbuffer_subdata_alloc(batch, sizeof(*vp), 32);

	vp->min_depth = -1.e35;
	vp->max_depth = 1.e35;

	return intel_batchbuffer_subdata_offset(batch, vp);
}

static uint32_t
gen4_create_cc_state(struct intel_batchbuffer *batch,
		     uint32_t cc_vp)
{
	struct gen4_color_calc_state *cc;

	cc = intel_batchbuffer_subdata_alloc(batch, sizeof(*cc), 64);

	cc->cc4.cc_viewport_state_offset = cc_vp;

	return intel_batchbuffer_subdata_offset(batch, cc);
}

static uint32_t
gen4_create_sf_kernel(struct intel_batchbuffer *batch)
{
	if (IS_GEN5(batch->devid))
		return intel_batchbuffer_copy_data(batch, gen5_sf_kernel_nomask,
						   sizeof(gen5_sf_kernel_nomask),
						   64);
	else
		return intel_batchbuffer_copy_data(batch, gen4_sf_kernel_nomask,
						   sizeof(gen4_sf_kernel_nomask),
						   64);
}

static uint32_t
gen4_create_ps_kernel(struct intel_batchbuffer *batch)
{
	if (IS_GEN5(batch->devid))
		return intel_batchbuffer_copy_data(batch, gen5_ps_kernel_nomask_affine,
						   sizeof(gen5_ps_kernel_nomask_affine),
						   64);
	else
		return intel_batchbuffer_copy_data(batch, gen4_ps_kernel_nomask_affine,
						   sizeof(gen4_ps_kernel_nomask_affine),
						   64);
}

static uint32_t
gen4_create_sampler(struct intel_batchbuffer *batch,
		    sampler_filter_t filter,
		    sampler_extend_t extend)
{
	struct gen4_sampler_state *ss;

	ss = intel_batchbuffer_subdata_alloc(batch, sizeof(*ss), 32);

	ss->ss0.lod_preclamp = GEN4_LOD_PRECLAMP_OGL;

	/* We use the legacy mode to get the semantics specified by
	 * the Render extension.
	 */
	ss->ss0.border_color_mode = GEN4_BORDER_COLOR_MODE_LEGACY;

	switch (filter) {
	default:
	case SAMPLER_FILTER_NEAREST:
		ss->ss0.min_filter = GEN4_MAPFILTER_NEAREST;
		ss->ss0.mag_filter = GEN4_MAPFILTER_NEAREST;
		break;
	case SAMPLER_FILTER_BILINEAR:
		ss->ss0.min_filter = GEN4_MAPFILTER_LINEAR;
		ss->ss0.mag_filter = GEN4_MAPFILTER_LINEAR;
		break;
	}

	switch (extend) {
	default:
	case SAMPLER_EXTEND_NONE:
		ss->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER;
		ss->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER;
		ss->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER;
		break;
	case SAMPLER_EXTEND_REPEAT:
		ss->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_WRAP;
		ss->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_WRAP;
		ss->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_WRAP;
		break;
	case SAMPLER_EXTEND_PAD:
		ss->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_CLAMP;
		ss->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_CLAMP;
		ss->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_CLAMP;
		break;
	case SAMPLER_EXTEND_REFLECT:
		ss->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_MIRROR;
		ss->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_MIRROR;
		ss->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_MIRROR;
		break;
	}

	return intel_batchbuffer_subdata_offset(batch, ss);
}

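/*
 * The vertex data is written into the tail of the batchbuffer, so the
 * vertex buffer simply aliases batch->bo.  Each vertex is VERTEX_SIZE
 * (12) bytes: one dword packing x,y as two SSCALED 16-bit integers,
 * followed by two 32-bit floats holding the normalized texture coordinate.
 */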
static void gen4_emit_vertex_buffer(struct intel_batchbuffer *batch)
{
	OUT_BATCH(GEN4_3DSTATE_VERTEX_BUFFERS | (5 - 2));
	OUT_BATCH(GEN4_VB0_VERTEXDATA |
		  0 << GEN4_VB0_BUFFER_INDEX_SHIFT |
		  VERTEX_SIZE << VB0_BUFFER_PITCH_SHIFT);
	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_VERTEX, 0, 0);
	if (IS_GEN5(batch->devid))
		OUT_RELOC(batch->bo, I915_GEM_DOMAIN_VERTEX, 0,
			  batch->bo->size - 1);
	else
		OUT_BATCH(batch->bo->size / VERTEX_SIZE - 1);
	OUT_BATCH(0);
}

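/*
 * Emit a RECTLIST primitive.  The start-vertex dword is not known yet
 * (the vertices are appended only after MI_BATCH_BUFFER_END), so its
 * offset in the batch is returned for the caller to patch later.
 */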
static uint32_t gen4_emit_primitive(struct intel_batchbuffer *batch)
{
	uint32_t offset;

	OUT_BATCH(GEN4_3DPRIMITIVE |
		  GEN4_3DPRIMITIVE_VERTEX_SEQUENTIAL |
		  _3DPRIM_RECTLIST << GEN4_3DPRIMITIVE_TOPOLOGY_SHIFT |
		  0 << 9 |
		  (6 - 2));
	OUT_BATCH(3);	/* vertex count */
	offset = batch_used(batch);
	OUT_BATCH(0);	/* vertex_index */
	OUT_BATCH(1);	/* single instance */
	OUT_BATCH(0);	/* start instance location */
	OUT_BATCH(0);	/* index buffer offset, ignored */

	return offset;
}

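/*
 * Copy a width x height rectangle from (src_x, src_y) in @src to
 * (dst_x, dst_y) in @dst via the 3D pipeline.  Indirect state is built
 * above a 1KiB region reserved at the start of the batch, the command
 * stream is then emitted from offset 0, and finally three RECTLIST
 * vertices (bottom-right, bottom-left, top-left) are appended after the
 * commands and the primitive's start-vertex index is patched to point at
 * them.
 */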
void gen4_render_copyfunc(struct intel_batchbuffer *batch,
			  drm_intel_context *context,
			  const struct igt_buf *src,
			  unsigned src_x, unsigned src_y,
			  unsigned width, unsigned height,
			  const struct igt_buf *dst,
			  unsigned dst_x, unsigned dst_y)
{
	uint32_t cc, cc_vp;
	uint32_t wm, wm_sampler, wm_kernel, wm_table;
	uint32_t sf, sf_kernel;
	uint32_t vs;
	uint32_t offset, batch_end;

	igt_assert(src->bpp == dst->bpp);
	intel_batchbuffer_flush_with_context(batch, context);

	batch->ptr = batch->buffer + 1024;
	intel_batchbuffer_subdata_alloc(batch, 64, 64);

	vs = gen4_create_vs_state(batch);

	sf_kernel = gen4_create_sf_kernel(batch);
	sf = gen4_create_sf_state(batch, sf_kernel);

	wm_table = gen4_bind_surfaces(batch, src, dst);
	wm_kernel = gen4_create_ps_kernel(batch);
	wm_sampler = gen4_create_sampler(batch,
					 SAMPLER_FILTER_NEAREST,
					 SAMPLER_EXTEND_NONE);
	wm = gen4_create_wm_state(batch, wm_kernel, wm_sampler);

	cc_vp = gen4_create_cc_viewport(batch);
	cc = gen4_create_cc_state(batch, cc_vp);

	batch->ptr = batch->buffer;

	gen4_emit_invariant(batch);
	gen4_emit_state_base_address(batch);
	gen4_emit_sip(batch);
	gen4_emit_null_depth_buffer(batch);

	gen4_emit_drawing_rectangle(batch, dst);
	gen4_emit_binding_table(batch, wm_table);
	gen4_emit_vertex_elements(batch);
	gen4_emit_pipelined_pointers(batch, vs, sf, wm, cc);
	gen4_emit_urb(batch);

	gen4_emit_vertex_buffer(batch);
	offset = gen4_emit_primitive(batch);

	OUT_BATCH(MI_BATCH_BUFFER_END);
	batch_end = intel_batchbuffer_align(batch, 8);

	/* Patch the primitive's start-vertex index now that the location
	 * of the vertex data is known. */
	*(uint32_t *)(batch->buffer + offset) =
		batch_round_upto(batch, VERTEX_SIZE)/VERTEX_SIZE;

	emit_vertex_2s(batch, dst_x + width, dst_y + height);
	emit_vertex_normalized(batch, src_x + width, igt_buf_width(src));
	emit_vertex_normalized(batch, src_y + height, igt_buf_height(src));

	emit_vertex_2s(batch, dst_x, dst_y + height);
	emit_vertex_normalized(batch, src_x, igt_buf_width(src));
	emit_vertex_normalized(batch, src_y + height, igt_buf_height(src));

	emit_vertex_2s(batch, dst_x, dst_y);
	emit_vertex_normalized(batch, src_x, igt_buf_width(src));
	emit_vertex_normalized(batch, src_y, igt_buf_height(src));

	gen4_render_flush(batch, context, batch_end);
	intel_batchbuffer_reset(batch);
}