#include "rendercopy.h"
#include "intel_chipset.h"
#include "gen4_render.h"
#include "surfaceformat.h"

#include <assert.h>

#define VERTEX_SIZE (3*4)

#define URB_VS_ENTRY_SIZE 1
#define URB_GS_ENTRY_SIZE 0
#define URB_CL_ENTRY_SIZE 0
#define URB_SF_ENTRY_SIZE 2
#define URB_CS_ENTRY_SIZE 1

#define GEN4_GRF_BLOCKS(nreg) (((nreg) + 15) / 16 - 1)
#define SF_KERNEL_NUM_GRF 16
#define PS_KERNEL_NUM_GRF 32

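/*
 * Pre-assembled EU kernels for the SF and WM (pixel shader) stages, one
 * 128-bit instruction per row. The "nomask" PS kernels sample the source
 * surface at the affinely interpolated texture coordinate and write the
 * result to the render target; Gen4 and Gen5 need separate encodings.
 */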
static const uint32_t gen4_sf_kernel_nomask[][4] = {
	{ 0x00400031, 0x20c01fbd, 0x0069002c, 0x01110001 },
	{ 0x00600001, 0x206003be, 0x00690060, 0x00000000 },
	{ 0x00600040, 0x20e077bd, 0x00690080, 0x006940a0 },
	{ 0x00600041, 0x202077be, 0x008d00e0, 0x000000c0 },
	{ 0x00600040, 0x20e077bd, 0x006900a0, 0x00694060 },
	{ 0x00600041, 0x204077be, 0x008d00e0, 0x000000c8 },
	{ 0x00600031, 0x20001fbc, 0x008d0000, 0x8640c800 },
};

static const uint32_t gen5_sf_kernel_nomask[][4] = {
	{ 0x00400031, 0x20c01fbd, 0x1069002c, 0x02100001 },
	{ 0x00600001, 0x206003be, 0x00690060, 0x00000000 },
	{ 0x00600040, 0x20e077bd, 0x00690080, 0x006940a0 },
	{ 0x00600041, 0x202077be, 0x008d00e0, 0x000000c0 },
	{ 0x00600040, 0x20e077bd, 0x006900a0, 0x00694060 },
	{ 0x00600041, 0x204077be, 0x008d00e0, 0x000000c8 },
	{ 0x00600031, 0x20001fbc, 0x648d0000, 0x8808c800 },
};

static const uint32_t gen4_ps_kernel_nomask_affine[][4] = {
	{ 0x00800040, 0x23c06d29, 0x00480028, 0x10101010 },
	{ 0x00800040, 0x23806d29, 0x0048002a, 0x11001100 },
	{ 0x00802040, 0x2100753d, 0x008d03c0, 0x00004020 },
	{ 0x00802040, 0x2140753d, 0x008d0380, 0x00004024 },
	{ 0x00802059, 0x200077bc, 0x00000060, 0x008d0100 },
	{ 0x00802048, 0x204077be, 0x00000064, 0x008d0140 },
	{ 0x00802059, 0x200077bc, 0x00000070, 0x008d0100 },
	{ 0x00802048, 0x208077be, 0x00000074, 0x008d0140 },
	{ 0x00600201, 0x20200022, 0x008d0000, 0x00000000 },
	{ 0x00000201, 0x20280062, 0x00000000, 0x00000000 },
	{ 0x01800031, 0x21801d09, 0x008d0000, 0x02580001 },
	{ 0x00600001, 0x204003be, 0x008d0180, 0x00000000 },
	{ 0x00601001, 0x20c003be, 0x008d01a0, 0x00000000 },
	{ 0x00600001, 0x206003be, 0x008d01c0, 0x00000000 },
	{ 0x00601001, 0x20e003be, 0x008d01e0, 0x00000000 },
	{ 0x00600001, 0x208003be, 0x008d0200, 0x00000000 },
	{ 0x00601001, 0x210003be, 0x008d0220, 0x00000000 },
	{ 0x00600001, 0x20a003be, 0x008d0240, 0x00000000 },
	{ 0x00601001, 0x212003be, 0x008d0260, 0x00000000 },
	{ 0x00600201, 0x202003be, 0x008d0020, 0x00000000 },
	{ 0x00800031, 0x20001d28, 0x008d0000, 0x85a04800 },
};

static const uint32_t gen5_ps_kernel_nomask_affine[][4] = {
	{ 0x00800040, 0x23c06d29, 0x00480028, 0x10101010 },
	{ 0x00800040, 0x23806d29, 0x0048002a, 0x11001100 },
	{ 0x00802040, 0x2100753d, 0x008d03c0, 0x00004020 },
	{ 0x00802040, 0x2140753d, 0x008d0380, 0x00004024 },
	{ 0x00802059, 0x200077bc, 0x00000060, 0x008d0100 },
	{ 0x00802048, 0x204077be, 0x00000064, 0x008d0140 },
	{ 0x00802059, 0x200077bc, 0x00000070, 0x008d0100 },
	{ 0x00802048, 0x208077be, 0x00000074, 0x008d0140 },
	{ 0x01800031, 0x21801fa9, 0x208d0000, 0x0a8a0001 },
	{ 0x00802001, 0x304003be, 0x008d0180, 0x00000000 },
	{ 0x00802001, 0x306003be, 0x008d01c0, 0x00000000 },
	{ 0x00802001, 0x308003be, 0x008d0200, 0x00000000 },
	{ 0x00802001, 0x30a003be, 0x008d0240, 0x00000000 },
	{ 0x00600201, 0x202003be, 0x008d0020, 0x00000000 },
	{ 0x00800031, 0x20001d28, 0x548d0000, 0x94084800 },
};

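/*
 * Batch bookkeeping helpers: bytes of the batch written so far, and a helper
 * that rounds the write pointer up to the given alignment and returns the
 * aligned offset.
 */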
static uint32_t
batch_used(struct intel_batchbuffer *batch)
{
	return batch->ptr - batch->buffer;
}

static uint32_t
batch_round_upto(struct intel_batchbuffer *batch, uint32_t divisor)
{
	uint32_t offset = batch_used(batch);

	offset = (offset + divisor - 1) / divisor * divisor;
	batch->ptr = batch->buffer + offset;
	return offset;
}

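/*
 * Per-generation hardware limits; the URB size and the maximum number of URB
 * entries and threads differ between Gen4, G4X and Gen5 (Ironlake).
 */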
static int gen4_max_vs_nr_urb_entries(uint32_t devid)
{
	return IS_GEN5(devid) ? 256 : 32;
}

static int gen4_max_sf_nr_urb_entries(uint32_t devid)
{
	return IS_GEN5(devid) ? 128 : 64;
}

static int gen4_urb_size(uint32_t devid)
{
	return IS_GEN5(devid) ? 1024 : IS_G4X(devid) ? 384 : 256;
}

static int gen4_max_sf_threads(uint32_t devid)
{
	return IS_GEN5(devid) ? 48 : 24;
}

static int gen4_max_wm_threads(uint32_t devid)
{
	return IS_GEN5(devid) ? 72 : IS_G4X(devid) ? 50 : 32;
}

static void
gen4_render_flush(struct intel_batchbuffer *batch,
		  drm_intel_context *context, uint32_t batch_end)
{
	int ret;

	ret = drm_intel_bo_subdata(batch->bo, 0, 4096, batch->buffer);
	if (ret == 0)
		ret = drm_intel_gem_bo_context_exec(batch->bo, context,
						    batch_end, 0);
	assert(ret == 0);
}

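/*
 * Write a SURFACE_STATE for buf into the batch and return its offset.  The
 * destination is set up for render target writes, the source for sampler
 * reads.
 */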
static uint32_t
gen4_bind_buf(struct intel_batchbuffer *batch,
	      const struct igt_buf *buf,
	      int is_dst)
{
	struct gen4_surface_state *ss;
	uint32_t write_domain, read_domain;
	int ret;

	igt_assert_lte(buf->stride, 128*1024);
	igt_assert_lte(igt_buf_width(buf), 8192);
	igt_assert_lte(igt_buf_height(buf), 8192);

	if (is_dst) {
		write_domain = read_domain = I915_GEM_DOMAIN_RENDER;
	} else {
		write_domain = 0;
		read_domain = I915_GEM_DOMAIN_SAMPLER;
	}

	ss = intel_batchbuffer_subdata_alloc(batch, sizeof(*ss), 32);

	ss->ss0.surface_type = SURFACE_2D;
	switch (buf->bpp) {
	case 8: ss->ss0.surface_format = SURFACEFORMAT_R8_UNORM; break;
	case 16: ss->ss0.surface_format = SURFACEFORMAT_R8G8_UNORM; break;
	case 32: ss->ss0.surface_format = SURFACEFORMAT_B8G8R8A8_UNORM; break;
	case 64: ss->ss0.surface_format = SURFACEFORMAT_R16G16B16A16_FLOAT; break;
	default: igt_assert(0);
	}

	ss->ss0.data_return_format = SURFACERETURNFORMAT_FLOAT32;
	ss->ss0.color_blend = 1;
	ss->ss1.base_addr = buf->bo->offset;

	ret = drm_intel_bo_emit_reloc(batch->bo,
				      intel_batchbuffer_subdata_offset(batch, ss) + 4,
				      buf->bo, 0,
				      read_domain, write_domain);
	assert(ret == 0);

	ss->ss2.height = igt_buf_height(buf) - 1;
	ss->ss2.width = igt_buf_width(buf) - 1;
	ss->ss3.pitch = buf->stride - 1;
	ss->ss3.tiled_surface = buf->tiling != I915_TILING_NONE;
	ss->ss3.tile_walk = buf->tiling == I915_TILING_Y;

	return intel_batchbuffer_subdata_offset(batch, ss);
}

static uint32_t
gen4_bind_surfaces(struct intel_batchbuffer *batch,
		   const struct igt_buf *src,
		   const struct igt_buf *dst)
{
	uint32_t *binding_table;

	binding_table = intel_batchbuffer_subdata_alloc(batch, 32, 32);

	binding_table[0] = gen4_bind_buf(batch, dst, 1);
	binding_table[1] = gen4_bind_buf(batch, src, 0);

	return intel_batchbuffer_subdata_offset(batch, binding_table);
}

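/* No system routine (exception handler) is used, so point SIP at 0. */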
static void
gen4_emit_sip(struct intel_batchbuffer *batch)
{
	OUT_BATCH(GEN4_STATE_SIP | (2 - 2));
	OUT_BATCH(0);
}

static void
gen4_emit_state_base_address(struct intel_batchbuffer *batch)
{
	if (IS_GEN5(batch->devid)) {
		OUT_BATCH(GEN4_STATE_BASE_ADDRESS | (8 - 2));
		OUT_RELOC(batch->bo, /* general */
			  I915_GEM_DOMAIN_INSTRUCTION, 0,
			  BASE_ADDRESS_MODIFY);
		OUT_RELOC(batch->bo, /* surface */
			  I915_GEM_DOMAIN_INSTRUCTION, 0,
			  BASE_ADDRESS_MODIFY);
		OUT_BATCH(0); /* media */
		OUT_RELOC(batch->bo, /* instruction */
			  I915_GEM_DOMAIN_INSTRUCTION, 0,
			  BASE_ADDRESS_MODIFY);

		/* upper bounds, disable */
		OUT_BATCH(BASE_ADDRESS_MODIFY); /* general */
		OUT_BATCH(0); /* media */
		OUT_BATCH(BASE_ADDRESS_MODIFY); /* instruction */
	} else {
		OUT_BATCH(GEN4_STATE_BASE_ADDRESS | (6 - 2));
		OUT_RELOC(batch->bo, /* general */
			  I915_GEM_DOMAIN_INSTRUCTION, 0,
			  BASE_ADDRESS_MODIFY);
		OUT_RELOC(batch->bo, /* surface */
			  I915_GEM_DOMAIN_INSTRUCTION, 0,
			  BASE_ADDRESS_MODIFY);
		OUT_BATCH(0); /* media */

		/* upper bounds, disable */
		OUT_BATCH(BASE_ADDRESS_MODIFY); /* general */
		OUT_BATCH(0); /* media */
	}
}

static void
gen4_emit_pipelined_pointers(struct intel_batchbuffer *batch,
			     uint32_t vs, uint32_t sf,
			     uint32_t wm, uint32_t cc)
{
	OUT_BATCH(GEN4_3DSTATE_PIPELINED_POINTERS | (7 - 2));
	OUT_BATCH(vs);
	OUT_BATCH(GEN4_GS_DISABLE);
	OUT_BATCH(GEN4_CLIP_DISABLE);
	OUT_BATCH(sf);
	OUT_BATCH(wm);
	OUT_BATCH(cc);
}

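/*
 * Partition the URB between the fixed-function units.  Only the VS and SF
 * sections get any entries here; the fence values programmed below are
 * cumulative end offsets of each section.
 */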
static void
gen4_emit_urb(struct intel_batchbuffer *batch)
{
	int vs_entries = gen4_max_vs_nr_urb_entries(batch->devid);
	int gs_entries = 0;
	int cl_entries = 0;
	int sf_entries = gen4_max_sf_nr_urb_entries(batch->devid);
	int cs_entries = 0;

	int urb_vs_end = vs_entries * URB_VS_ENTRY_SIZE;
	int urb_gs_end = urb_vs_end + gs_entries * URB_GS_ENTRY_SIZE;
	int urb_cl_end = urb_gs_end + cl_entries * URB_CL_ENTRY_SIZE;
	int urb_sf_end = urb_cl_end + sf_entries * URB_SF_ENTRY_SIZE;
	int urb_cs_end = urb_sf_end + cs_entries * URB_CS_ENTRY_SIZE;

	assert(urb_cs_end <= gen4_urb_size(batch->devid));

	intel_batchbuffer_align(batch, 16);

	OUT_BATCH(GEN4_URB_FENCE |
		  UF0_CS_REALLOC |
		  UF0_SF_REALLOC |
		  UF0_CLIP_REALLOC |
		  UF0_GS_REALLOC |
		  UF0_VS_REALLOC |
		  (3 - 2));
	OUT_BATCH(urb_cl_end << UF1_CLIP_FENCE_SHIFT |
		  urb_gs_end << UF1_GS_FENCE_SHIFT |
		  urb_vs_end << UF1_VS_FENCE_SHIFT);
	OUT_BATCH(urb_cs_end << UF2_CS_FENCE_SHIFT |
		  urb_sf_end << UF2_SF_FENCE_SHIFT);

	OUT_BATCH(GEN4_CS_URB_STATE | (2 - 2));
	OUT_BATCH((URB_CS_ENTRY_SIZE - 1) << 4 | cs_entries << 0);
}

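/*
 * The copy needs no depth buffer, so program a NULL depth surface.  The
 * packet length differs between Gen4 and G4X/Gen5, and Gen5 additionally
 * expects 3DSTATE_CLEAR_PARAMS.
 */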
static void
gen4_emit_null_depth_buffer(struct intel_batchbuffer *batch)
{
	if (IS_G4X(batch->devid) || IS_GEN5(batch->devid)) {
		OUT_BATCH(GEN4_3DSTATE_DEPTH_BUFFER | (6 - 2));
		OUT_BATCH(SURFACE_NULL << GEN4_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT |
			  GEN4_DEPTHFORMAT_D32_FLOAT << GEN4_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT);
		OUT_BATCH(0);
		OUT_BATCH(0);
		OUT_BATCH(0);
		OUT_BATCH(0);
	} else {
		OUT_BATCH(GEN4_3DSTATE_DEPTH_BUFFER | (5 - 2));
		OUT_BATCH(SURFACE_NULL << GEN4_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT |
			  GEN4_DEPTHFORMAT_D32_FLOAT << GEN4_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT);
		OUT_BATCH(0);
		OUT_BATCH(0);
		OUT_BATCH(0);
	}

	if (IS_GEN5(batch->devid)) {
		OUT_BATCH(GEN4_3DSTATE_CLEAR_PARAMS | (2 - 2));
		OUT_BATCH(0);
	}
}

static void
gen4_emit_invariant(struct intel_batchbuffer *batch)
{
	OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);

	if (IS_GEN5(batch->devid) || IS_G4X(batch->devid))
		OUT_BATCH(G4X_PIPELINE_SELECT | PIPELINE_SELECT_3D);
	else
		OUT_BATCH(GEN4_PIPELINE_SELECT | PIPELINE_SELECT_3D);
}

static uint32_t
gen4_create_vs_state(struct intel_batchbuffer *batch)
{
	struct gen4_vs_state *vs;
	int nr_urb_entries;

	vs = intel_batchbuffer_subdata_alloc(batch, sizeof(*vs), 32);

	/* Set up the vertex shader to be disabled (passthrough) */
	nr_urb_entries = gen4_max_vs_nr_urb_entries(batch->devid);
	if (IS_GEN5(batch->devid))
		nr_urb_entries >>= 2;
	vs->vs4.nr_urb_entries = nr_urb_entries;
	vs->vs4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
	vs->vs6.vs_enable = 0;
	vs->vs6.vert_cache_disable = 1;

	return intel_batchbuffer_subdata_offset(batch, vs);
}

static uint32_t
gen4_create_sf_state(struct intel_batchbuffer *batch,
		     uint32_t kernel)
{
	struct gen4_sf_state *sf;

	sf = intel_batchbuffer_subdata_alloc(batch, sizeof(*sf), 32);

	sf->sf0.grf_reg_count = GEN4_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
	sf->sf0.kernel_start_pointer = kernel >> 6;

	sf->sf3.urb_entry_read_length = 1; /* 1 URB per vertex */
	/* don't smash vertex header, read start from dw8 */
	sf->sf3.urb_entry_read_offset = 1;
	sf->sf3.dispatch_grf_start_reg = 3;

	sf->sf4.max_threads = gen4_max_sf_threads(batch->devid) - 1;
	sf->sf4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
	sf->sf4.nr_urb_entries = gen4_max_sf_nr_urb_entries(batch->devid);

	sf->sf6.cull_mode = GEN4_CULLMODE_NONE;
	sf->sf6.dest_org_vbias = 0x8;
	sf->sf6.dest_org_hbias = 0x8;

	return intel_batchbuffer_subdata_offset(batch, sf);
}

static uint32_t
gen4_create_wm_state(struct intel_batchbuffer *batch,
		     uint32_t kernel,
		     uint32_t sampler)
{
	struct gen4_wm_state *wm;

	wm = intel_batchbuffer_subdata_alloc(batch, sizeof(*wm), 32);

	assert((kernel & 63) == 0);
	wm->wm0.kernel_start_pointer = kernel >> 6;
	wm->wm0.grf_reg_count = GEN4_GRF_BLOCKS(PS_KERNEL_NUM_GRF);

	wm->wm3.urb_entry_read_offset = 0;
	wm->wm3.dispatch_grf_start_reg = 3;

	assert((sampler & 31) == 0);
	wm->wm4.sampler_state_pointer = sampler >> 5;
	wm->wm4.sampler_count = 1;

	wm->wm5.max_threads = gen4_max_wm_threads(batch->devid);
	wm->wm5.thread_dispatch_enable = 1;
	wm->wm5.enable_16_pix = 1;
	wm->wm5.early_depth_test = 1;

	if (IS_GEN5(batch->devid))
		wm->wm1.binding_table_entry_count = 0;
	else
		wm->wm1.binding_table_entry_count = 2;
	wm->wm3.urb_entry_read_length = 2;

	return intel_batchbuffer_subdata_offset(batch, wm);
}

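/*
 * Only the WM stage references any surfaces (dst in slot 0, src in slot 1 of
 * the binding table built by gen4_bind_surfaces()); the other stages get a
 * zero binding table pointer.
 */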
static void
gen4_emit_binding_table(struct intel_batchbuffer *batch,
			uint32_t wm_table)
{
	OUT_BATCH(GEN4_3DSTATE_BINDING_TABLE_POINTERS | (6 - 2));
	OUT_BATCH(0); /* vs */
	OUT_BATCH(0); /* gs */
	OUT_BATCH(0); /* clip */
	OUT_BATCH(0); /* sf */
	OUT_BATCH(wm_table); /* ps */
}

static void
gen4_emit_drawing_rectangle(struct intel_batchbuffer *batch,
			    const struct igt_buf *dst)
{
	OUT_BATCH(GEN4_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH((igt_buf_height(dst) - 1) << 16 |
		  (igt_buf_width(dst) - 1));
	OUT_BATCH(0);
}

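/*
 * Vertex element setup.  The resulting VUE layout is described in the
 * comment of each branch below; Gen5 carries an extra padding element in
 * front of the position.
 */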
static void
gen4_emit_vertex_elements(struct intel_batchbuffer *batch)
{
	if (IS_GEN5(batch->devid)) {
		/* The VUE layout
		 * dword 0-3: pad (0.0, 0.0, 0.0, 0.0),
		 * dword 4-7: position (x, y, 1.0, 1.0),
		 * dword 8-11: texture coordinate 0 (u0, v0, 0, 0)
		 *
		 * dword 4-11 are fetched from vertex buffer
		 */
		OUT_BATCH(GEN4_3DSTATE_VERTEX_ELEMENTS | (3 * 2 + 1 - 2));

		/* pad */
		OUT_BATCH(0 << GEN4_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN4_VE0_VALID |
			  SURFACEFORMAT_R32G32B32A32_FLOAT << VE0_FORMAT_SHIFT |
			  0 << VE0_OFFSET_SHIFT);
		OUT_BATCH(GEN4_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT |
			  GEN4_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT |
			  GEN4_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
			  GEN4_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT);

		/* x,y */
		OUT_BATCH(0 << GEN4_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN4_VE0_VALID |
			  SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
			  0 << VE0_OFFSET_SHIFT);
		OUT_BATCH(GEN4_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
			  GEN4_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
			  GEN4_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT |
			  GEN4_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT);

		/* u0, v0 */
		OUT_BATCH(0 << GEN4_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN4_VE0_VALID |
			  SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT |
			  4 << VE0_OFFSET_SHIFT);
		OUT_BATCH(GEN4_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
			  GEN4_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
			  GEN4_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
			  GEN4_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT);
	} else {
		/* The VUE layout
		 * dword 0-3: position (x, y, 1.0, 1.0),
		 * dword 4-7: texture coordinate 0 (u0, v0, 0, 0)
		 *
		 * dword 0-7 are fetched from vertex buffer
		 */
		OUT_BATCH(GEN4_3DSTATE_VERTEX_ELEMENTS | (2 * 2 + 1 - 2));

		/* x,y */
		OUT_BATCH(0 << GEN4_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN4_VE0_VALID |
			  SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
			  0 << VE0_OFFSET_SHIFT);
		OUT_BATCH(GEN4_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
			  GEN4_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
			  GEN4_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT |
			  GEN4_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT |
			  4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);

		/* u0, v0 */
		OUT_BATCH(0 << GEN4_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN4_VE0_VALID |
			  SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT |
			  4 << VE0_OFFSET_SHIFT);
		OUT_BATCH(GEN4_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
			  GEN4_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
			  GEN4_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
			  GEN4_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT |
			  8 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);
	}
}

static uint32_t
gen4_create_cc_viewport(struct intel_batchbuffer *batch)
{
	struct gen4_cc_viewport *vp;

	vp = intel_batchbuffer_subdata_alloc(batch, sizeof(*vp), 32);

	vp->min_depth = -1.e35;
	vp->max_depth = 1.e35;

	return intel_batchbuffer_subdata_offset(batch, vp);
}

static uint32_t
gen4_create_cc_state(struct intel_batchbuffer *batch,
		     uint32_t cc_vp)
{
	struct gen4_color_calc_state *cc;

	cc = intel_batchbuffer_subdata_alloc(batch, sizeof(*cc), 64);

	cc->cc4.cc_viewport_state_offset = cc_vp;

	return intel_batchbuffer_subdata_offset(batch, cc);
}

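/*
 * Copy the generation-appropriate kernel binaries into the batch.  The
 * 64-byte alignment matches the kernel start pointer fields, which store
 * the offset shifted right by 6.
 */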
static uint32_t
gen4_create_sf_kernel(struct intel_batchbuffer *batch)
{
	if (IS_GEN5(batch->devid))
		return intel_batchbuffer_copy_data(batch, gen5_sf_kernel_nomask,
						   sizeof(gen5_sf_kernel_nomask),
						   64);
	else
		return intel_batchbuffer_copy_data(batch, gen4_sf_kernel_nomask,
						   sizeof(gen4_sf_kernel_nomask),
						   64);
}

static uint32_t
gen4_create_ps_kernel(struct intel_batchbuffer *batch)
{
	if (IS_GEN5(batch->devid))
		return intel_batchbuffer_copy_data(batch, gen5_ps_kernel_nomask_affine,
						   sizeof(gen5_ps_kernel_nomask_affine),
						   64);
	else
		return intel_batchbuffer_copy_data(batch, gen4_ps_kernel_nomask_affine,
						   sizeof(gen4_ps_kernel_nomask_affine),
						   64);
}

static uint32_t
gen4_create_sampler(struct intel_batchbuffer *batch,
		    sampler_filter_t filter,
		    sampler_extend_t extend)
{
	struct gen4_sampler_state *ss;

	ss = intel_batchbuffer_subdata_alloc(batch, sizeof(*ss), 32);

	ss->ss0.lod_preclamp = GEN4_LOD_PRECLAMP_OGL;

	/* We use the legacy mode to get the semantics specified by
	 * the Render extension.
	 */
	ss->ss0.border_color_mode = GEN4_BORDER_COLOR_MODE_LEGACY;

	switch (filter) {
	default:
	case SAMPLER_FILTER_NEAREST:
		ss->ss0.min_filter = GEN4_MAPFILTER_NEAREST;
		ss->ss0.mag_filter = GEN4_MAPFILTER_NEAREST;
		break;
	case SAMPLER_FILTER_BILINEAR:
		ss->ss0.min_filter = GEN4_MAPFILTER_LINEAR;
		ss->ss0.mag_filter = GEN4_MAPFILTER_LINEAR;
		break;
	}

	switch (extend) {
	default:
	case SAMPLER_EXTEND_NONE:
		ss->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER;
		ss->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER;
		ss->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER;
		break;
	case SAMPLER_EXTEND_REPEAT:
		ss->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_WRAP;
		ss->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_WRAP;
		ss->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_WRAP;
		break;
	case SAMPLER_EXTEND_PAD:
		ss->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_CLAMP;
		ss->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_CLAMP;
		ss->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_CLAMP;
		break;
	case SAMPLER_EXTEND_REFLECT:
		ss->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_MIRROR;
		ss->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_MIRROR;
		ss->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_MIRROR;
		break;
	}

	return intel_batchbuffer_subdata_offset(batch, ss);
}

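/*
 * Vertex buffer 0 points at the batch bo itself; the vertex data is appended
 * at the end of the batch by gen4_render_copyfunc().  Gen5 programs the end
 * address of the buffer, earlier parts a maximum element index.
 */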
static void gen4_emit_vertex_buffer(struct intel_batchbuffer *batch)
{
	OUT_BATCH(GEN4_3DSTATE_VERTEX_BUFFERS | (5 - 2));
	OUT_BATCH(GEN4_VB0_VERTEXDATA |
		  0 << GEN4_VB0_BUFFER_INDEX_SHIFT |
		  VERTEX_SIZE << VB0_BUFFER_PITCH_SHIFT);
	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_VERTEX, 0, 0);
	if (IS_GEN5(batch->devid))
		OUT_RELOC(batch->bo, I915_GEM_DOMAIN_VERTEX, 0,
			  batch->bo->size - 1);
	else
		OUT_BATCH(batch->bo->size / VERTEX_SIZE - 1);
	OUT_BATCH(0);
}

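/*
 * Emit a three-vertex RECTLIST and return the batch offset of its start
 * vertex index dword, so the caller can patch it once the final location of
 * the vertex data is known.
 */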
static uint32_t gen4_emit_primitive(struct intel_batchbuffer *batch)
{
	uint32_t offset;

	OUT_BATCH(GEN4_3DPRIMITIVE |
		  GEN4_3DPRIMITIVE_VERTEX_SEQUENTIAL |
		  _3DPRIM_RECTLIST << GEN4_3DPRIMITIVE_TOPOLOGY_SHIFT |
		  0 << 9 |
		  (6 - 2));
	OUT_BATCH(3); /* vertex count */
	offset = batch_used(batch);
	OUT_BATCH(0); /* vertex_index */
	OUT_BATCH(1); /* single instance */
	OUT_BATCH(0); /* start instance location */
	OUT_BATCH(0); /* index buffer offset, ignored */

	return offset;
}

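/*
 * Copy a width x height rectangle from (src_x, src_y) in src to
 * (dst_x, dst_y) in dst by drawing a textured RECTLIST through the Gen4/Gen5
 * 3D pipeline: indirect state is built from offset 1024 onwards, the command
 * stream from the start of the batch, and the vertex data is appended after
 * MI_BATCH_BUFFER_END with the primitive's start vertex index patched to
 * point at it.
 */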
void gen4_render_copyfunc(struct intel_batchbuffer *batch,
			  drm_intel_context *context,
			  const struct igt_buf *src,
			  unsigned src_x, unsigned src_y,
			  unsigned width, unsigned height,
			  const struct igt_buf *dst,
			  unsigned dst_x, unsigned dst_y)
{
	uint32_t cc, cc_vp;
	uint32_t wm, wm_sampler, wm_kernel, wm_table;
	uint32_t sf, sf_kernel;
	uint32_t vs;
	uint32_t offset, batch_end;

	igt_assert(src->bpp == dst->bpp);
	intel_batchbuffer_flush_with_context(batch, context);

	batch->ptr = batch->buffer + 1024;
	intel_batchbuffer_subdata_alloc(batch, 64, 64);

	vs = gen4_create_vs_state(batch);

	sf_kernel = gen4_create_sf_kernel(batch);
	sf = gen4_create_sf_state(batch, sf_kernel);

	wm_table = gen4_bind_surfaces(batch, src, dst);
	wm_kernel = gen4_create_ps_kernel(batch);
	wm_sampler = gen4_create_sampler(batch,
					 SAMPLER_FILTER_NEAREST,
					 SAMPLER_EXTEND_NONE);
	wm = gen4_create_wm_state(batch, wm_kernel, wm_sampler);

	cc_vp = gen4_create_cc_viewport(batch);
	cc = gen4_create_cc_state(batch, cc_vp);

	batch->ptr = batch->buffer;

	gen4_emit_invariant(batch);
	gen4_emit_state_base_address(batch);
	gen4_emit_sip(batch);
	gen4_emit_null_depth_buffer(batch);

	gen4_emit_drawing_rectangle(batch, dst);
	gen4_emit_binding_table(batch, wm_table);
	gen4_emit_vertex_elements(batch);
	gen4_emit_pipelined_pointers(batch, vs, sf, wm, cc);
	gen4_emit_urb(batch);

	gen4_emit_vertex_buffer(batch);
	offset = gen4_emit_primitive(batch);

	OUT_BATCH(MI_BATCH_BUFFER_END);
	batch_end = intel_batchbuffer_align(batch, 8);

	*(uint32_t *)(batch->buffer + offset) =
		batch_round_upto(batch, VERTEX_SIZE)/VERTEX_SIZE;

	emit_vertex_2s(batch, dst_x + width, dst_y + height);
	emit_vertex_normalized(batch, src_x + width, igt_buf_width(src));
	emit_vertex_normalized(batch, src_y + height, igt_buf_height(src));

	emit_vertex_2s(batch, dst_x, dst_y + height);
	emit_vertex_normalized(batch, src_x, igt_buf_width(src));
	emit_vertex_normalized(batch, src_y + height, igt_buf_height(src));

	emit_vertex_2s(batch, dst_x, dst_y);
	emit_vertex_normalized(batch, src_x, igt_buf_width(src));
	emit_vertex_normalized(batch, src_y, igt_buf_height(src));

	gen4_render_flush(batch, context, batch_end);
	intel_batchbuffer_reset(batch);
}