#include <assert.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <inttypes.h>
#include <errno.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <getopt.h>

#include <drm.h>
#include <i915_drm.h>

#include "drmtest.h"
#include "intel_bufmgr.h"
#include "intel_batchbuffer.h"
#include "intel_io.h"
#include "rendercopy.h"
#include "gen9_render.h"
#include "intel_reg.h"
#include "igt_aux.h"

#include "intel_aub.h"

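/* Each vertex is 3 dwords: one dword packing the two signed 16-bit
 * destination coordinates, then two floats with the normalized source
 * coordinates (see gen7_fill_vertex_buffer_data()).
 */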
#define VERTEX_SIZE (3*4)

#if DEBUG_RENDERCPY
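/* Dump the 4k of batch (commands + state) to a file for offline inspection. */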
static void dump_batch(struct intel_batchbuffer *batch) {
        int fd = open("/tmp/i965-batchbuffers.dump", O_WRONLY | O_CREAT, 0666);
        if (fd != -1) {
                igt_assert_eq(write(fd, batch->buffer, 4096), 4096);
                close(fd);
        }
}
#else
#define dump_batch(x) do { } while (0)
#endif

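/* Batch-relative offsets of indirect state, filled in by
 * _gen9_render_copyfunc() and read back by the state-pointer commands
 * emitted below.
 */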
struct {
        uint32_t cc_state;
        uint32_t blend_state;
} cc;

struct {
        uint32_t cc_state;
        uint32_t sf_clip_state;
} viewport;

/* see lib/i915/shaders/ps/blit.g7a */
static const uint32_t ps_kernel_gen9[][4] = {
#if 1
        { 0x0080005a, 0x2f403ae8, 0x3a0000c0, 0x008d0040 },
        { 0x0080005a, 0x2f803ae8, 0x3a0000d0, 0x008d0040 },
        { 0x02800031, 0x2e203a48, 0x0e8d0f40, 0x08840001 },
        { 0x05800031, 0x20003a40, 0x0e8d0e20, 0x90031000 },
#else
        /* Write all -1 */
        { 0x00600001, 0x2e000608, 0x00000000, 0x3f800000 },
        { 0x00600001, 0x2e200608, 0x00000000, 0x3f800000 },
        { 0x00600001, 0x2e400608, 0x00000000, 0x3f800000 },
        { 0x00600001, 0x2e600608, 0x00000000, 0x3f800000 },
        { 0x00600001, 0x2e800608, 0x00000000, 0x3f800000 },
        { 0x00600001, 0x2ea00608, 0x00000000, 0x3f800000 },
        { 0x00600001, 0x2ec00608, 0x00000000, 0x3f800000 },
        { 0x00600001, 0x2ee00608, 0x00000000, 0x3f800000 },
        { 0x05800031, 0x200022e0, 0x0e000e00, 0x90031000 },
#endif
};

/* see lib/i915/shaders/ps/blit.g11a */
static const uint32_t ps_kernel_gen11[][4] = {
#if 1
        { 0x0060005b, 0x2000c01c, 0x07206601, 0x01800404 },
        { 0x0060005b, 0x7100480c, 0x0722003b, 0x01880406 },
        { 0x0060005b, 0x2000c01c, 0x07206601, 0x01800408 },
        { 0x0060005b, 0x7200480c, 0x0722003b, 0x0188040a },
        { 0x0060005b, 0x2000c01c, 0x07206e01, 0x01a00404 },
        { 0x0060005b, 0x7300480c, 0x0722003b, 0x01a80406 },
        { 0x0060005b, 0x2000c01c, 0x07206e01, 0x01a00408 },
        { 0x0060005b, 0x7400480c, 0x0722003b, 0x01a8040a },
        { 0x02800031, 0x21804a4c, 0x06000e20, 0x08840001 },
        { 0x00800001, 0x2e204b28, 0x008d0180, 0x00000000 },
        { 0x00800001, 0x2e604b28, 0x008d01c0, 0x00000000 },
        { 0x00800001, 0x2ea04b28, 0x008d0200, 0x00000000 },
        { 0x00800001, 0x2ee04b28, 0x008d0240, 0x00000000 },
        { 0x05800031, 0x20004a44, 0x06000e20, 0x90031000 },
#else
        /* Write all -1 */
        { 0x00600001, 0x2e000608, 0x00000000, 0x3f800000 },
        { 0x00600001, 0x2e200608, 0x00000000, 0x3f800000 },
        { 0x00600001, 0x2e400608, 0x00000000, 0x3f800000 },
        { 0x00600001, 0x2e600608, 0x00000000, 0x3f800000 },
        { 0x00600001, 0x2e800608, 0x00000000, 0x3f800000 },
        { 0x00600001, 0x2ea00608, 0x00000000, 0x3f800000 },
        { 0x00600001, 0x2ec00608, 0x00000000, 0x3f800000 },
        { 0x00600001, 0x2ee00608, 0x00000000, 0x3f800000 },
        { 0x05800031, 0x200022e0, 0x0e000e00, 0x90031000 },
#endif
};

/* AUB annotation support */
#define MAX_ANNOTATIONS 33
struct annotations_context {
        drm_intel_aub_annotation annotations[MAX_ANNOTATIONS];
        int index;
        uint32_t offset;
} aub_annotations;

static void annotation_init(struct annotations_context *ctx)
{
        /* ctx->annotations is an array keeping a list of annotations of the
         * batch buffer ordered by offset. ctx->annotations[0] is thus left
         * for the command stream and will be filled just before executing
         * the batch buffer with annotation_add_batch() */
        ctx->index = 1;
}

static void add_annotation(drm_intel_aub_annotation *a,
                           uint32_t type, uint32_t subtype,
                           uint32_t ending_offset)
{
        a->type = type;
        a->subtype = subtype;
        a->ending_offset = ending_offset;
}

static void annotation_add_batch(struct annotations_context *ctx, size_t size)
{
        add_annotation(&ctx->annotations[0], AUB_TRACE_TYPE_BATCH, 0, size);
}

static void annotation_add_state(struct annotations_context *ctx,
                                 uint32_t state_type,
                                 uint32_t start_offset,
                                 size_t size)
{
        assert(ctx->index < MAX_ANNOTATIONS);

        add_annotation(&ctx->annotations[ctx->index++],
                       AUB_TRACE_TYPE_NOTYPE, 0,
                       start_offset);
        add_annotation(&ctx->annotations[ctx->index++],
                       AUB_TRACE_TYPE(state_type),
                       AUB_TRACE_SUBTYPE(state_type),
                       start_offset + size);
}

static void annotation_flush(struct annotations_context *ctx,
                             struct intel_batchbuffer *batch)
{
        if (!igt_aub_dump_enabled())
                return;

        drm_intel_bufmgr_gem_set_aub_annotations(batch->bo,
                                                 ctx->annotations,
                                                 ctx->index);
}

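/* Upload the CPU-side batch contents into the batch bo and execute it with
 * the given hardware context.
 */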
static void
gen6_render_flush(struct intel_batchbuffer *batch,
                  drm_intel_context *context, uint32_t batch_end)
{
        int ret;

        ret = drm_intel_bo_subdata(batch->bo, 0, 4096, batch->buffer);
        if (ret == 0)
                ret = drm_intel_gem_bo_context_exec(batch->bo, context,
                                                    batch_end, 0);
        assert(ret == 0);
}

/* Mostly copy+paste from gen6, except height, width, pitch moved */
static uint32_t
gen8_bind_buf(struct intel_batchbuffer *batch, const struct igt_buf *buf,
              int is_dst) {
        struct gen9_surface_state *ss;
        uint32_t write_domain, read_domain, offset;
        int ret;

        igt_assert_lte(buf->stride, 256*1024);
        igt_assert_lte(igt_buf_width(buf), 16384);
        igt_assert_lte(igt_buf_height(buf), 16384);

        if (is_dst) {
                write_domain = read_domain = I915_GEM_DOMAIN_RENDER;
        } else {
                write_domain = 0;
                read_domain = I915_GEM_DOMAIN_SAMPLER;
        }

        ss = intel_batchbuffer_subdata_alloc(batch, sizeof(*ss), 64);
        offset = intel_batchbuffer_subdata_offset(batch, ss);
        annotation_add_state(&aub_annotations, AUB_TRACE_SURFACE_STATE,
                             offset, sizeof(*ss));

        ss->ss0.surface_type = SURFACE_2D;
        switch (buf->bpp) {
        case 8: ss->ss0.surface_format = SURFACEFORMAT_R8_UNORM; break;
        case 16: ss->ss0.surface_format = SURFACEFORMAT_R8G8_UNORM; break;
        case 32: ss->ss0.surface_format = SURFACEFORMAT_B8G8R8A8_UNORM; break;
        case 64: ss->ss0.surface_format = SURFACEFORMAT_R16G16B16A16_FLOAT; break;
        default: igt_assert(0);
        }
        ss->ss0.render_cache_read_write = 1;
        ss->ss0.vertical_alignment = 1; /* align 4 */
        ss->ss0.horizontal_alignment = 1; /* align 4 */
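        /* Tile mode encoding: 2 is X-major, 3 is Y-major; Yf/Ys reuse the
         * Y-major encoding and are further qualified by ss5.trmode below
         * (my reading of the gen9 RENDER_SURFACE_STATE docs).
         */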
        if (buf->tiling == I915_TILING_X)
                ss->ss0.tiled_mode = 2;
        else if (buf->tiling != I915_TILING_NONE)
                ss->ss0.tiled_mode = 3;

        ss->ss1.memory_object_control = I915_MOCS_PTE << 1;

        if (buf->tiling == I915_TILING_Yf)
                ss->ss5.trmode = 1;
        else if (buf->tiling == I915_TILING_Ys)
                ss->ss5.trmode = 2;
        ss->ss5.mip_tail_start_lod = 1; /* needed with trmode */

        ss->ss8.base_addr = buf->bo->offset64;
        ss->ss9.base_addr_hi = buf->bo->offset64 >> 32;

        ret = drm_intel_bo_emit_reloc(batch->bo,
                                      intel_batchbuffer_subdata_offset(batch, &ss->ss8),
                                      buf->bo, 0,
                                      read_domain, write_domain);
        assert(ret == 0);

        ss->ss2.height = igt_buf_height(buf) - 1;
        ss->ss2.width = igt_buf_width(buf) - 1;
        ss->ss3.pitch = buf->stride - 1;

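        /* Identity swizzle: shader channel select 4/5/6/7 picks R/G/B/A
         * respectively ("chanel" is the spelling used by gen9_render.h).
         */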
        ss->ss7.shader_chanel_select_r = 4;
        ss->ss7.shader_chanel_select_g = 5;
        ss->ss7.shader_chanel_select_b = 6;
        ss->ss7.shader_chanel_select_a = 7;

        if (buf->aux.stride) {
                ss->ss6.aux_mode = 0x5; /* AUX_CCS_E */
                ss->ss6.aux_pitch = (buf->aux.stride / 128) - 1;

                ss->ss10.aux_base_addr = buf->bo->offset64 + buf->aux.offset;
                ss->ss11.aux_base_addr_hi = (buf->bo->offset64 + buf->aux.offset) >> 32;

                ret = drm_intel_bo_emit_reloc(batch->bo,
                                              intel_batchbuffer_subdata_offset(batch, &ss->ss10),
                                              buf->bo, buf->aux.offset,
                                              read_domain, write_domain);
                assert(ret == 0);
        }

        return offset;
}

static uint32_t
gen8_bind_surfaces(struct intel_batchbuffer *batch,
                   const struct igt_buf *src,
                   const struct igt_buf *dst)
{
        uint32_t *binding_table, offset;

        binding_table = intel_batchbuffer_subdata_alloc(batch, 8, 32);
        offset = intel_batchbuffer_subdata_offset(batch, binding_table);
        annotation_add_state(&aub_annotations, AUB_TRACE_BINDING_TABLE,
                             offset, 8);

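        /* Slot 0 is the render target, slot 1 the texture; the blit kernels
         * above are assumed to be built against exactly these binding table
         * indices.
         */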
        binding_table[0] = gen8_bind_buf(batch, dst, 1);
        binding_table[1] = gen8_bind_buf(batch, src, 0);

        return offset;
}

/* Mostly copy+paste from gen6, except wrap modes moved */
static uint32_t
gen8_create_sampler(struct intel_batchbuffer *batch) {
        struct gen8_sampler_state *ss;
        uint32_t offset;

        ss = intel_batchbuffer_subdata_alloc(batch, sizeof(*ss), 64);
        offset = intel_batchbuffer_subdata_offset(batch, ss);
        annotation_add_state(&aub_annotations, AUB_TRACE_SAMPLER_STATE,
                             offset, sizeof(*ss));

        ss->ss0.min_filter = GEN4_MAPFILTER_NEAREST;
        ss->ss0.mag_filter = GEN4_MAPFILTER_NEAREST;
        ss->ss3.r_wrap_mode = GEN4_TEXCOORDMODE_CLAMP;
        ss->ss3.s_wrap_mode = GEN4_TEXCOORDMODE_CLAMP;
        ss->ss3.t_wrap_mode = GEN4_TEXCOORDMODE_CLAMP;

        /* I've experimented with non-normalized coordinates and using the LD
         * sampler fetch, but couldn't make it work. */
        ss->ss3.non_normalized_coord = 0;

        return offset;
}

static uint32_t
gen8_fill_ps(struct intel_batchbuffer *batch,
             const uint32_t kernel[][4],
             size_t size)
{
        uint32_t offset;

        offset = intel_batchbuffer_copy_data(batch, kernel, size, 64);
        annotation_add_state(&aub_annotations, AUB_TRACE_KERNEL_INSTRUCTIONS,
                             offset, size);

        return offset;
}

/*
 * gen7_fill_vertex_buffer_data - populates the vertex buffer with data.
 *
 * The vertex buffer consists of 3 vertices to construct a RECTLIST. The 4th
 * vertex is implied (automatically derived by the HW). Each element has the
 * destination offset, and the normalized texture offset (src). The rectangle
 * itself will span the entire subsurface to be copied.
 *
 * see gen6_emit_vertex_elements
 */
static uint32_t
gen7_fill_vertex_buffer_data(struct intel_batchbuffer *batch,
                             const struct igt_buf *src,
                             uint32_t src_x, uint32_t src_y,
                             uint32_t dst_x, uint32_t dst_y,
                             uint32_t width, uint32_t height)
{
        void *start;
        uint32_t offset;

        intel_batchbuffer_align(batch, 8);
        start = batch->ptr;

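        /* Three RECTLIST vertices: bottom-right, bottom-left, top-left. Each
         * uses the layout gen6_emit_vertex_elements() describes: packed
         * 16-bit dst coordinates, then normalized float src coordinates.
         */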
        emit_vertex_2s(batch, dst_x + width, dst_y + height);
        emit_vertex_normalized(batch, src_x + width, igt_buf_width(src));
        emit_vertex_normalized(batch, src_y + height, igt_buf_height(src));

        emit_vertex_2s(batch, dst_x, dst_y + height);
        emit_vertex_normalized(batch, src_x, igt_buf_width(src));
        emit_vertex_normalized(batch, src_y + height, igt_buf_height(src));

        emit_vertex_2s(batch, dst_x, dst_y);
        emit_vertex_normalized(batch, src_x, igt_buf_width(src));
        emit_vertex_normalized(batch, src_y, igt_buf_height(src));

        offset = intel_batchbuffer_subdata_offset(batch, start);
        annotation_add_state(&aub_annotations, AUB_TRACE_VERTEX_BUFFER,
                             offset, 3 * VERTEX_SIZE);
        return offset;
}

/*
 * gen6_emit_vertex_elements - The vertex elements describe the contents of the
 * vertex buffer. We pack the vertex buffer in a semi weird way, conforming to
 * what gen6_rendercopy did. The most straightforward would be to store
 * everything as floats.
 *
 * see gen7_fill_vertex_buffer_data() for where the corresponding elements are
 * packed.
 */
static void
gen6_emit_vertex_elements(struct intel_batchbuffer *batch) {
        /*
         * The VUE layout
         * dword 0-3: pad (0, 0, 0, 0)
         * dword 4-7: position (x, y, 0, 1.0),
         * dword 8-11: texture coordinate 0 (u0, v0, 0, 1.0)
         */
        OUT_BATCH(GEN4_3DSTATE_VERTEX_ELEMENTS | (3 * 2 + 1 - 2));

        /* Element state 0. These are 4 dwords of 0 required for the VUE format.
         * We don't really know or care what they do.
         */
        OUT_BATCH(0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN6_VE0_VALID |
                  SURFACEFORMAT_R32G32B32A32_FLOAT << VE0_FORMAT_SHIFT |
                  0 << VE0_OFFSET_SHIFT); /* we specify 0, but it really does not exist */
        OUT_BATCH(GEN4_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT |
                  GEN4_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT |
                  GEN4_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
                  GEN4_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT);

        /* Element state 1 - Our "destination" vertices. These are passed down
         * through the pipeline, and eventually make it to the pixel shader as
         * the offsets in the destination surface. They're packed as 16-bit
         * signed/scaled because of gen6 rendercopy. I see no particular reason
         * for doing this though.
         */
        OUT_BATCH(0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN6_VE0_VALID |
                  SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
                  0 << VE0_OFFSET_SHIFT); /* offset in the vb in bytes */
        OUT_BATCH(GEN4_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
                  GEN4_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
                  GEN4_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
                  GEN4_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT);

        /* Element state 2. Last but not least we store the U,V components as
         * normalized floats. These will be used in the pixel shader to sample
         * from the source buffer.
         */
        OUT_BATCH(0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN6_VE0_VALID |
                  SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT |
                  4 << VE0_OFFSET_SHIFT); /* offset in the vb in bytes */
        OUT_BATCH(GEN4_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
                  GEN4_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
                  GEN4_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
                  GEN4_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT);
}

/*
 * gen7_emit_vertex_buffer - emits the vertex buffer command
 *
 * @batch
 * @offset - byte offset within the @batch where the vertex buffer starts.
 */
static void gen7_emit_vertex_buffer(struct intel_batchbuffer *batch,
                                    uint32_t offset) {
        OUT_BATCH(GEN4_3DSTATE_VERTEX_BUFFERS | (1 + (4 * 1) - 2));
        OUT_BATCH(0 << GEN6_VB0_BUFFER_INDEX_SHIFT | /* VB 0th index */
                  GEN8_VB0_BUFFER_ADDR_MOD_EN | /* Address Modify Enable */
                  VERTEX_SIZE << VB0_BUFFER_PITCH_SHIFT);
        OUT_RELOC(batch->bo, I915_GEM_DOMAIN_VERTEX, 0, offset);
        OUT_BATCH(3 * VERTEX_SIZE);
}

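/* No fields are set: this relies on the freshly reset batch being zero
 * filled, leaving the color calculator state at all-default values.
 */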
static uint32_t
gen6_create_cc_state(struct intel_batchbuffer *batch)
{
        struct gen6_color_calc_state *cc_state;
        uint32_t offset;

        cc_state = intel_batchbuffer_subdata_alloc(batch,
                                                   sizeof(*cc_state), 64);
        offset = intel_batchbuffer_subdata_offset(batch, cc_state);
        annotation_add_state(&aub_annotations, AUB_TRACE_CC_STATE,
                             offset, sizeof(*cc_state));

        return offset;
}

static uint32_t
gen8_create_blend_state(struct intel_batchbuffer *batch)
{
        struct gen8_blend_state *blend;
        int i;
        uint32_t offset;

        blend = intel_batchbuffer_subdata_alloc(batch, sizeof(*blend), 64);
        offset = intel_batchbuffer_subdata_offset(batch, blend);
        annotation_add_state(&aub_annotations, AUB_TRACE_BLEND_STATE,
                             offset, sizeof(*blend));

        for (i = 0; i < 16; i++) {
                blend->bs[i].dest_blend_factor = GEN6_BLENDFACTOR_ZERO;
                blend->bs[i].source_blend_factor = GEN6_BLENDFACTOR_ONE;
                blend->bs[i].color_blend_func = GEN6_BLENDFUNCTION_ADD;
                blend->bs[i].pre_blend_color_clamp = 1;
                blend->bs[i].color_buffer_blend = 0;
        }

        return offset;
}

static uint32_t
gen6_create_cc_viewport(struct intel_batchbuffer *batch)
{
        struct gen4_cc_viewport *vp;
        uint32_t offset;

        vp = intel_batchbuffer_subdata_alloc(batch, sizeof(*vp), 32);
        offset = intel_batchbuffer_subdata_offset(batch, vp);
        annotation_add_state(&aub_annotations, AUB_TRACE_CC_VP_STATE,
                             offset, sizeof(*vp));

        /* XXX I don't understand this */
        vp->min_depth = -1.e35;
        vp->max_depth = 1.e35;

        return offset;
}

static uint32_t
gen7_create_sf_clip_viewport(struct intel_batchbuffer *batch) {
        /* XXX these are likely not needed */
        struct gen7_sf_clip_viewport *scv_state;
        uint32_t offset;

        scv_state = intel_batchbuffer_subdata_alloc(batch,
                                                    sizeof(*scv_state), 64);
        offset = intel_batchbuffer_subdata_offset(batch, scv_state);
        annotation_add_state(&aub_annotations, AUB_TRACE_CLIP_VP_STATE,
                             offset, sizeof(*scv_state));

        scv_state->guardband.xmin = 0;
        scv_state->guardband.xmax = 1.0f;
        scv_state->guardband.ymin = 0;
        scv_state->guardband.ymax = 1.0f;

        return offset;
}

static uint32_t
gen6_create_scissor_rect(struct intel_batchbuffer *batch)
{
        struct gen6_scissor_rect *scissor;
        uint32_t offset;

        scissor = intel_batchbuffer_subdata_alloc(batch, sizeof(*scissor), 64);
        offset = intel_batchbuffer_subdata_offset(batch, scissor);
        annotation_add_state(&aub_annotations, AUB_TRACE_SCISSOR_STATE,
                             offset, sizeof(*scissor));

        return offset;
}

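/* Zero the System Instruction Pointer; we install no shader exception
 * handler.
 */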
static void
gen8_emit_sip(struct intel_batchbuffer *batch) {
        OUT_BATCH(GEN4_STATE_SIP | (3 - 2));
        OUT_BATCH(0);
        OUT_BATCH(0);
}

static void
gen7_emit_push_constants(struct intel_batchbuffer *batch) {
        OUT_BATCH(GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS);
        OUT_BATCH(0);
        OUT_BATCH(GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_HS);
        OUT_BATCH(0);
        OUT_BATCH(GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_DS);
        OUT_BATCH(0);
        OUT_BATCH(GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS);
        OUT_BATCH(0);
        OUT_BATCH(GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS);
        OUT_BATCH(0);
}

static void
gen9_emit_state_base_address(struct intel_batchbuffer *batch) {

        /* WaBindlessSurfaceStateModifyEnable:skl,bxt */
        /* The length has to be one less if we don't modify
           bindless state */
        OUT_BATCH(GEN4_STATE_BASE_ADDRESS | (19 - 1 - 2));

        /* general */
        OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(0);

        /* stateless data port */
        OUT_BATCH(0 | BASE_ADDRESS_MODIFY);

        /* surface */
        OUT_RELOC(batch->bo, I915_GEM_DOMAIN_SAMPLER, 0, BASE_ADDRESS_MODIFY);

        /* dynamic */
        OUT_RELOC(batch->bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION,
                  0, BASE_ADDRESS_MODIFY);

        /* indirect */
        OUT_BATCH(0);
        OUT_BATCH(0);

        /* instruction */
        OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);

        /* general state buffer size */
        OUT_BATCH(0xfffff000 | 1);
        /* dynamic state buffer size */
        OUT_BATCH(1 << 12 | 1);
        /* indirect object buffer size */
        OUT_BATCH(0xfffff000 | 1);
        /* instruction buffer size */
        OUT_BATCH(1 << 12 | 1);

        /* Bindless surface state base address */
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
}

static void
gen7_emit_urb(struct intel_batchbuffer *batch) {
        /* XXX: Min valid values from mesa */
        const int vs_entries = 64;
        const int vs_size = 2;
        const int vs_start = 4;

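        /* DW1 of 3DSTATE_URB_*: number of entries in [15:0], (entry size in
         * 512-bit units - 1) in [24:16], start address in [31:25]. Only the
         * VS gets entries; GS/HS/DS just get a start offset. (Field layout
         * from my reading of the gen7+ docs.)
         */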
        OUT_BATCH(GEN7_3DSTATE_URB_VS);
        OUT_BATCH(vs_entries | ((vs_size - 1) << 16) | (vs_start << 25));
        OUT_BATCH(GEN7_3DSTATE_URB_GS);
        OUT_BATCH(vs_start << 25);
        OUT_BATCH(GEN7_3DSTATE_URB_HS);
        OUT_BATCH(vs_start << 25);
        OUT_BATCH(GEN7_3DSTATE_URB_DS);
        OUT_BATCH(vs_start << 25);
}

static void
gen8_emit_cc(struct intel_batchbuffer *batch) {
        OUT_BATCH(GEN7_3DSTATE_BLEND_STATE_POINTERS);
        OUT_BATCH(cc.blend_state | 1);

        OUT_BATCH(GEN6_3DSTATE_CC_STATE_POINTERS);
        OUT_BATCH(cc.cc_state | 1);
}

static void
gen8_emit_multisample(struct intel_batchbuffer *batch) {
        OUT_BATCH(GEN8_3DSTATE_MULTISAMPLE | 0);
        OUT_BATCH(0);

        OUT_BATCH(GEN6_3DSTATE_SAMPLE_MASK);
        OUT_BATCH(1);
}

static void
gen8_emit_vs(struct intel_batchbuffer *batch) {
        OUT_BATCH(GEN6_3DSTATE_CONSTANT_VS | (11-2));
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);

        OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS);
        OUT_BATCH(0);

        OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS);
        OUT_BATCH(0);

        OUT_BATCH(GEN6_3DSTATE_VS | (9-2));
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
}

static void
gen8_emit_hs(struct intel_batchbuffer *batch) {
        OUT_BATCH(GEN7_3DSTATE_CONSTANT_HS | (11-2));
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);

        OUT_BATCH(GEN7_3DSTATE_HS | (9-2));
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);

        OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS);
        OUT_BATCH(0);

        OUT_BATCH(GEN8_3DSTATE_SAMPLER_STATE_POINTERS_HS);
        OUT_BATCH(0);
}

static void
gen8_emit_gs(struct intel_batchbuffer *batch) {
        OUT_BATCH(GEN6_3DSTATE_CONSTANT_GS | (11-2));
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);

        OUT_BATCH(GEN6_3DSTATE_GS | (10-2));
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);

        OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS);
        OUT_BATCH(0);

        OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS);
        OUT_BATCH(0);
}

static void
gen9_emit_ds(struct intel_batchbuffer *batch) {
        OUT_BATCH(GEN7_3DSTATE_CONSTANT_DS | (11-2));
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);

        OUT_BATCH(GEN7_3DSTATE_DS | (11-2));
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);

        OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS);
        OUT_BATCH(0);

        OUT_BATCH(GEN8_3DSTATE_SAMPLER_STATE_POINTERS_DS);
        OUT_BATCH(0);
}

static void
gen8_emit_wm_hz_op(struct intel_batchbuffer *batch) {
        OUT_BATCH(GEN8_3DSTATE_WM_HZ_OP | (5-2));
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
}

static void
gen8_emit_null_state(struct intel_batchbuffer *batch) {
        gen8_emit_wm_hz_op(batch);
        gen8_emit_hs(batch);
        OUT_BATCH(GEN7_3DSTATE_TE | (4-2));
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        gen8_emit_gs(batch);
        gen9_emit_ds(batch);
        gen8_emit_vs(batch);
}

static void
gen7_emit_clip(struct intel_batchbuffer *batch) {
        OUT_BATCH(GEN6_3DSTATE_CLIP | (4 - 2));
        OUT_BATCH(0);
        OUT_BATCH(0); /* pass-through */
        OUT_BATCH(0);
}

static void
gen8_emit_sf(struct intel_batchbuffer *batch)
{
        int i;

        OUT_BATCH(GEN7_3DSTATE_SBE | (6 - 2));
        OUT_BATCH(1 << GEN7_SBE_NUM_OUTPUTS_SHIFT |
                  GEN8_SBE_FORCE_URB_ENTRY_READ_LENGTH |
                  GEN8_SBE_FORCE_URB_ENTRY_READ_OFFSET |
                  1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
                  1 << GEN8_SBE_URB_ENTRY_READ_OFFSET_SHIFT);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(GEN9_SBE_ACTIVE_COMPONENT_XYZW << 0);
        OUT_BATCH(0);

        OUT_BATCH(GEN8_3DSTATE_SBE_SWIZ | (11 - 2));
        for (i = 0; i < 8; i++)
                OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);

        OUT_BATCH(GEN8_3DSTATE_RASTER | (5 - 2));
        OUT_BATCH(GEN8_RASTER_FRONT_WINDING_CCW | GEN8_RASTER_CULL_NONE);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);

        OUT_BATCH(GEN6_3DSTATE_SF | (4 - 2));
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
}

static void
gen8_emit_ps(struct intel_batchbuffer *batch, uint32_t kernel) {
        const int max_threads = 63;

        OUT_BATCH(GEN6_3DSTATE_WM | (2 - 2));
        OUT_BATCH(/* XXX: I don't understand the BARYCENTRIC stuff, but it
                   * appears we need it to put our setup data in the place we
                   * expect (g6, see below) */
                  GEN8_3DSTATE_PS_PERSPECTIVE_PIXEL_BARYCENTRIC);

        OUT_BATCH(GEN6_3DSTATE_CONSTANT_PS | (11-2));
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);

        OUT_BATCH(GEN7_3DSTATE_PS | (12-2));
        OUT_BATCH(kernel);
        OUT_BATCH(0); /* kernel hi */
        OUT_BATCH(1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHIFT |
                  2 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT);
        OUT_BATCH(0); /* scratch space stuff */
        OUT_BATCH(0); /* scratch hi */
        OUT_BATCH((max_threads - 1) << GEN8_3DSTATE_PS_MAX_THREADS_SHIFT |
                  GEN6_3DSTATE_WM_16_DISPATCH_ENABLE);
        OUT_BATCH(6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT);
        OUT_BATCH(0); /* kernel 1 */
        OUT_BATCH(0); /* kernel 1 hi */
        OUT_BATCH(0); /* kernel 2 */
        OUT_BATCH(0); /* kernel 2 hi */

        OUT_BATCH(GEN8_3DSTATE_PS_BLEND | (2 - 2));
        OUT_BATCH(GEN8_PS_BLEND_HAS_WRITEABLE_RT);

        OUT_BATCH(GEN8_3DSTATE_PS_EXTRA | (2 - 2));
        OUT_BATCH(GEN8_PSX_PIXEL_SHADER_VALID | GEN8_PSX_ATTRIBUTE_ENABLE);
}

static void
gen9_emit_depth(struct intel_batchbuffer *batch)
{
        OUT_BATCH(GEN8_3DSTATE_WM_DEPTH_STENCIL | (4 - 2));
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);

        OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER | (8-2));
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);

        OUT_BATCH(GEN8_3DSTATE_HIER_DEPTH_BUFFER | (5-2));
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);

        OUT_BATCH(GEN8_3DSTATE_STENCIL_BUFFER | (5-2));
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
}

static void
gen7_emit_clear(struct intel_batchbuffer *batch) {
        OUT_BATCH(GEN7_3DSTATE_CLEAR_PARAMS | (3-2));
        OUT_BATCH(0);
        OUT_BATCH(1); /* clear valid */
}

static void
gen6_emit_drawing_rectangle(struct intel_batchbuffer *batch, const struct igt_buf *dst)
{
        OUT_BATCH(GEN4_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
        OUT_BATCH(0);
        OUT_BATCH((igt_buf_height(dst) - 1) << 16 | (igt_buf_width(dst) - 1));
        OUT_BATCH(0);
}

static void gen8_emit_vf_topology(struct intel_batchbuffer *batch)
{
        OUT_BATCH(GEN8_3DSTATE_VF_TOPOLOGY);
        OUT_BATCH(_3DPRIM_RECTLIST);
}

/* Vertex elements MUST be defined before this according to spec */
static void gen8_emit_primitive(struct intel_batchbuffer *batch, uint32_t offset)
{
        OUT_BATCH(GEN8_3DSTATE_VF | (2 - 2));
        OUT_BATCH(0);

        OUT_BATCH(GEN8_3DSTATE_VF_INSTANCING | (3 - 2));
        OUT_BATCH(0);
        OUT_BATCH(0);

        OUT_BATCH(GEN4_3DPRIMITIVE | (7-2));
        OUT_BATCH(0); /* gen8+ ignore the topology type field */
        OUT_BATCH(3); /* vertex count */
        OUT_BATCH(0); /* We're specifying this instead with offset in GEN6_3DSTATE_VERTEX_BUFFERS */
        OUT_BATCH(1); /* single instance */
        OUT_BATCH(0); /* start instance location */
        OUT_BATCH(0); /* index buffer offset, ignored */
}

/* The general rule is: if it's named gen6, it is directly copied from
 * gen6_render_copyfunc.
 *
 * This sets up most of the 3d pipeline, and most of that to NULL state. The
 * docs aren't specific about exactly what must be set up NULL, but the general
 * rule is we could be run at any time, and so the more state we set to NULL,
 * the better our odds of success.
 *
 * +---------------+ <---- 4096
 * |       ^       |
 * |       |       |
 * |    various    |
 * |     state     |
 * |       |       |
 * |_______|_______| <---- 2048 + ?
 * |       ^       |
 * |       |       |
 * |     batch     |
 * |    commands   |
 * |       |       |
 * |       |       |
 * +---------------+ <---- 0 + ?
 *
 * The batch commands point to state within the batch, so all state offsets
 * should be 0 < offset < 4096. Both commands and state build upwards, and are
 * constructed in that order. This means too many batch commands can delete
 * state if not careful.
 *
 */

#define BATCH_STATE_SPLIT 2048

static
void _gen9_render_copyfunc(struct intel_batchbuffer *batch,
                           drm_intel_context *context,
                           const struct igt_buf *src, unsigned src_x,
                           unsigned src_y, unsigned width, unsigned height,
                           const struct igt_buf *dst, unsigned dst_x,
                           unsigned dst_y, const uint32_t ps_kernel[][4],
                           uint32_t ps_kernel_size)
{
        uint32_t ps_sampler_state, ps_kernel_off, ps_binding_table;
        uint32_t scissor_state;
        uint32_t vertex_buffer;
        uint32_t batch_end;

        igt_assert(src->bpp == dst->bpp);
        intel_batchbuffer_flush_with_context(batch, context);

        intel_batchbuffer_align(batch, 8);

        batch->ptr = &batch->buffer[BATCH_STATE_SPLIT];

        annotation_init(&aub_annotations);

        ps_binding_table = gen8_bind_surfaces(batch, src, dst);
        ps_sampler_state = gen8_create_sampler(batch);
        ps_kernel_off = gen8_fill_ps(batch, ps_kernel, ps_kernel_size);
        vertex_buffer = gen7_fill_vertex_buffer_data(batch, src,
                                                     src_x, src_y,
                                                     dst_x, dst_y,
                                                     width, height);
        cc.cc_state = gen6_create_cc_state(batch);
        cc.blend_state = gen8_create_blend_state(batch);
        viewport.cc_state = gen6_create_cc_viewport(batch);
        viewport.sf_clip_state = gen7_create_sf_clip_viewport(batch);
        scissor_state = gen6_create_scissor_rect(batch);
        /* TODO: there is other state which isn't set up */

        assert(batch->ptr < &batch->buffer[4095]);

        batch->ptr = batch->buffer;

        /* Start emitting the commands. The order roughly follows the mesa blorp
         * order */
        OUT_BATCH(G4X_PIPELINE_SELECT | PIPELINE_SELECT_3D |
                  GEN9_PIPELINE_SELECTION_MASK);

        gen8_emit_sip(batch);

        gen7_emit_push_constants(batch);

        gen9_emit_state_base_address(batch);

        OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC);
        OUT_BATCH(viewport.cc_state);
        OUT_BATCH(GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
        OUT_BATCH(viewport.sf_clip_state);

        gen7_emit_urb(batch);

        gen8_emit_cc(batch);

        gen8_emit_multisample(batch);

        gen8_emit_null_state(batch);

        OUT_BATCH(GEN7_3DSTATE_STREAMOUT | (5 - 2));
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);

        gen7_emit_clip(batch);

        gen8_emit_sf(batch);

        gen8_emit_ps(batch, ps_kernel_off);

        OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS);
        OUT_BATCH(ps_binding_table);

        OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS);
        OUT_BATCH(ps_sampler_state);

        OUT_BATCH(GEN8_3DSTATE_SCISSOR_STATE_POINTERS);
        OUT_BATCH(scissor_state);

        gen9_emit_depth(batch);

        gen7_emit_clear(batch);

        gen6_emit_drawing_rectangle(batch, dst);

        gen7_emit_vertex_buffer(batch, vertex_buffer);
        gen6_emit_vertex_elements(batch);

        gen8_emit_vf_topology(batch);
        gen8_emit_primitive(batch, vertex_buffer);

        OUT_BATCH(MI_BATCH_BUFFER_END);

        batch_end = intel_batchbuffer_align(batch, 8);
        assert(batch_end < BATCH_STATE_SPLIT);
        annotation_add_batch(&aub_annotations, batch_end);

        dump_batch(batch);

        annotation_flush(&aub_annotations, batch);

        gen6_render_flush(batch, context, batch_end);
        intel_batchbuffer_reset(batch);
}

void gen9_render_copyfunc(struct intel_batchbuffer *batch,
                          drm_intel_context *context,
                          const struct igt_buf *src, unsigned src_x, unsigned src_y,
                          unsigned width, unsigned height,
                          const struct igt_buf *dst, unsigned dst_x, unsigned dst_y)
{
        _gen9_render_copyfunc(batch, context, src, src_x, src_y,
                              width, height, dst, dst_x, dst_y, ps_kernel_gen9,
                              sizeof(ps_kernel_gen9));
}

void gen11_render_copyfunc(struct intel_batchbuffer *batch,
                           drm_intel_context *context,
                           const struct igt_buf *src, unsigned src_x, unsigned src_y,
                           unsigned width, unsigned height,
                           const struct igt_buf *dst, unsigned dst_x, unsigned dst_y)
{
        _gen9_render_copyfunc(batch, context, src, src_x, src_y,
                              width, height, dst, dst_x, dst_y, ps_kernel_gen11,
                              sizeof(ps_kernel_gen11));
}