/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * @file iris_blorp.c
 *
 * ============================= GENXML CODE =============================
 *              [This file is compiled once per generation.]
 * =======================================================================
 *
 * GenX specific code for working with BLORP (blitting, resolves, clears
 * on the 3D engine).  This provides the driver-specific hooks needed to
 * implement the BLORP API.
 *
 * See iris_blit.c, iris_clear.c, and so on.
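 *
 * Other parts of the driver drive BLORP through its public API; a typical
 * sequence (a rough sketch, with the operation-specific arguments elided)
 * looks like:
 *
 *    struct blorp_batch blorp_batch;
 *    blorp_batch_init(&ice->blorp, &blorp_batch, batch, 0);
 *    blorp_blit(&blorp_batch, ...);      // or blorp_clear(), blorp_copy()
 *    blorp_batch_finish(&blorp_batch);
 *
 * BLORP then calls back into the hooks defined in this file to allocate
 * state and emit the actual commands.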
 */

#include <assert.h>

#include "iris_batch.h"
#include "iris_resource.h"
#include "iris_context.h"

#include "util/u_upload_mgr.h"
#include "intel/common/intel_l3_config.h"
#include "intel/compiler/brw_compiler.h"

#include "genxml/gen_macros.h"

#if GFX_VER >= 9
#include "blorp/blorp_genX_exec_brw.h"
#else
#include "blorp/blorp_genX_exec_elk.h"
#endif

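/**
 * Stream out transient GPU state of the given size and alignment from the
 * given uploader, pinning the backing BO into the batch.  Returns a CPU
 * pointer so the caller can fill in the contents.
 */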
static uint32_t *
stream_state(struct iris_batch *batch,
             struct u_upload_mgr *uploader,
             unsigned size,
             unsigned alignment,
             uint32_t *out_offset,
             struct iris_bo **out_bo)
{
   struct pipe_resource *res = NULL;
   void *ptr = NULL;

   u_upload_alloc(uploader, 0, size, alignment, out_offset, &res, &ptr);

   struct iris_bo *bo = iris_resource_bo(res);
   iris_use_pinned_bo(batch, bo, false, IRIS_DOMAIN_NONE);

   iris_record_state_size(batch->state_sizes,
                          bo->address + *out_offset, size);

   /* If the caller has asked for a BO, we leave them the responsibility of
    * adding bo->address (say, by handing an address to genxml).  If not,
    * we assume they want the offset from a base address.
    */
   if (out_bo)
      *out_bo = bo;
   else
      *out_offset += iris_bo_offset_from_base_address(bo);

   pipe_resource_reference(&res, NULL);

   return ptr;
}

static void *
blorp_emit_dwords(struct blorp_batch *blorp_batch, unsigned n)
{
   struct iris_batch *batch = blorp_batch->driver_batch;
   return iris_get_command_space(batch, n * sizeof(uint32_t));
}

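/* iris uses softpin, so every BO lives at a fixed graphics virtual address
 * and there are no relocations.  "Combining" an address is therefore just
 * bo->address plus the offset; the only bookkeeping required is pinning
 * the BO into the batch's validation list.
 */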
static uint64_t
combine_and_pin_address(struct blorp_batch *blorp_batch,
                        struct blorp_address addr)
{
   struct iris_batch *batch = blorp_batch->driver_batch;
   struct iris_bo *bo = addr.buffer;

   iris_use_pinned_bo(batch, bo,
                      addr.reloc_flags & IRIS_BLORP_RELOC_FLAGS_EXEC_OBJECT_WRITE,
                      IRIS_DOMAIN_NONE);

   /* Assume this is a general address, not relative to a base. */
   return bo->address + addr.offset;
}

static uint64_t
blorp_emit_reloc(struct blorp_batch *blorp_batch, UNUSED void *location,
                 struct blorp_address addr, uint32_t delta)
{
   return combine_and_pin_address(blorp_batch, addr) + delta;
}

static void
blorp_surface_reloc(struct blorp_batch *blorp_batch, uint32_t ss_offset,
                    struct blorp_address addr, uint32_t delta)
{
   /* Let blorp_get_surface_address do the pinning. */
}

static uint64_t
blorp_get_surface_address(struct blorp_batch *blorp_batch,
                          struct blorp_address addr)
{
   return combine_and_pin_address(blorp_batch, addr);
}

UNUSED static struct blorp_address
blorp_get_surface_base_address(UNUSED struct blorp_batch *blorp_batch)
{
   return (struct blorp_address) { .offset = IRIS_MEMZONE_BINDER_START };
}

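/* BLORP's dynamic state (sampler states, viewports, and the like) is
 * streamed from the context's dynamic state uploader, so the offsets we
 * return are relative to Dynamic State Base Address.
 */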
static void *
blorp_alloc_dynamic_state(struct blorp_batch *blorp_batch,
                          uint32_t size,
                          uint32_t alignment,
                          uint32_t *offset)
{
   struct iris_context *ice = blorp_batch->blorp->driver_ctx;
   struct iris_batch *batch = blorp_batch->driver_batch;

   return stream_state(batch, ice->state.dynamic_uploader,
                       size, alignment, offset, NULL);
}

UNUSED static void *
blorp_alloc_general_state(struct blorp_batch *blorp_batch,
                          uint32_t size,
                          uint32_t alignment,
                          uint32_t *offset)
{
   /* Use dynamic state range for general state on iris. */
   return blorp_alloc_dynamic_state(blorp_batch, size, alignment, offset);
}

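/**
 * Allocate a binding table and the SURFACE_STATEs it will point at.
 *
 * Binding table space is reserved out of the context's binder, while each
 * surface state is streamed via the surface uploader.  The table entries
 * are written relative to the surface state base in use, which on pre-Gfx11
 * platforms tracks the binder's own address.
 */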
static bool
blorp_alloc_binding_table(struct blorp_batch *blorp_batch,
                          unsigned num_entries,
                          unsigned state_size,
                          unsigned state_alignment,
                          uint32_t *out_bt_offset,
                          uint32_t *surface_offsets,
                          void **surface_maps)
{
   struct iris_context *ice = blorp_batch->blorp->driver_ctx;
   struct iris_binder *binder = &ice->state.binder;
   struct iris_batch *batch = blorp_batch->driver_batch;

   unsigned bt_offset =
      iris_binder_reserve(ice, num_entries * sizeof(uint32_t));
   uint32_t *bt_map = binder->map + bt_offset;

   uint32_t surf_base_offset = GFX_VER < 11 ? binder->bo->address : 0;

   *out_bt_offset = bt_offset;

   for (unsigned i = 0; i < num_entries; i++) {
      surface_maps[i] = stream_state(batch, ice->state.surface_uploader,
                                     state_size, state_alignment,
                                     &surface_offsets[i], NULL);
      bt_map[i] = surface_offsets[i] - surf_base_offset;
   }

   iris_use_pinned_bo(batch, binder->bo, false, IRIS_DOMAIN_NONE);

   batch->screen->vtbl.update_binder_address(batch, binder);

   return true;
}

static uint32_t
blorp_binding_table_offset_to_pointer(struct blorp_batch *batch,
                                      uint32_t offset)
{
   /* See IRIS_BT_OFFSET_SHIFT in iris_state.c */
   return offset >> ((GFX_VER >= 11 && GFX_VERx10 < 125) ? 3 : 0);
}

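/* BLORP's vertex data is small and transient, so stream it like any other
 * state.  Unlike the allocators above, the BO is handed back to the
 * caller, because vertex buffers are programmed with full 64-bit
 * addresses rather than offsets from a base.
 */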
static void *
blorp_alloc_vertex_buffer(struct blorp_batch *blorp_batch,
                          uint32_t size,
                          struct blorp_address *addr)
{
   struct iris_context *ice = blorp_batch->blorp->driver_ctx;
   struct iris_batch *batch = blorp_batch->driver_batch;
   struct iris_bo *bo;
   uint32_t offset;

   void *map = stream_state(batch, ice->ctx.const_uploader, size, 64,
                            &offset, &bo);

   *addr = (struct blorp_address) {
      .buffer = bo,
      .offset = offset,
      .mocs = iris_mocs(bo, &batch->screen->isl_dev,
                        ISL_SURF_USAGE_VERTEX_BUFFER_BIT),
      .local_hint = iris_bo_likely_local(bo),
   };

   return map;
}

/**
 * See iris_upload_render_state's IRIS_DIRTY_VERTEX_BUFFERS handling for
 * a comment about why these VF invalidations are needed.  In short, the
 * VF cache keys its entries on the low 32 bits of vertex buffer addresses,
 * so buffers whose addresses differ only in the upper 16 bits can alias
 * stale cache entries unless we invalidate.
 */
static void
blorp_vf_invalidate_for_vb_48b_transitions(struct blorp_batch *blorp_batch,
                                           const struct blorp_address *addrs,
                                           UNUSED uint32_t *sizes,
                                           unsigned num_vbs)
{
#if GFX_VER < 11
   struct iris_context *ice = blorp_batch->blorp->driver_ctx;
   struct iris_batch *batch = blorp_batch->driver_batch;
   bool need_invalidate = false;

   for (unsigned i = 0; i < num_vbs; i++) {
      struct iris_bo *bo = addrs[i].buffer;
      uint16_t high_bits = bo->address >> 32u;

      if (high_bits != ice->state.last_vbo_high_bits[i]) {
         need_invalidate = true;
         ice->state.last_vbo_high_bits[i] = high_bits;
      }
   }

   if (need_invalidate) {
      iris_emit_pipe_control_flush(batch,
                                   "workaround: VF cache 32-bit key [blorp]",
                                   PIPE_CONTROL_VF_CACHE_INVALIDATE |
                                   PIPE_CONTROL_CS_STALL);
   }
#endif
}

static struct blorp_address
blorp_get_workaround_address(struct blorp_batch *blorp_batch)
{
   struct iris_batch *batch = blorp_batch->driver_batch;

   return (struct blorp_address) {
      .buffer = batch->screen->workaround_address.bo,
      .offset = batch->screen->workaround_address.offset,
      .local_hint =
         iris_bo_likely_local(batch->screen->workaround_address.bo),
   };
}

static void
blorp_flush_range(UNUSED struct blorp_batch *blorp_batch,
                  UNUSED void *start,
                  UNUSED size_t size)
{
   /* All allocated states come from the batch which we will flush before we
    * submit it.  There's nothing for us to do here.
    */
}

static void
blorp_pre_emit_urb_config(struct blorp_batch *blorp_batch,
                          struct intel_urb_config *urb_cfg)
{
   genX(urb_workaround)(blorp_batch->driver_batch, urb_cfg);
}

static const struct intel_l3_config *
blorp_get_l3_config(struct blorp_batch *blorp_batch)
{
   struct iris_batch *batch = blorp_batch->driver_batch;
   return batch->screen->l3_config_3d;
}

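/**
 * Execute a BLORP operation on the render engine.
 *
 * We emit any workarounds that must precede BLORP's packets, hand off to
 * blorp_exec() to emit the operation itself, and then flag nearly all of
 * our tracked state as dirty, since BLORP has clobbered the 3D pipeline
 * state that iris normally tracks for GL.
 */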
static void
iris_blorp_exec_render(struct blorp_batch *blorp_batch,
                       const struct blorp_params *params)
{
   struct iris_context *ice = blorp_batch->blorp->driver_ctx;
   struct iris_batch *batch = blorp_batch->driver_batch;
   uint32_t pc_flags = 0;

#if GFX_VER >= 11
   /* The PIPE_CONTROL command description says:
    *
    *    "Whenever a Binding Table Index (BTI) used by a Render Target
    *     Message points to a different RENDER_SURFACE_STATE, SW must issue
    *     a Render Target Cache Flush by enabling this bit.  When render
    *     target flush is set due to new association of BTI, PS Scoreboard
    *     Stall bit must be set in this packet."
    */
   pc_flags = PIPE_CONTROL_RENDER_TARGET_FLUSH |
              PIPE_CONTROL_STALL_AT_SCOREBOARD;
#endif

   /* Check if blorp's depth/stencil state matches ours. */
   if (intel_needs_workaround(batch->screen->devinfo, 18019816803)) {
      const bool blorp_ds_state =
         params->depth.enabled || params->stencil.enabled;
      if (ice->state.ds_write_state != blorp_ds_state) {
         pc_flags |= PIPE_CONTROL_PSS_STALL_SYNC;
         ice->state.ds_write_state = blorp_ds_state;
      }
   }

   if (pc_flags != 0) {
      iris_emit_pipe_control_flush(batch,
                                   "workaround: prior to [blorp]",
                                   pc_flags);
   }

   if (params->depth.enabled &&
       !(blorp_batch->flags & BLORP_BATCH_NO_EMIT_DEPTH_STENCIL))
      genX(emit_depth_state_workarounds)(ice, batch, &params->depth.surf);

   iris_require_command_space(batch, 1400);

#if GFX_VER == 8
   genX(update_pma_fix)(ice, batch, false);
#endif

   const unsigned scale = params->fast_clear_op ? UINT_MAX : 1;
   if (ice->state.current_hash_scale != scale) {
      genX(emit_hashing_mode)(ice, batch, params->x1 - params->x0,
                              params->y1 - params->y0, scale);
   }

#if GFX_VERx10 == 125
   iris_use_pinned_bo(batch, iris_resource_bo(ice->state.pixel_hashing_tables),
                      false, IRIS_DOMAIN_NONE);
#else
   assert(!ice->state.pixel_hashing_tables);
#endif

#if GFX_VER >= 12
   genX(invalidate_aux_map_state)(batch);
#endif

   iris_handle_always_flush_cache(batch);

   blorp_exec(blorp_batch, params);

   iris_handle_always_flush_cache(batch);

   /* We've smashed all state compared to what the normal 3D pipeline
    * rendering tracks for GL.
    */

   uint64_t skip_bits = (IRIS_DIRTY_POLYGON_STIPPLE |
                         IRIS_DIRTY_SO_BUFFERS |
                         IRIS_DIRTY_SO_DECL_LIST |
                         IRIS_DIRTY_LINE_STIPPLE |
                         IRIS_ALL_DIRTY_FOR_COMPUTE |
                         IRIS_DIRTY_SCISSOR_RECT |
                         IRIS_DIRTY_VF);
   /* Wa_14016820455
    * On Gfx 12.5 platforms, the SF_CL_VIEWPORT pointer can be invalidated,
    * likely by a read cache invalidation when clipping is disabled, so we
    * don't skip its dirty bit here, in order to reprogram it.
    */
   if (GFX_VERx10 != 125)
      skip_bits |= IRIS_DIRTY_SF_CL_VIEWPORT;

   uint64_t skip_stage_bits = (IRIS_ALL_STAGE_DIRTY_FOR_COMPUTE |
                               IRIS_STAGE_DIRTY_UNCOMPILED_VS |
                               IRIS_STAGE_DIRTY_UNCOMPILED_TCS |
                               IRIS_STAGE_DIRTY_UNCOMPILED_TES |
                               IRIS_STAGE_DIRTY_UNCOMPILED_GS |
                               IRIS_STAGE_DIRTY_UNCOMPILED_FS |
                               IRIS_STAGE_DIRTY_SAMPLER_STATES_VS |
                               IRIS_STAGE_DIRTY_SAMPLER_STATES_TCS |
                               IRIS_STAGE_DIRTY_SAMPLER_STATES_TES |
                               IRIS_STAGE_DIRTY_SAMPLER_STATES_GS);

   if (!ice->shaders.prog[MESA_SHADER_TESS_EVAL]) {
      /* BLORP disabled tessellation, but it was already off anyway */
      skip_stage_bits |= IRIS_STAGE_DIRTY_TCS |
                         IRIS_STAGE_DIRTY_TES |
                         IRIS_STAGE_DIRTY_CONSTANTS_TCS |
                         IRIS_STAGE_DIRTY_CONSTANTS_TES |
                         IRIS_STAGE_DIRTY_BINDINGS_TCS |
                         IRIS_STAGE_DIRTY_BINDINGS_TES;
   }

   if (!ice->shaders.prog[MESA_SHADER_GEOMETRY]) {
      /* BLORP disabled geometry shaders, but they were already off anyway */
      skip_stage_bits |= IRIS_STAGE_DIRTY_GS |
                         IRIS_STAGE_DIRTY_CONSTANTS_GS |
                         IRIS_STAGE_DIRTY_BINDINGS_GS;
   }

   /* We can skip flagging IRIS_DIRTY_DEPTH_BUFFER if
    * BLORP_BATCH_NO_EMIT_DEPTH_STENCIL is set.
    */
   if (blorp_batch->flags & BLORP_BATCH_NO_EMIT_DEPTH_STENCIL)
      skip_bits |= IRIS_DIRTY_DEPTH_BUFFER;

   if (!params->wm_prog_data)
      skip_bits |= IRIS_DIRTY_BLEND_STATE | IRIS_DIRTY_PS_BLEND;

   ice->state.dirty |= ~skip_bits;
   ice->state.stage_dirty |= ~skip_stage_bits;

   for (int i = 0; i < ARRAY_SIZE(ice->shaders.urb.cfg.size); i++)
      ice->shaders.urb.cfg.size[i] = 0;

   if (params->src.enabled)
      iris_bo_bump_seqno(params->src.addr.buffer, batch->next_seqno,
                         IRIS_DOMAIN_SAMPLER_READ);
   if (params->dst.enabled)
      iris_bo_bump_seqno(params->dst.addr.buffer, batch->next_seqno,
                         IRIS_DOMAIN_RENDER_WRITE);
   if (params->depth.enabled)
      iris_bo_bump_seqno(params->depth.addr.buffer, batch->next_seqno,
                         IRIS_DOMAIN_DEPTH_WRITE);
   if (params->stencil.enabled)
      iris_bo_bump_seqno(params->stencil.addr.buffer, batch->next_seqno,
                         IRIS_DOMAIN_DEPTH_WRITE);
}

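/**
 * Execute a BLORP operation on the blitter (copy) engine.
 *
 * No 3D pipeline state is involved, so unlike the render path there is
 * nothing to re-flag as dirty afterwards; we only bump the cache-tracking
 * sequence numbers on the source and destination BOs.
 */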
static void
iris_blorp_exec_blitter(struct blorp_batch *blorp_batch,
                        const struct blorp_params *params)
{
   struct iris_batch *batch = blorp_batch->driver_batch;

   /* Roughly the combined length of an XY_BLOCK_COPY_BLT and MI_FLUSH_DW */
   iris_require_command_space(batch, 108);

   iris_handle_always_flush_cache(batch);

   blorp_exec(blorp_batch, params);

   iris_handle_always_flush_cache(batch);

   if (params->src.enabled) {
      iris_bo_bump_seqno(params->src.addr.buffer, batch->next_seqno,
                         IRIS_DOMAIN_OTHER_READ);
   }

   iris_bo_bump_seqno(params->dst.addr.buffer, batch->next_seqno,
                      IRIS_DOMAIN_OTHER_WRITE);
}

static void
iris_blorp_exec(struct blorp_batch *blorp_batch,
                const struct blorp_params *params)
{
   if (blorp_batch->flags & BLORP_BATCH_USE_BLITTER)
      iris_blorp_exec_blitter(blorp_batch, params);
   else
      iris_blorp_exec_render(blorp_batch, params);
}

static void
blorp_measure_start(struct blorp_batch *blorp_batch,
                    const struct blorp_params *params)
{
   struct iris_context *ice = blorp_batch->blorp->driver_ctx;
   struct iris_batch *batch = blorp_batch->driver_batch;

   trace_intel_begin_blorp(&batch->trace);

   if (batch->measure == NULL)
      return;

   iris_measure_snapshot(ice, batch,
                         blorp_op_to_intel_measure_snapshot(params->op),
                         NULL, NULL, NULL);
}

static void
blorp_measure_end(struct blorp_batch *blorp_batch,
                  const struct blorp_params *params)
{
   struct iris_batch *batch = blorp_batch->driver_batch;

   trace_intel_end_blorp(&batch->trace,
                         params->op,
                         params->x1 - params->x0,
                         params->y1 - params->y0,
                         params->num_samples,
                         params->shader_pipeline,
                         params->dst.view.format,
                         params->src.view.format,
                         (blorp_batch->flags & BLORP_BATCH_PREDICATE_ENABLE));
}

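/**
 * Initialize the context's BLORP instance, wiring up the iris callbacks
 * for shader cache lookups/uploads and batch execution.
 */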
void
genX(init_blorp)(struct iris_context *ice)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;

#if GFX_VER >= 9
   blorp_init_brw(&ice->blorp, ice, &screen->isl_dev, screen->brw, NULL);
#else
   blorp_init_elk(&ice->blorp, ice, &screen->isl_dev, screen->elk, NULL);
#endif
   ice->blorp.lookup_shader = iris_blorp_lookup_shader;
   ice->blorp.upload_shader = iris_blorp_upload_shader;
   ice->blorp.exec = iris_blorp_exec;
   ice->blorp.enable_tbimr = screen->driconf.enable_tbimr;
}

static void
blorp_emit_pre_draw(struct blorp_batch *blorp_batch,
                    const struct blorp_params *params)
{
   struct iris_batch *batch = blorp_batch->driver_batch;
   blorp_measure_start(blorp_batch, params);
   genX(maybe_emit_breakpoint)(batch, true);
}

static void
blorp_emit_post_draw(struct blorp_batch *blorp_batch,
                     const struct blorp_params *params)
{
   struct iris_batch *batch = blorp_batch->driver_batch;

   /* A _3DPRIM_RECTLIST is a MESA_PRIM_QUAD_STRIP with an implied vertex. */
   genX(emit_3dprimitive_was)(batch, NULL, MESA_PRIM_QUAD_STRIP, 3);
   genX(maybe_emit_breakpoint)(batch, false);
   blorp_measure_end(blorp_batch, params);
}