/*
 * Copyright © 2022 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef GENX_CMD_DRAW_GENERATED_INDIRECT_H
#define GENX_CMD_DRAW_GENERATED_INDIRECT_H

#include <assert.h>
#include <stdbool.h>

#include "util/macros.h"

#include "common/intel_genX_state_brw.h"

#include "anv_private.h"
#include "anv_internal_kernels.h"

/* Maximum number of items the generation fragment shader can produce in a
 * single dispatch, limited by the maximum viewport size.
 */
#define MAX_GENERATED_DRAW_COUNT (8192 * 8192)

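/* Maximum number of draw commands held by the ring BO used in the ring mode.
 * Each iteration of the generation shader fills at most this many entries
 * before the batch loops back to generate more (see
 * genX(cmd_buffer_emit_indirect_generated_draws_inring)).
 */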
#define MAX_RING_BO_ITEMS (8192)

static struct anv_state
genX(cmd_buffer_emit_generate_draws)(struct anv_cmd_buffer *cmd_buffer,
                                     struct anv_simple_shader *simple_state,
                                     struct anv_address generated_cmds_addr,
                                     uint32_t generated_cmd_stride,
                                     struct anv_address indirect_data_addr,
                                     uint32_t indirect_data_stride,
                                     struct anv_address draw_id_addr,
                                     uint32_t item_base,
                                     uint32_t item_count,
                                     struct anv_address count_addr,
                                     uint32_t max_count,
                                     bool indexed,
                                     uint32_t ring_count)
{
   struct anv_device *device = cmd_buffer->device;

   struct anv_state push_data_state =
      genX(simple_shader_alloc_push)(simple_state,
                                     sizeof(struct anv_gen_indirect_params));
   if (push_data_state.map == NULL)
      return ANV_STATE_NULL;

   struct anv_graphics_pipeline *pipeline =
      anv_pipeline_to_graphics(cmd_buffer->state.gfx.base.pipeline);
   const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
   const bool use_tbimr = cmd_buffer->state.gfx.dyn_state.use_tbimr;

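   /* Without an application provided count buffer, point draw_count_addr at
    * the draw_count field of the push constants, which is initialized to
    * max_count below.
    */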
   struct anv_address draw_count_addr;
   if (anv_address_is_null(count_addr)) {
      draw_count_addr = anv_address_add(
         genX(simple_shader_push_state_address)(simple_state, push_data_state),
         offsetof(struct anv_gen_indirect_params, draw_count));
   } else {
      draw_count_addr = count_addr;
   }

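   /* flags packs the boolean generation flags in its low bits, the MOCS
    * value for the indirect data in bits 8:15 and the generated command
    * stride in dwords starting at bit 16 (see the shifts below).
    */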
   struct anv_gen_indirect_params *push_data = push_data_state.map;
   *push_data = (struct anv_gen_indirect_params) {
      .draw_id_addr = anv_address_physical(draw_id_addr),
      .indirect_data_addr = anv_address_physical(indirect_data_addr),
      .indirect_data_stride = indirect_data_stride,
      .flags = (use_tbimr ? ANV_GENERATED_FLAG_TBIMR : 0) |
               (indexed ? ANV_GENERATED_FLAG_INDEXED : 0) |
               (cmd_buffer->state.conditional_render_enabled ?
                ANV_GENERATED_FLAG_PREDICATED : 0) |
               ((vs_prog_data->uses_firstvertex ||
                 vs_prog_data->uses_baseinstance) ?
                ANV_GENERATED_FLAG_BASE : 0) |
               (vs_prog_data->uses_drawid ? ANV_GENERATED_FLAG_DRAWID : 0) |
               (anv_mocs(device, indirect_data_addr.bo,
                         ISL_SURF_USAGE_VERTEX_BUFFER_BIT) << 8) |
               (!anv_address_is_null(count_addr) ?
                ANV_GENERATED_FLAG_COUNT : 0) |
               (ring_count != 0 ? ANV_GENERATED_FLAG_RING_MODE : 0) |
               ((generated_cmd_stride / 4) << 16),
      .draw_base = item_base,
      .max_draw_count = max_count,
      .ring_count = ring_count,
      .instance_multiplier = pipeline->instance_multiplier,
      .draw_count = anv_address_is_null(count_addr) ? max_count : 0,
      .generated_cmds_addr = anv_address_physical(generated_cmds_addr),
      .draw_count_addr = anv_address_physical(draw_count_addr),
   };

   genX(emit_simple_shader_dispatch)(simple_state, item_count, push_data_state);

   return push_data_state;
}

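/* Emit a jump from the main batch into the generation batch and record the
 * return address right after the jump, so that the generation batch can
 * later jump back into the main batch. This also sets up the simple shader
 * state shared by the generation dispatches.
 */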
static void
genX(cmd_buffer_emit_indirect_generated_draws_init)(struct anv_cmd_buffer *cmd_buffer)
{
   anv_batch_emit_ensure_space(&cmd_buffer->generation.batch, 4);

   trace_intel_begin_generate_draws(&cmd_buffer->trace);

   anv_batch_emit(&cmd_buffer->batch, GENX(MI_BATCH_BUFFER_START), bbs) {
      bbs.AddressSpaceIndicator = ASI_PPGTT;
      bbs.BatchBufferStartAddress =
         anv_batch_current_address(&cmd_buffer->generation.batch);
   }

   cmd_buffer->generation.return_addr = anv_batch_current_address(&cmd_buffer->batch);

#if GFX_VER >= 12
   anv_batch_emit(&cmd_buffer->batch, GENX(MI_ARB_CHECK), arb) {
      arb.PreParserDisableMask = true;
      arb.PreParserDisable = false;
   }
#endif

   trace_intel_end_generate_draws(&cmd_buffer->trace);

   struct anv_device *device = cmd_buffer->device;
   struct anv_simple_shader *state = &cmd_buffer->generation.shader_state;
   *state = (struct anv_simple_shader) {
      .device = device,
      .cmd_buffer = cmd_buffer,
      .dynamic_state_stream = &cmd_buffer->dynamic_state_stream,
      .general_state_stream = &cmd_buffer->general_state_stream,
      .batch = &cmd_buffer->generation.batch,
      .kernel = device->internal_kernels[
         ANV_INTERNAL_KERNEL_GENERATED_DRAWS],
      .l3_config = device->internal_kernels_l3_config,
      .urb_cfg = &cmd_buffer->state.gfx.urb_cfg,
   };

   genX(emit_simple_shader_init)(state);
}

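/* Return the address of a buffer holding the draw ids (one uint32_t per
 * draw). On Gfx11+ the draw id is passed through the extended parameters of
 * 3DPRIMITIVE, so no buffer is needed.
 */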
static struct anv_address
genX(cmd_buffer_get_draw_id_addr)(struct anv_cmd_buffer *cmd_buffer,
                                  uint32_t draw_id_count)
{
#if GFX_VER >= 11
   return ANV_NULL_ADDRESS;
#else
   struct anv_graphics_pipeline *pipeline =
      anv_pipeline_to_graphics(cmd_buffer->state.gfx.base.pipeline);
   const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
   if (!vs_prog_data->uses_drawid)
      return ANV_NULL_ADDRESS;

   struct anv_state draw_id_state =
      anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 4 * draw_id_count, 4);
   return anv_state_pool_state_address(&cmd_buffer->device->dynamic_state_pool,
                                       draw_id_state);
#endif
}

static uint32_t
genX(cmd_buffer_get_generated_draw_stride)(struct anv_cmd_buffer *cmd_buffer)
{
   /* With the extended parameters in 3DPRIMITIVE on Gfx11+ we can emit
    * everything in a single command. Prior to that, we also need to emit a
    * couple of VERTEX_BUFFER_STATE.
    */
#if GFX_VER >= 11
   return 4 * GENX(3DPRIMITIVE_EXTENDED_length);
#else
   struct anv_graphics_pipeline *pipeline =
      anv_pipeline_to_graphics(cmd_buffer->state.gfx.base.pipeline);
   const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);

   uint32_t len = 0;

   if (vs_prog_data->uses_firstvertex ||
       vs_prog_data->uses_baseinstance ||
       vs_prog_data->uses_drawid) {
      len += 4; /* 3DSTATE_VERTEX_BUFFERS */

      if (vs_prog_data->uses_firstvertex ||
          vs_prog_data->uses_baseinstance)
         len += 4 * GENX(VERTEX_BUFFER_STATE_length);

      if (vs_prog_data->uses_drawid)
         len += 4 * GENX(VERTEX_BUFFER_STATE_length);
   }

   return len + 4 * GENX(3DPRIMITIVE_length);
#endif
}

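/* Walk the chain of push constant structures (linked through their prev
 * field by the inplace path below) and patch the end address of the
 * generated commands into each of them.
 */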
static void
genX(cmd_buffer_rewrite_forward_end_addr)(struct anv_cmd_buffer *cmd_buffer,
                                          struct anv_gen_indirect_params *params)
{
   /* We don't know end_addr until we have emitted all the generation draws.
    * Go back and edit the address in all the push parameters.
    */
   uint64_t end_addr =
      anv_address_physical(anv_batch_current_address(&cmd_buffer->batch));
   while (params != NULL) {
      params->end_addr = end_addr;
      params = params->prev;
   }
}

static void
genX(cmd_buffer_emit_indirect_generated_draws_inplace)(struct anv_cmd_buffer *cmd_buffer,
                                                       struct anv_address indirect_data_addr,
                                                       uint32_t indirect_data_stride,
                                                       struct anv_address count_addr,
                                                       uint32_t max_draw_count,
                                                       bool indexed)
{
   const bool start_generation_batch =
      anv_address_is_null(cmd_buffer->generation.return_addr);

   genX(flush_pipeline_select_3d)(cmd_buffer);

   struct anv_address draw_id_addr =
      genX(cmd_buffer_get_draw_id_addr)(cmd_buffer, max_draw_count);

#if GFX_VER == 9
   /* Mark VB-0 as using the entire dynamic state pool area, but only for the
    * draw call starting the generation batch. All the following ones will
    * use the same area.
    */
   if (start_generation_batch) {
      struct anv_device *device = cmd_buffer->device;
      genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(
         cmd_buffer, 0,
         (struct anv_address) {
            .offset = device->physical->va.dynamic_state_pool.addr,
         },
         device->physical->va.dynamic_state_pool.size);
   }

   struct anv_graphics_pipeline *pipeline =
      anv_pipeline_to_graphics(cmd_buffer->state.gfx.base.pipeline);
   const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);

   if (vs_prog_data->uses_baseinstance ||
       vs_prog_data->uses_firstvertex) {
      /* We're using the indirect buffer directly to source base instance &
       * first vertex values. Mark the entire area as used.
       */
      genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(cmd_buffer, ANV_SVGS_VB_INDEX,
                                                     indirect_data_addr,
                                                     indirect_data_stride * max_draw_count);
   }

   if (vs_prog_data->uses_drawid) {
      /* Mark the whole draw id buffer as used. */
      genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(cmd_buffer, ANV_DRAWID_VB_INDEX,
                                                     draw_id_addr,
                                                     sizeof(uint32_t) * max_draw_count);
   }
#endif

   /* Apply the pipeline flush here so the indirect data is available for the
    * generation shader.
    */
   genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);

   if (start_generation_batch)
      genX(cmd_buffer_emit_indirect_generated_draws_init)(cmd_buffer);

   if (cmd_buffer->state.conditional_render_enabled)
      genX(cmd_emit_conditional_render_predicate)(cmd_buffer);

   /* Emit the 3D state in the main batch. */
   genX(cmd_buffer_flush_gfx_state)(cmd_buffer);

   const uint32_t draw_cmd_stride =
      genX(cmd_buffer_get_generated_draw_stride)(cmd_buffer);

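   /* Generate the draws in chunks of at most MAX_GENERATED_DRAW_COUNT items,
    * the maximum a single generation shader dispatch can produce.
    */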
   struct anv_gen_indirect_params *last_params = NULL;
   uint32_t item_base = 0;
   while (item_base < max_draw_count) {
      const uint32_t item_count = MIN2(max_draw_count - item_base,
                                       MAX_GENERATED_DRAW_COUNT);
      const uint32_t draw_cmd_size = item_count * draw_cmd_stride;

      /* Ensure we have enough contiguous space for all the draws so that the
       * generation shader can edit all the 3DPRIMITIVEs from a single base
       * address.
       *
       * TODO: we might have to split this if the amount of space is too
       * large (at 1Mb?).
       */
      VkResult result = anv_batch_emit_ensure_space(&cmd_buffer->batch,
                                                    draw_cmd_size);
      if (result != VK_SUCCESS)
         return;

      struct anv_state params_state =
         genX(cmd_buffer_emit_generate_draws)(
            cmd_buffer,
            &cmd_buffer->generation.shader_state,
            anv_batch_current_address(&cmd_buffer->batch),
            draw_cmd_stride,
            indirect_data_addr,
            indirect_data_stride,
            anv_address_add(draw_id_addr, 4 * item_base),
            item_base,
            item_count,
            count_addr,
            max_draw_count,
            indexed,
            0 /* ring_count */);
      struct anv_gen_indirect_params *params = params_state.map;
      if (params == NULL)
         return;

      anv_batch_advance(&cmd_buffer->batch, draw_cmd_size);

      item_base += item_count;

      params->prev = last_params;
      last_params = params;
   }

   genX(cmd_buffer_rewrite_forward_end_addr)(cmd_buffer, last_params);

#if GFX_VER == 9
   update_dirty_vbs_for_gfx8_vb_flush(cmd_buffer, indexed ? RANDOM : SEQUENTIAL);
#endif
}

static void
genX(cmd_buffer_emit_indirect_generated_draws_inring)(struct anv_cmd_buffer *cmd_buffer,
                                                      struct anv_address indirect_data_addr,
                                                      uint32_t indirect_data_stride,
                                                      struct anv_address count_addr,
                                                      uint32_t max_draw_count,
                                                      bool indexed)
{
   struct anv_device *device = cmd_buffer->device;

   genX(flush_pipeline_select_3d)(cmd_buffer);

   const uint32_t draw_cmd_stride =
      genX(cmd_buffer_get_generated_draw_stride)(cmd_buffer);

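   /* Allocate the ring BO on first use. It is sized to hold one batch of
    * generated draw commands plus the trailing jump instruction and, on
    * Gfx9, the draw id buffer (see the layout description below).
    */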
   if (cmd_buffer->generation.ring_bo == NULL) {
      const uint32_t bo_size = align(
#if GFX_VER >= 12
         GENX(MI_ARB_CHECK_length) * 4 +
#endif
         draw_cmd_stride * MAX_RING_BO_ITEMS +
#if GFX_VER == 9
         4 * MAX_RING_BO_ITEMS +
#endif
         GENX(MI_BATCH_BUFFER_START_length) * 4,
         4096);
      VkResult result = anv_bo_pool_alloc(&device->batch_bo_pool, bo_size,
                                          &cmd_buffer->generation.ring_bo);
      if (result != VK_SUCCESS) {
         anv_batch_set_error(&cmd_buffer->batch, result);
         return;
      }
   }

   /* How many items will be generated by each iteration of the generation
    * shader dispatch.
    */
   const uint32_t ring_count = MIN2(MAX_RING_BO_ITEMS, max_draw_count);

   /* The ring BO has the following layout:
    *
    * --------------------------------------------------
    * | MI_ARB_CHECK to resume CS prefetch (Gfx12+)    |
    * |------------------------------------------------|
    * | ring_count * 3DPRIMITIVE                       |
    * |------------------------------------------------|
    * | jump instruction (either back to generate more |
    * | commands or to the next set of commands)       |
    * |------------------------------------------------|
    * | draw ids (only used on Gfx9)                   |
    * --------------------------------------------------
    */

   struct anv_address draw_id_addr = (struct anv_address) {
      .bo = cmd_buffer->generation.ring_bo,
      .offset = ring_count * draw_cmd_stride +
                GENX(MI_BATCH_BUFFER_START_length) * 4,
   };

   struct anv_address draw_cmds_addr = (struct anv_address) {
      .bo = cmd_buffer->generation.ring_bo,
#if GFX_VER >= 12
      .offset = GENX(MI_ARB_CHECK_length) * 4,
#endif
   };

#if GFX_VER >= 12
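   /* Write the MI_ARB_CHECK re-enabling the CS prefetch at the very start of
    * the ring BO, where execution lands when the main batch jumps into the
    * ring.
    */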
   struct GENX(MI_ARB_CHECK) resume_prefetch = {
      .PreParserDisableMask = true,
      .PreParserDisable = false,
   };
   GENX(MI_ARB_CHECK_pack)(NULL, cmd_buffer->generation.ring_bo->map,
                           &resume_prefetch);
#endif

#if GFX_VER == 9
   /* Mark VB-0 as using the entire ring_bo, but only for the draw call
    * starting the generation batch. All the following ones will use the same
    * area.
    */
   genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(
      cmd_buffer, 0,
      (struct anv_address) {
         .bo = cmd_buffer->generation.ring_bo,
      },
      cmd_buffer->generation.ring_bo->size);

   struct anv_graphics_pipeline *pipeline =
      anv_pipeline_to_graphics(cmd_buffer->state.gfx.base.pipeline);
   const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);

   if (vs_prog_data->uses_baseinstance ||
       vs_prog_data->uses_firstvertex) {
      /* We're using the indirect buffer directly to source base instance &
       * first vertex values. Mark the entire area as used.
       */
      genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(cmd_buffer, ANV_SVGS_VB_INDEX,
                                                     indirect_data_addr,
                                                     indirect_data_stride * max_draw_count);
   }

   if (vs_prog_data->uses_drawid) {
      /* Mark the whole draw id buffer as used. */
      genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(cmd_buffer, ANV_DRAWID_VB_INDEX,
                                                     draw_id_addr,
                                                     sizeof(uint32_t) * max_draw_count);
   }
#endif

   /* Apply the pipeline flush here so the indirect data is available for the
    * generation shader.
    */
   genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);

   trace_intel_begin_generate_draws(&cmd_buffer->trace);

   /***
    * This is the location the commands emitted below jump back to when more
    * draws need to be generated.
    */
   struct anv_address gen_addr = anv_batch_current_address(&cmd_buffer->batch);

   struct anv_simple_shader simple_state = (struct anv_simple_shader) {
      .device = device,
      .cmd_buffer = cmd_buffer,
      .dynamic_state_stream = &cmd_buffer->dynamic_state_stream,
      .general_state_stream = &cmd_buffer->general_state_stream,
      .batch = &cmd_buffer->batch,
      .kernel = device->internal_kernels[
         ANV_INTERNAL_KERNEL_GENERATED_DRAWS],
      .l3_config = device->internal_kernels_l3_config,
      .urb_cfg = &cmd_buffer->state.gfx.urb_cfg,
   };
   genX(emit_simple_shader_init)(&simple_state);

   struct anv_state params_state =
      genX(cmd_buffer_emit_generate_draws)(
         cmd_buffer,
         &simple_state,
         draw_cmds_addr,
         draw_cmd_stride,
         indirect_data_addr,
         indirect_data_stride,
         draw_id_addr,
         0 /* item_base */,
         MIN2(MAX_RING_BO_ITEMS, max_draw_count) /* item_count */,
         count_addr,
         max_draw_count,
         indexed,
         ring_count);
   struct anv_gen_indirect_params *params = params_state.map;

   anv_add_pending_pipe_bits(cmd_buffer,
#if GFX_VER == 9
                             ANV_PIPE_VF_CACHE_INVALIDATE_BIT |
#endif
                             ANV_PIPE_DATA_CACHE_FLUSH_BIT |
                             ANV_PIPE_CS_STALL_BIT,
                             "after generation flush");

   trace_intel_end_generate_draws(&cmd_buffer->trace);

   if (cmd_buffer->state.conditional_render_enabled)
      genX(cmd_emit_conditional_render_predicate)(cmd_buffer);

   /* Emit the 3D state in the main batch. */
   genX(cmd_buffer_flush_gfx_state)(cmd_buffer);

   if (max_draw_count > 0) {
#if GFX_VER >= 12
      /* Prior to Gfx12 we cannot disable the CS prefetch but it doesn't
       * matter as the prefetch shouldn't follow the MI_BATCH_BUFFER_START.
       */
      anv_batch_emit(&cmd_buffer->batch, GENX(MI_ARB_CHECK), arb) {
         arb.PreParserDisableMask = true;
         arb.PreParserDisable = true;
      }
#endif

      /* Jump into the ring buffer. */
      anv_batch_emit(&cmd_buffer->batch, GENX(MI_BATCH_BUFFER_START), bbs) {
         bbs.AddressSpaceIndicator = ASI_PPGTT;
         bbs.BatchBufferStartAddress = (struct anv_address) {
            .bo = cmd_buffer->generation.ring_bo,
         };
      }

      /***
       * This is the location to which the ring buffer jumps back when it
       * needs to generate more draw calls. We do the following:
       *   - wait for the draws in the ring buffer to complete (CS stall) so
       *     we're sure the push constant data we're about to edit is no
       *     longer being read
       *   - increment the base draw number by the number of draws executed
       *     in the ring
       *   - invalidate the constant cache since
       *     anv_gen_indirect_params::draw_base was updated
       *   - jump back to the generation shader
       */
      struct anv_address inc_addr =
         anv_batch_current_address(&cmd_buffer->batch);

      anv_add_pending_pipe_bits(cmd_buffer,
                                ANV_PIPE_STALL_AT_SCOREBOARD_BIT |
                                ANV_PIPE_CS_STALL_BIT,
                                "after generated draws batch");
      genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);

      struct mi_builder b;
      mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);

      struct anv_address draw_base_addr = anv_address_add(
         genX(simple_shader_push_state_address)(
            &simple_state, params_state),
         offsetof(struct anv_gen_indirect_params, draw_base));

      const uint32_t mocs = anv_mocs_for_address(cmd_buffer->device,
                                                 &draw_base_addr);
      mi_builder_set_mocs(&b, mocs);

      mi_store(&b, mi_mem32(draw_base_addr),
                   mi_iadd(&b, mi_mem32(draw_base_addr),
                               mi_imm(ring_count)));

      anv_add_pending_pipe_bits(cmd_buffer,
                                ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT,
                                "after generated draws batch increment");
      genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);

      anv_batch_emit(&cmd_buffer->batch, GENX(MI_BATCH_BUFFER_START), bbs) {
         bbs.AddressSpaceIndicator = ASI_PPGTT;
         bbs.BatchBufferStartAddress = gen_addr;
      }

      /***
       * This is the location to which the ring buffer jumps once all the
       * draw calls have been executed.
       */
      struct anv_address end_addr = anv_batch_current_address(&cmd_buffer->batch);

      /* Reset the draw_base field in case we ever replay the command buffer. */
      mi_store(&b, mi_mem32(draw_base_addr), mi_imm(0));

      anv_add_pending_pipe_bits(cmd_buffer,
                                ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT,
                                "after generated draws end");

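      /* The generation shader writes the jump instruction at the end of the
       * ring buffer based on these two addresses: back to inc_addr while
       * more draws remain to be generated, to end_addr once they have all
       * been generated.
       */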
      params->gen_addr = anv_address_physical(inc_addr);
      params->end_addr = anv_address_physical(end_addr);
   }
}

static void
genX(cmd_buffer_emit_indirect_generated_draws)(struct anv_cmd_buffer *cmd_buffer,
                                               struct anv_address indirect_data_addr,
                                               uint32_t indirect_data_stride,
                                               struct anv_address count_addr,
                                               uint32_t max_draw_count,
                                               bool indexed)
{
   /* In order to have the vertex fetch gather the data we need a non-zero
    * stride. The application is allowed to pass a 0 stride when draw_count
    * is 1, but we need a correct value for VERTEX_BUFFER_STATE::BufferPitch,
    * so ensure the caller has set it up correctly:
    *
    *    Vulkan spec, vkCmdDrawIndirect:
    *
    *       "If drawCount is less than or equal to one, stride is ignored."
    */
   assert(indirect_data_stride > 0);

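   /* Below the instance's generated_indirect_ring_threshold the 3DPRIMITIVE
    * commands are generated directly into the main batch. At or above it,
    * switch to the ring buffer mode so the batch space used stays bounded
    * regardless of max_draw_count.
    */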
   const bool use_ring_buffer = max_draw_count >=
      cmd_buffer->device->physical->instance->generated_indirect_ring_threshold;
   if (use_ring_buffer) {
      genX(cmd_buffer_emit_indirect_generated_draws_inring)(cmd_buffer,
                                                            indirect_data_addr,
                                                            indirect_data_stride,
                                                            count_addr,
                                                            max_draw_count,
                                                            indexed);
   } else {
      genX(cmd_buffer_emit_indirect_generated_draws_inplace)(cmd_buffer,
                                                             indirect_data_addr,
                                                             indirect_data_stride,
                                                             count_addr,
                                                             max_draw_count,
                                                             indexed);
   }
}

#endif /* GENX_CMD_DRAW_GENERATED_INDIRECT_H */