/*
 * Copyright © 2022 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef GENX_CMD_DRAW_GENERATED_INDIRECT_H
#define GENX_CMD_DRAW_GENERATED_INDIRECT_H

#include <assert.h>
#include <stdbool.h>

#include "util/macros.h"

#include "common/intel_genX_state_brw.h"

#include "anv_private.h"
#include "anv_internal_kernels.h"

/* This is the maximum number of items a fragment shader can generate, due
 * to the viewport size.
 */
#define MAX_GENERATED_DRAW_COUNT (8192 * 8192)

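/* Maximum number of draw commands the ring buffer holds, so also the maximum
 * number of draws produced by one pass of the generation shader in ring mode
 * (this value sizes the ring BO below).
 */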
#define MAX_RING_BO_ITEMS (8192)

static struct anv_state
genX(cmd_buffer_emit_generate_draws)(struct anv_cmd_buffer *cmd_buffer,
                                     struct anv_simple_shader *simple_state,
                                     struct anv_address generated_cmds_addr,
                                     uint32_t generated_cmd_stride,
                                     struct anv_address indirect_data_addr,
                                     uint32_t indirect_data_stride,
                                     struct anv_address draw_id_addr,
                                     uint32_t item_base,
                                     uint32_t item_count,
                                     struct anv_address count_addr,
                                     uint32_t max_count,
                                     bool indexed,
                                     uint32_t ring_count)
{
   struct anv_device *device = cmd_buffer->device;

   struct anv_state push_data_state =
      genX(simple_shader_alloc_push)(simple_state,
                                     sizeof(struct anv_gen_indirect_params));
   if (push_data_state.map == NULL)
      return ANV_STATE_NULL;

   struct anv_graphics_pipeline *pipeline =
      anv_pipeline_to_graphics(cmd_buffer->state.gfx.base.pipeline);
   const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
   const bool use_tbimr = cmd_buffer->state.gfx.dyn_state.use_tbimr;

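   /* When the application doesn't provide a count buffer, redirect the count
    * read to our own draw_count field in the push data, which is initialized
    * to max_count below.
    */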
   struct anv_address draw_count_addr;
   if (anv_address_is_null(count_addr)) {
      draw_count_addr = anv_address_add(
         genX(simple_shader_push_state_address)(simple_state, push_data_state),
         offsetof(struct anv_gen_indirect_params, draw_count));
   } else {
      draw_count_addr = count_addr;
   }

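   /* Note the packing of the flags dword: the boolean ANV_GENERATED_FLAG_*
    * bits in the low bits, the MOCS value for the indirect data at bit 8 and
    * the generated command stride (in dwords) at bit 16.
    */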
   struct anv_gen_indirect_params *push_data = push_data_state.map;
   *push_data = (struct anv_gen_indirect_params) {
      .draw_id_addr           = anv_address_physical(draw_id_addr),
      .indirect_data_addr     = anv_address_physical(indirect_data_addr),
      .indirect_data_stride   = indirect_data_stride,
      .flags                  = (use_tbimr ? ANV_GENERATED_FLAG_TBIMR : 0) |
                                (indexed ? ANV_GENERATED_FLAG_INDEXED : 0) |
                                (cmd_buffer->state.conditional_render_enabled ?
                                 ANV_GENERATED_FLAG_PREDICATED : 0) |
                                ((vs_prog_data->uses_firstvertex ||
                                  vs_prog_data->uses_baseinstance) ?
                                 ANV_GENERATED_FLAG_BASE : 0) |
                                (vs_prog_data->uses_drawid ? ANV_GENERATED_FLAG_DRAWID : 0) |
                                (anv_mocs(device, indirect_data_addr.bo,
                                          ISL_SURF_USAGE_VERTEX_BUFFER_BIT) << 8) |
                                (!anv_address_is_null(count_addr) ?
                                 ANV_GENERATED_FLAG_COUNT : 0) |
                                (ring_count != 0 ? ANV_GENERATED_FLAG_RING_MODE : 0) |
                                ((generated_cmd_stride / 4) << 16),
      .draw_base              = item_base,
      .max_draw_count         = max_count,
      .ring_count             = ring_count,
      .instance_multiplier    = pipeline->instance_multiplier,
      .draw_count             = anv_address_is_null(count_addr) ? max_count : 0,
      .generated_cmds_addr    = anv_address_physical(generated_cmds_addr),
      .draw_count_addr        = anv_address_physical(draw_count_addr),
   };

   genX(emit_simple_shader_dispatch)(simple_state, item_count, push_data_state);

   return push_data_state;
}

static void
genX(cmd_buffer_emit_indirect_generated_draws_init)(struct anv_cmd_buffer *cmd_buffer)
{
   anv_batch_emit_ensure_space(&cmd_buffer->generation.batch, 4);

   trace_intel_begin_generate_draws(&cmd_buffer->trace);

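   /* Jump from the main batch into the generation batch. The generation
    * batch will eventually jump back to the return address recorded right
    * after this packet.
    */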
   anv_batch_emit(&cmd_buffer->batch, GENX(MI_BATCH_BUFFER_START), bbs) {
      bbs.AddressSpaceIndicator = ASI_PPGTT;
      bbs.BatchBufferStartAddress =
         anv_batch_current_address(&cmd_buffer->generation.batch);
   }

   cmd_buffer->generation.return_addr = anv_batch_current_address(&cmd_buffer->batch);

#if GFX_VER >= 12
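   /* PreParserDisable = false re-enables the CS prefetch on return from the
    * generation batch; prefetching is kept off while executing commands that
    * were just written by the GPU.
    */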
   anv_batch_emit(&cmd_buffer->batch, GENX(MI_ARB_CHECK), arb) {
      arb.PreParserDisableMask = true;
      arb.PreParserDisable = false;
   }
#endif

   trace_intel_end_generate_draws(&cmd_buffer->trace);

   struct anv_device *device = cmd_buffer->device;
   struct anv_simple_shader *state = &cmd_buffer->generation.shader_state;
   *state = (struct anv_simple_shader) {
      .device               = device,
      .cmd_buffer           = cmd_buffer,
      .dynamic_state_stream = &cmd_buffer->dynamic_state_stream,
      .general_state_stream = &cmd_buffer->general_state_stream,
      .batch                = &cmd_buffer->generation.batch,
      .kernel               = device->internal_kernels[
         ANV_INTERNAL_KERNEL_GENERATED_DRAWS],
      .l3_config            = device->internal_kernels_l3_config,
      .urb_cfg              = &cmd_buffer->state.gfx.urb_cfg,
   };

   genX(emit_simple_shader_init)(state);
}

static struct anv_address
genX(cmd_buffer_get_draw_id_addr)(struct anv_cmd_buffer *cmd_buffer,
                                  uint32_t draw_id_count)
{
#if GFX_VER >= 11
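   /* On Gfx11+ the draw id is delivered through the extended 3DPRIMITIVE
    * parameters, so no separate buffer is needed.
    */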
   return ANV_NULL_ADDRESS;
#else
   struct anv_graphics_pipeline *pipeline =
      anv_pipeline_to_graphics(cmd_buffer->state.gfx.base.pipeline);
   const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
   if (!vs_prog_data->uses_drawid)
      return ANV_NULL_ADDRESS;

   struct anv_state draw_id_state =
      anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 4 * draw_id_count, 4);
   return anv_state_pool_state_address(&cmd_buffer->device->dynamic_state_pool,
                                       draw_id_state);
#endif
}

static uint32_t
genX(cmd_buffer_get_generated_draw_stride)(struct anv_cmd_buffer *cmd_buffer)
{
   /* With the extended parameters in 3DPRIMITIVE on Gfx11+ we can emit
    * everything. Prior to this, we need to emit a couple of
    * VERTEX_BUFFER_STATE packets.
    */
#if GFX_VER >= 11
   return 4 * GENX(3DPRIMITIVE_EXTENDED_length);
#else
   struct anv_graphics_pipeline *pipeline =
      anv_pipeline_to_graphics(cmd_buffer->state.gfx.base.pipeline);
   const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);

   uint32_t len = 0;

   if (vs_prog_data->uses_firstvertex ||
       vs_prog_data->uses_baseinstance ||
       vs_prog_data->uses_drawid) {
      len += 4; /* 3DSTATE_VERTEX_BUFFERS */

      if (vs_prog_data->uses_firstvertex ||
          vs_prog_data->uses_baseinstance)
         len += 4 * GENX(VERTEX_BUFFER_STATE_length);

      if (vs_prog_data->uses_drawid)
         len += 4 * GENX(VERTEX_BUFFER_STATE_length);
   }

   return len + 4 * GENX(3DPRIMITIVE_length);
#endif
}

static void
genX(cmd_buffer_rewrite_forward_end_addr)(struct anv_cmd_buffer *cmd_buffer,
                                          struct anv_gen_indirect_params *params)
{
   /* We don't know end_addr until we have emitted all the generation draws.
    * Walk back through the chain of push parameter structures and patch
    * their end_addr field.
    */
   uint64_t end_addr =
      anv_address_physical(anv_batch_current_address(&cmd_buffer->batch));
   while (params != NULL) {
      params->end_addr = end_addr;
      params = params->prev;
   }
}

static void
genX(cmd_buffer_emit_indirect_generated_draws_inplace)(struct anv_cmd_buffer *cmd_buffer,
                                                       struct anv_address indirect_data_addr,
                                                       uint32_t indirect_data_stride,
                                                       struct anv_address count_addr,
                                                       uint32_t max_draw_count,
                                                       bool indexed)
{
   const bool start_generation_batch =
      anv_address_is_null(cmd_buffer->generation.return_addr);

   genX(flush_pipeline_select_3d)(cmd_buffer);

   struct anv_address draw_id_addr =
      genX(cmd_buffer_get_draw_id_addr)(cmd_buffer, max_draw_count);

#if GFX_VER == 9
   /* Mark the VB-0 as using the entire dynamic state pool area, but only for
    * the draw call starting the generation batch. All the following ones will
    * use the same area.
    */
   if (start_generation_batch) {
      struct anv_device *device = cmd_buffer->device;
      genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(
         cmd_buffer, 0,
         (struct anv_address) {
            .offset = device->physical->va.dynamic_state_pool.addr,
         },
         device->physical->va.dynamic_state_pool.size);
   }

   struct anv_graphics_pipeline *pipeline =
      anv_pipeline_to_graphics(cmd_buffer->state.gfx.base.pipeline);
   const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);

   if (vs_prog_data->uses_baseinstance ||
       vs_prog_data->uses_firstvertex) {
      /* We're using the indirect buffer directly to source base instance &
       * first vertex values. Mark the entire area as used.
       */
      genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(cmd_buffer, ANV_SVGS_VB_INDEX,
                                                     indirect_data_addr,
                                                     indirect_data_stride * max_draw_count);
   }

   if (vs_prog_data->uses_drawid) {
      /* Mark the whole draw id buffer as used. */
      genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(cmd_buffer, ANV_SVGS_VB_INDEX,
                                                     draw_id_addr,
                                                     sizeof(uint32_t) * max_draw_count);
   }
#endif

   /* Apply the pipeline flush here so the indirect data is available for the
    * generation shader.
    */
   genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);

   if (start_generation_batch)
      genX(cmd_buffer_emit_indirect_generated_draws_init)(cmd_buffer);

   if (cmd_buffer->state.conditional_render_enabled)
      genX(cmd_emit_conditional_render_predicate)(cmd_buffer);

   /* Emit the 3D state in the main batch. */
   genX(cmd_buffer_flush_gfx_state)(cmd_buffer);

   const uint32_t draw_cmd_stride =
      genX(cmd_buffer_get_generated_draw_stride)(cmd_buffer);

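   /* Generate the draws in chunks of at most MAX_GENERATED_DRAW_COUNT items.
    * The per-chunk push parameters are chained through params->prev so their
    * end_addr can be patched once all chunks have been emitted.
    */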
   struct anv_gen_indirect_params *last_params = NULL;
   uint32_t item_base = 0;
   while (item_base < max_draw_count) {
      const uint32_t item_count = MIN2(max_draw_count - item_base,
                                       MAX_GENERATED_DRAW_COUNT);
      const uint32_t draw_cmd_size = item_count * draw_cmd_stride;

      /* Ensure we have enough contiguous space for all the draws so that the
       * compute shader can edit all the 3DPRIMITIVEs from a single base
       * address.
       *
       * TODO: we might have to split that if the amount of space is too
       *       large (at 1MB?).
       */
      VkResult result = anv_batch_emit_ensure_space(&cmd_buffer->batch,
                                                    draw_cmd_size);
      if (result != VK_SUCCESS)
         return;

      struct anv_state params_state =
         genX(cmd_buffer_emit_generate_draws)(
            cmd_buffer,
            &cmd_buffer->generation.shader_state,
            anv_batch_current_address(&cmd_buffer->batch),
            draw_cmd_stride,
            indirect_data_addr,
            indirect_data_stride,
            anv_address_add(draw_id_addr, 4 * item_base),
            item_base,
            item_count,
            count_addr,
            max_draw_count,
            indexed,
            0 /* ring_count */);
      struct anv_gen_indirect_params *params = params_state.map;
      if (params == NULL)
         return;

      anv_batch_advance(&cmd_buffer->batch, draw_cmd_size);

      item_base += item_count;

      params->prev = last_params;
      last_params = params;
   }

   genX(cmd_buffer_rewrite_forward_end_addr)(cmd_buffer, last_params);

#if GFX_VER == 9
   update_dirty_vbs_for_gfx8_vb_flush(cmd_buffer, indexed ? RANDOM : SEQUENTIAL);
#endif
}

static void
genX(cmd_buffer_emit_indirect_generated_draws_inring)(struct anv_cmd_buffer *cmd_buffer,
                                                      struct anv_address indirect_data_addr,
                                                      uint32_t indirect_data_stride,
                                                      struct anv_address count_addr,
                                                      uint32_t max_draw_count,
                                                      bool indexed)
{
   struct anv_device *device = cmd_buffer->device;

   genX(flush_pipeline_select_3d)(cmd_buffer);

   const uint32_t draw_cmd_stride =
      genX(cmd_buffer_get_generated_draw_stride)(cmd_buffer);

   if (cmd_buffer->generation.ring_bo == NULL) {
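      /* Size the ring BO for the prefetch resume packet (Gfx12+), the draw
       * commands, the jump instruction and the draw id buffer (Gfx9), per
       * the layout described below.
       */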
      const uint32_t bo_size = align(
#if GFX_VER >= 12
         GENX(MI_ARB_CHECK_length) * 4 +
#endif
         draw_cmd_stride * MAX_RING_BO_ITEMS +
#if GFX_VER == 9
         4 * MAX_RING_BO_ITEMS +
#endif
         GENX(MI_BATCH_BUFFER_START_length) * 4,
         4096);
      VkResult result = anv_bo_pool_alloc(&device->batch_bo_pool, bo_size,
                                          &cmd_buffer->generation.ring_bo);
      if (result != VK_SUCCESS) {
         anv_batch_set_error(&cmd_buffer->batch, result);
         return;
      }
   }

   /* How many items will be generated by each iteration of the generation
    * shader dispatch.
    */
   const uint32_t ring_count = MIN2(MAX_RING_BO_ITEMS, max_draw_count);

   /* The ring bo has the following layout:
    *
    *   --------------------------------------------------
    *   | MI_ARB_CHECK to resume CS prefetch (Gfx12+)    |
    *   |------------------------------------------------|
    *   |            ring_count * 3DPRIMITIVE            |
    *   |------------------------------------------------|
    *   | jump instruction (either back to generate more |
    *   | commands or to the next set of commands)       |
    *   |------------------------------------------------|
    *   |          draw ids (only used on Gfx9)          |
    *   --------------------------------------------------
    */

   struct anv_address draw_id_addr = (struct anv_address) {
      .bo     = cmd_buffer->generation.ring_bo,
      .offset = ring_count * draw_cmd_stride +
                GENX(MI_BATCH_BUFFER_START_length) * 4,
   };

   struct anv_address draw_cmds_addr = (struct anv_address) {
      .bo = cmd_buffer->generation.ring_bo,
#if GFX_VER >= 12
      .offset = GENX(MI_ARB_CHECK_length) * 4,
#endif
   };

#if GFX_VER >= 12
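   /* Write the prefetch resume packet at the very start of the ring BO
    * (first entry of the layout above).
    */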
   struct GENX(MI_ARB_CHECK) resume_prefetch = {
      .PreParserDisableMask = true,
      .PreParserDisable = false,
   };
   GENX(MI_ARB_CHECK_pack)(NULL, cmd_buffer->generation.ring_bo->map,
                           &resume_prefetch);
#endif

#if GFX_VER == 9
   /* Mark the VB-0 as using the entire ring_bo, but only for the draw call
    * starting the generation batch. All the following ones will use the same
    * area.
    */
   genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(
      cmd_buffer, 0,
      (struct anv_address) {
         .bo = cmd_buffer->generation.ring_bo,
      },
      cmd_buffer->generation.ring_bo->size);

   struct anv_graphics_pipeline *pipeline =
      anv_pipeline_to_graphics(cmd_buffer->state.gfx.base.pipeline);
   const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);

   if (vs_prog_data->uses_baseinstance ||
       vs_prog_data->uses_firstvertex) {
      /* We're using the indirect buffer directly to source base instance &
       * first vertex values. Mark the entire area as used.
       */
      genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(cmd_buffer, ANV_SVGS_VB_INDEX,
                                                     indirect_data_addr,
                                                     indirect_data_stride * max_draw_count);
   }

   if (vs_prog_data->uses_drawid) {
      /* Mark the whole draw id buffer as used. */
      genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(cmd_buffer, ANV_SVGS_VB_INDEX,
                                                     draw_id_addr,
                                                     sizeof(uint32_t) * max_draw_count);
   }
#endif

   /* Apply the pipeline flush here so the indirect data is available for the
    * generation shader.
    */
   genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);

   trace_intel_begin_generate_draws(&cmd_buffer->trace);

   /***
    * This is where the command buffer below will jump back to if we need to
    * generate more draws.
    */
   struct anv_address gen_addr = anv_batch_current_address(&cmd_buffer->batch);

   struct anv_simple_shader simple_state = (struct anv_simple_shader) {
      .device               = device,
      .cmd_buffer           = cmd_buffer,
      .dynamic_state_stream = &cmd_buffer->dynamic_state_stream,
      .general_state_stream = &cmd_buffer->general_state_stream,
      .batch                = &cmd_buffer->batch,
      .kernel               = device->internal_kernels[
         ANV_INTERNAL_KERNEL_GENERATED_DRAWS],
      .l3_config            = device->internal_kernels_l3_config,
      .urb_cfg              = &cmd_buffer->state.gfx.urb_cfg,
   };
   genX(emit_simple_shader_init)(&simple_state);

   struct anv_state params_state =
      genX(cmd_buffer_emit_generate_draws)(
         cmd_buffer,
         &simple_state,
         draw_cmds_addr,
         draw_cmd_stride,
         indirect_data_addr,
         indirect_data_stride,
         draw_id_addr,
         0 /* item_base */,
         MIN2(MAX_RING_BO_ITEMS, max_draw_count) /* item_count */,
         count_addr,
         max_draw_count,
         indexed,
         ring_count);
   struct anv_gen_indirect_params *params = params_state.map;

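   /* Flush so the commands written by the generation shader become visible
    * to the command streamer and, on Gfx9, invalidate the VF cache since the
    * ring BO is also read as a vertex buffer (draw ids).
    */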
   anv_add_pending_pipe_bits(cmd_buffer,
#if GFX_VER == 9
                             ANV_PIPE_VF_CACHE_INVALIDATE_BIT |
#endif
                             ANV_PIPE_DATA_CACHE_FLUSH_BIT |
                             ANV_PIPE_CS_STALL_BIT,
                             "after generation flush");

   trace_intel_end_generate_draws(&cmd_buffer->trace);

   if (cmd_buffer->state.conditional_render_enabled)
      genX(cmd_emit_conditional_render_predicate)(cmd_buffer);

   /* Emit the 3D state in the main batch. */
   genX(cmd_buffer_flush_gfx_state)(cmd_buffer);

   if (max_draw_count > 0) {
#if GFX_VER >= 12
      /* Prior to Gfx12 we cannot disable the CS prefetch, but it doesn't
       * matter as the prefetch shouldn't follow the MI_BATCH_BUFFER_START.
       */
      anv_batch_emit(&cmd_buffer->batch, GENX(MI_ARB_CHECK), arb) {
         arb.PreParserDisableMask = true;
         arb.PreParserDisable = true;
      }
#endif

      /* Jump into the ring buffer. */
      anv_batch_emit(&cmd_buffer->batch, GENX(MI_BATCH_BUFFER_START), bbs) {
         bbs.AddressSpaceIndicator = ASI_PPGTT;
         bbs.BatchBufferStartAddress = (struct anv_address) {
            .bo = cmd_buffer->generation.ring_bo,
         };
      }

      /***
       * This is the location to which the ring buffer jumps back if it needs
       * to generate more draw calls. We do the following:
       *    - wait for the draws in the ring buffer to complete (CS stall) so
       *      we're sure the push constant data we're about to edit is no
       *      longer being read
       *    - increment the base draw number by the number of draws executed
       *      in the ring
       *    - invalidate the constant cache since
       *      anv_gen_indirect_params::draw_base is updated
       *    - jump back to the generation shader
       */
      struct anv_address inc_addr =
         anv_batch_current_address(&cmd_buffer->batch);

      anv_add_pending_pipe_bits(cmd_buffer,
                                ANV_PIPE_STALL_AT_SCOREBOARD_BIT |
                                ANV_PIPE_CS_STALL_BIT,
                                "after generated draws batch");
      genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);

      struct mi_builder b;
      mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);

      struct anv_address draw_base_addr = anv_address_add(
         genX(simple_shader_push_state_address)(
            &simple_state, params_state),
         offsetof(struct anv_gen_indirect_params, draw_base));

      const uint32_t mocs = anv_mocs_for_address(cmd_buffer->device,
                                                 &draw_base_addr);
      mi_builder_set_mocs(&b, mocs);

      mi_store(&b, mi_mem32(draw_base_addr),
                   mi_iadd(&b, mi_mem32(draw_base_addr),
                               mi_imm(ring_count)));

      anv_add_pending_pipe_bits(cmd_buffer,
                                ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT,
                                "after generated draws batch increment");
      genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);

      anv_batch_emit(&cmd_buffer->batch, GENX(MI_BATCH_BUFFER_START), bbs) {
         bbs.AddressSpaceIndicator = ASI_PPGTT;
         bbs.BatchBufferStartAddress = gen_addr;
      }

      /***
       * This is the location to which the ring buffer jumps once all the
       * draw calls have executed.
       */
      struct anv_address end_addr = anv_batch_current_address(&cmd_buffer->batch);

      /* Reset the draw_base field in case we ever replay the command buffer. */
      mi_store(&b, mi_mem32(draw_base_addr), mi_imm(0));

      anv_add_pending_pipe_bits(cmd_buffer,
                                ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT,
                                "after generated draws end");

      params->gen_addr = anv_address_physical(inc_addr);
      params->end_addr = anv_address_physical(end_addr);
   }
}

static void
genX(cmd_buffer_emit_indirect_generated_draws)(struct anv_cmd_buffer *cmd_buffer,
                                               struct anv_address indirect_data_addr,
                                               uint32_t indirect_data_stride,
                                               struct anv_address count_addr,
                                               uint32_t max_draw_count,
                                               bool indexed)
{
   /* In order to have the vertex fetch gather the data we need a non-zero
    * stride. The application may supply a stride of 0 when draw_count is 1,
    * but we need a correct value for VERTEX_BUFFER_STATE::BufferPitch, so
    * ensure the caller sets this up correctly:
    *
    * Vulkan spec, vkCmdDrawIndirect:
    *
    *   "If drawCount is less than or equal to one, stride is ignored."
    */
   assert(indirect_data_stride > 0);

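   /* Past an instance-configured draw count threshold, generating all the
    * commands inline in the main batch would take too much space; switch to
    * the ring buffer mode, which replays a fixed amount of ring space
    * through the generation shader as many times as needed.
    */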
   const bool use_ring_buffer = max_draw_count >=
      cmd_buffer->device->physical->instance->generated_indirect_ring_threshold;
   if (use_ring_buffer) {
      genX(cmd_buffer_emit_indirect_generated_draws_inring)(cmd_buffer,
                                                            indirect_data_addr,
                                                            indirect_data_stride,
                                                            count_addr,
                                                            max_draw_count,
                                                            indexed);
   } else {
      genX(cmd_buffer_emit_indirect_generated_draws_inplace)(cmd_buffer,
                                                             indirect_data_addr,
                                                             indirect_data_stride,
                                                             count_addr,
                                                             max_draw_count,
                                                             indexed);
   }
}

#endif /* GENX_CMD_DRAW_GENERATED_INDIRECT_H */