1 /*
2  * Copyright © 2021 Google
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "radv_meta.h"
25 #include "radv_private.h"
26 
27 #include "nir_builder.h"
28 
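/* Compute the per-sequence sizes, in bytes, of the generated command buffer
 * and of the data uploaded next to it (VBO descriptors, copied push
 * constants), based on which tokens the indirect layout contains and on the
 * bound graphics pipeline.
 */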
29 static void
30 radv_get_sequence_size(const struct radv_indirect_command_layout *layout,
31                        const struct radv_graphics_pipeline *pipeline, uint32_t *cmd_size,
32                        uint32_t *upload_size)
33 {
34    *cmd_size = 0;
35    *upload_size = 0;
36 
37    if (layout->bind_vbo_mask) {
38       *upload_size += 16 * util_bitcount(pipeline->vb_desc_usage_mask);
39 
40      /* One PKT3_SET_SH_REG for emitting VBO pointer (32-bit) */
41       *cmd_size += 3 * 4;
42    }
43 
44    if (layout->push_constant_mask) {
45       bool need_copy = false;
46 
47       for (unsigned i = 0; i < ARRAY_SIZE(pipeline->base.shaders); ++i) {
48          if (!pipeline->base.shaders[i])
49             continue;
50 
51          struct radv_userdata_locations *locs = &pipeline->base.shaders[i]->info.user_sgprs_locs;
52          if (locs->shader_data[AC_UD_PUSH_CONSTANTS].sgpr_idx >= 0) {
53             /* One PKT3_SET_SH_REG for emitting push constants pointer (32-bit) */
54             *cmd_size += 3 * 4;
55             need_copy = true;
56          }
57          if (locs->shader_data[AC_UD_INLINE_PUSH_CONSTANTS].sgpr_idx >= 0)
58             /* One PKT3_SET_SH_REG writing all inline push constants. */
59             *cmd_size += (2 + locs->shader_data[AC_UD_INLINE_PUSH_CONSTANTS].num_sgprs) * 4;
60       }
61       if (need_copy)
62          *upload_size +=
63             align(pipeline->base.push_constant_size + 16 * pipeline->base.dynamic_offset_count, 16);
64    }
65 
66    if (layout->binds_index_buffer) {
67       /* Index type write (normal reg write) + index buffer base write (64-bits, but special packet
68        * so only 1 word overhead) + index buffer size (again, special packet so only 1 word
69        * overhead)
70        */
71       *cmd_size += (3 + 3 + 2) * 4;
72    }
73 
74    if (layout->indexed) {
75       /* userdata writes + instance count + indexed draw */
76       *cmd_size += (5 + 2 + 5) * 4;
77    } else {
78       /* userdata writes + instance count + non-indexed draw */
79       *cmd_size += (5 + 2 + 3) * 4;
80    }
81 
82    if (layout->binds_state) {
83       /* One PKT3_SET_CONTEXT_REG (PA_SU_SC_MODE_CNTL) */
84       *cmd_size += 3 * 4;
85 
86       if (pipeline->base.device->physical_device->rad_info.has_gfx9_scissor_bug) {
87          /* 1 reg write of 4 regs + 1 reg write of 2 regs per scissor */
88          *cmd_size += (8 + 2 * MAX_SCISSORS) * 4;
89       }
90    }
91 }
92 
93 static uint32_t
94 radv_align_cmdbuf_size(uint32_t size)
95 {
96    return align(MAX2(1, size), 256);
97 }
98 
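/* Size in bytes of the command buffer portion of the preprocess buffer:
 * the per-sequence command size times the sequence count, aligned.
 */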
99 uint32_t
100 radv_get_indirect_cmdbuf_size(const VkGeneratedCommandsInfoNV *cmd_info)
101 {
102    VK_FROM_HANDLE(radv_indirect_command_layout, layout, cmd_info->indirectCommandsLayout);
103    VK_FROM_HANDLE(radv_pipeline, pipeline, cmd_info->pipeline);
104    struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);
105 
106    uint32_t cmd_size, upload_size;
107    radv_get_sequence_size(layout, graphics_pipeline, &cmd_size, &upload_size);
108    return radv_align_cmdbuf_size(cmd_size * cmd_info->sequencesCount);
109 }
110 
111 enum radv_dgc_token_type {
112    RADV_DGC_INDEX_BUFFER,
113    RADV_DGC_DRAW,
114    RADV_DGC_INDEXED_DRAW,
115 };
116 
117 struct radv_dgc_token {
118    uint16_t type; /* enum radv_dgc_token_type, but making the size explicit */
119    uint16_t offset; /* offset in the input stream */
120    union {
121       struct {
122          uint16_t vtx_base_sgpr;
123       } draw;
124       struct {
125          uint16_t index_size;
126          uint16_t vtx_base_sgpr;
127          uint32_t max_index_count;
128       } indexed_draw;
129    };
130 };
131 
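/* Push constants of the DGC prepare shader. Fields are read with the
 * load_param* macros below, so their offsets must stay in sync.
 */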
132 struct radv_dgc_params {
133    uint32_t cmd_buf_stride;
134    uint32_t cmd_buf_size;
135    uint32_t upload_stride;
136    uint32_t upload_addr;
137    uint32_t sequence_count;
138    uint32_t stream_stride;
139 
140    /* draw info */
141    uint16_t draw_indexed;
142    uint16_t draw_params_offset;
143    uint16_t base_index_size;
144    uint16_t vtx_base_sgpr;
145    uint32_t max_index_count;
146 
147    /* bind index buffer info. Valid if base_index_size == 0 && draw_indexed */
148    uint16_t index_buffer_offset;
149 
150    /* Top bit is DGC_DYNAMIC_VERTEX_INPUT */
151    uint8_t vbo_cnt;
152 
153    uint8_t const_copy;
154 
155    /* Which VBOs are set in this indirect layout. */
156    uint32_t vbo_bind_mask;
157 
158    uint16_t vbo_reg;
159    uint16_t const_copy_size;
160 
161    uint64_t push_constant_mask;
162 
163    uint32_t ibo_type_32;
164    uint32_t ibo_type_8;
165 
166    uint16_t push_constant_shader_cnt;
167 
168    uint16_t emit_state;
169    uint32_t pa_su_sc_mode_cntl_base;
170    uint16_t state_offset;
171    uint16_t scissor_count;
172    uint16_t scissor_offset; /* in parameter buffer. */
173 };
174 
175 enum {
176    DGC_USES_DRAWID = 1u << 14,
177    DGC_USES_BASEINSTANCE = 1u << 15,
178 };
179 
180 enum {
181    DGC_DYNAMIC_STRIDE = 1u << 15,
182 };
183 
184 enum {
185    DGC_DYNAMIC_VERTEX_INPUT = 1u << 7,
186 };
187 
188 enum {
189    DGC_DESC_STREAM,
190    DGC_DESC_PREPARE,
191    DGC_DESC_PARAMS,
192    DGC_DESC_COUNT,
193    DGC_NUM_DESCS,
194 };
195 
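/* Output state for the prepare shader: the destination buffer descriptor and
 * a running byte offset advanced by dgc_emit().
 */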
196 struct dgc_cmdbuf {
197    nir_ssa_def *descriptor;
198    nir_variable *offset;
199 };
200 
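/* Append a vector of 32-bit words to the generated command buffer and advance
 * the write offset.
 */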
201 static void
202 dgc_emit(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *value)
203 {
204    assert(value->bit_size >= 32);
205    nir_ssa_def *offset = nir_load_var(b, cs->offset);
206    nir_store_ssbo(b, value, cs->descriptor, offset, .access = ACCESS_NON_READABLE);
207    nir_store_var(b, cs->offset, nir_iadd_imm(b, offset, value->num_components * value->bit_size / 8), 0x1);
208 }
209 
210 
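/* Load a field of struct radv_dgc_params from the push constants. Sub-dword
 * fields are read from their containing dword and extracted with nir_ubfe.
 */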
211 #define load_param32(b, field)                                                                     \
212    nir_load_push_constant((b), 1, 32, nir_imm_int((b), 0),                                         \
213                           .base = offsetof(struct radv_dgc_params, field), .range = 4)
214 
215 #define load_param16(b, field)                                                                     \
216    nir_ubfe(                                                                                       \
217       (b),                                                                                         \
218       nir_load_push_constant((b), 1, 32, nir_imm_int((b), 0),                                      \
219                              .base = (offsetof(struct radv_dgc_params, field) & ~3), .range = 4),  \
220       nir_imm_int((b), (offsetof(struct radv_dgc_params, field) & 2) * 8), nir_imm_int((b), 16))
221 
222 #define load_param8(b, field)                                                                      \
223    nir_ubfe(                                                                                       \
224       (b),                                                                                         \
225       nir_load_push_constant((b), 1, 32, nir_imm_int((b), 0),                                      \
226                              .base = (offsetof(struct radv_dgc_params, field) & ~3), .range = 4),  \
227       nir_imm_int((b), (offsetof(struct radv_dgc_params, field) & 3) * 8), nir_imm_int((b), 8))
228 
229 #define load_param64(b, field)                                                                     \
230    nir_pack_64_2x32((b), nir_load_push_constant((b), 2, 32, nir_imm_int((b), 0),                   \
231                           .base = offsetof(struct radv_dgc_params, field), .range = 8))
232 
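/* Build a PKT3 header with a count that is only known at shader run time. */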
233 static nir_ssa_def *
234 nir_pkt3(nir_builder *b, unsigned op, nir_ssa_def *len)
235 {
236    len = nir_iand_imm(b, len, 0x3fff);
237    return nir_ior_imm(b, nir_ishl_imm(b, len, 16), PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op));
238 }
239 
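/* Emit the SET_SH_REG packet for the draw user SGPRs: base vertex, plus draw
 * id and/or base instance when the corresponding flags are set in
 * vtx_base_sgpr. Always writes 5 dwords, filling unused slots with NOP
 * packets so the emitted size stays constant.
 */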
240 static void
241 dgc_emit_userdata_vertex(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *vtx_base_sgpr,
242                          nir_ssa_def *first_vertex, nir_ssa_def *first_instance, nir_ssa_def *drawid)
243 {
244    vtx_base_sgpr = nir_u2u32(b, vtx_base_sgpr);
245    nir_ssa_def *has_drawid =
246       nir_test_mask(b, vtx_base_sgpr, DGC_USES_DRAWID);
247    nir_ssa_def *has_baseinstance =
248       nir_test_mask(b, vtx_base_sgpr, DGC_USES_BASEINSTANCE);
249 
250    nir_ssa_def *pkt_cnt = nir_imm_int(b, 1);
251    pkt_cnt = nir_bcsel(b, has_drawid, nir_iadd_imm(b, pkt_cnt, 1), pkt_cnt);
252    pkt_cnt = nir_bcsel(b, has_baseinstance, nir_iadd_imm(b, pkt_cnt, 1), pkt_cnt);
253 
254    nir_ssa_def *values[5] = {
255       nir_pkt3(b, PKT3_SET_SH_REG, pkt_cnt), nir_iand_imm(b, vtx_base_sgpr, 0x3FFF), first_vertex,
256       nir_imm_int(b, PKT3_NOP_PAD),          nir_imm_int(b, PKT3_NOP_PAD),
257    };
258 
259    values[3] = nir_bcsel(b, nir_ior(b, has_drawid, has_baseinstance),
260                          nir_bcsel(b, has_drawid, drawid, first_instance), values[4]);
261    values[4] = nir_bcsel(b, nir_iand(b, has_drawid, has_baseinstance), first_instance, values[4]);
262 
263    dgc_emit(b, cs, nir_vec(b, values, 5));
264 }
265 
266 static void
267 dgc_emit_instance_count(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *instance_count)
268 {
269    nir_ssa_def *values[2] = {nir_imm_int(b, PKT3(PKT3_NUM_INSTANCES, 0, false)), instance_count};
270 
271    dgc_emit(b, cs, nir_vec(b, values, 2));
272 }
273 
274 static void
275 dgc_emit_draw_indexed(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *index_offset,
276                       nir_ssa_def *index_count, nir_ssa_def *max_index_count)
277 {
278    nir_ssa_def *values[5] = {nir_imm_int(b, PKT3(PKT3_DRAW_INDEX_OFFSET_2, 3, false)),
279                              max_index_count, index_offset, index_count,
280                              nir_imm_int(b, V_0287F0_DI_SRC_SEL_DMA)};
281 
282    dgc_emit(b, cs, nir_vec(b, values, 5));
283 }
284 
285 static void
286 dgc_emit_draw(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *vertex_count)
287 {
288    nir_ssa_def *values[3] = {nir_imm_int(b, PKT3(PKT3_DRAW_INDEX_AUTO, 1, false)), vertex_count,
289                              nir_imm_int(b, V_0287F0_DI_SRC_SEL_AUTO_INDEX)};
290 
291    dgc_emit(b, cs, nir_vec(b, values, 3));
292 }
293 
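/* Fill the gap between the end of the generated commands
 * (sequence_count * stride) and the total command buffer size with NOP
 * packets so the whole buffer is valid to execute. Only invocation 0 writes
 * the tail.
 */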
294 static void
295 build_dgc_buffer_tail(nir_builder *b, nir_ssa_def *sequence_count)
296 {
297    nir_ssa_def *global_id = get_global_ids(b, 1);
298 
299    nir_ssa_def *cmd_buf_stride = load_param32(b, cmd_buf_stride);
300    nir_ssa_def *cmd_buf_size = load_param32(b, cmd_buf_size);
301 
302    nir_push_if(b, nir_ieq_imm(b, global_id, 0));
303    {
304       nir_ssa_def *cmd_buf_tail_start = nir_imul(b, cmd_buf_stride, sequence_count);
305 
306       nir_variable *offset =
307          nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "offset");
308       nir_store_var(b, offset, cmd_buf_tail_start, 0x1);
309 
310       nir_ssa_def *dst_buf = radv_meta_load_descriptor(b, 0, DGC_DESC_PREPARE);
311       nir_push_loop(b);
312       {
313          nir_ssa_def *curr_offset = nir_load_var(b, offset);
314          const unsigned MAX_PACKET_WORDS = 0x3FFC;
315 
316          nir_push_if(b, nir_ieq(b, curr_offset, cmd_buf_size));
317          {
318             nir_jump(b, nir_jump_break);
319          }
320          nir_pop_if(b, NULL);
321 
322          nir_ssa_def *packet_size = nir_isub(b, cmd_buf_size, curr_offset);
323          packet_size = nir_umin(b, packet_size, nir_imm_int(b, MAX_PACKET_WORDS * 4));
324 
325          nir_ssa_def *len = nir_ushr_imm(b, packet_size, 2);
326          len = nir_iadd_imm(b, len, -2);
327          nir_ssa_def *packet = nir_pkt3(b, PKT3_NOP, len);
328 
329          nir_store_ssbo(b, packet, dst_buf, curr_offset, .access = ACCESS_NON_READABLE);
330          nir_store_var(b, offset, nir_iadd(b, curr_offset, packet_size), 0x1);
331       }
332       nir_pop_loop(b, NULL);
333    }
334    nir_pop_if(b, NULL);
335 }
336 
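/* Build the compute shader that expands the application-provided token
 * streams into an executable command buffer. One invocation per sequence
 * writes VBO descriptors, push constants, index buffer binds, state and the
 * final draw packet, then pads the rest of its slice with NOPs.
 */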
337 static nir_shader *
338 build_dgc_prepare_shader(struct radv_device *dev)
339 {
340    nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_dgc_prepare");
341    b.shader->info.workgroup_size[0] = 64;
342 
343    nir_ssa_def *global_id = get_global_ids(&b, 1);
344 
345    nir_ssa_def *sequence_id = global_id;
346 
347    nir_ssa_def *cmd_buf_stride = load_param32(&b, cmd_buf_stride);
348    nir_ssa_def *sequence_count = load_param32(&b, sequence_count);
349    nir_ssa_def *stream_stride = load_param32(&b, stream_stride);
350 
351    nir_variable *count_var = nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), "sequence_count");
352    nir_store_var(&b, count_var, sequence_count, 0x1);
353 
354    nir_push_if(&b, nir_ieq_imm(&b, sequence_count, UINT32_MAX));
355    {
356       nir_ssa_def *count_buf = radv_meta_load_descriptor(&b, 0, DGC_DESC_COUNT);
357       nir_ssa_def *cnt = nir_load_ssbo(&b, 1, 32, count_buf, nir_imm_int(&b, 0), .align_mul = 4);
358       nir_store_var(&b, count_var, cnt, 0x1);
359    }
360    nir_pop_if(&b, NULL);
361 
362    sequence_count = nir_load_var(&b, count_var);
363 
364    nir_push_if(&b, nir_ult(&b, sequence_id, sequence_count));
365    {
366       struct dgc_cmdbuf cmd_buf = {
367          .descriptor = radv_meta_load_descriptor(&b, 0, DGC_DESC_PREPARE),
368          .offset = nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), "cmd_buf_offset"),
369       };
370       nir_store_var(&b, cmd_buf.offset, nir_imul(&b, global_id, cmd_buf_stride), 1);
371       nir_ssa_def *cmd_buf_end = nir_iadd(&b, nir_load_var(&b, cmd_buf.offset), cmd_buf_stride);
372 
373       nir_ssa_def *stream_buf = radv_meta_load_descriptor(&b, 0, DGC_DESC_STREAM);
374       nir_ssa_def *stream_base = nir_imul(&b, sequence_id, stream_stride);
375 
376       nir_variable *upload_offset =
377          nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), "upload_offset");
378       nir_store_var(&b, upload_offset,
379                     nir_iadd(&b, load_param32(&b, cmd_buf_size),
380                              nir_imul(&b, load_param32(&b, upload_stride), sequence_id)),
381                     0x1);
382 
383       nir_ssa_def *vbo_bind_mask = load_param32(&b, vbo_bind_mask);
384       nir_ssa_def *vbo_cnt = nir_iand_imm(&b, load_param8(&b, vbo_cnt), 0x7F);
385       nir_push_if(&b, nir_ine_imm(&b, vbo_bind_mask, 0));
386       {
387          nir_variable *vbo_idx =
388             nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), "vbo_idx");
389          nir_store_var(&b, vbo_idx, nir_imm_int(&b, 0), 0x1);
390 
391          nir_push_loop(&b);
392          {
393             nir_push_if(&b, nir_uge(&b, nir_load_var(&b, vbo_idx), vbo_cnt));
394             {
395                nir_jump(&b, nir_jump_break);
396             }
397             nir_pop_if(&b, NULL);
398 
399             nir_ssa_def *vbo_offset = nir_imul_imm(&b, nir_load_var(&b, vbo_idx), 16);
400             nir_variable *vbo_data =
401                nir_variable_create(b.shader, nir_var_shader_temp, glsl_uvec4_type(), "vbo_data");
402 
403             nir_ssa_def *param_buf = radv_meta_load_descriptor(&b, 0, DGC_DESC_PARAMS);
404             nir_store_var(&b, vbo_data,
405                           nir_load_ssbo(&b, 4, 32, param_buf, vbo_offset, .align_mul = 4), 0xf);
406 
407             nir_ssa_def *vbo_override =
408                nir_ine_imm(&b,
409                        nir_iand(&b, vbo_bind_mask,
410                                 nir_ishl(&b, nir_imm_int(&b, 1), nir_load_var(&b, vbo_idx))),
411                        0);
412             nir_push_if(&b, vbo_override);
413             {
414                nir_ssa_def *vbo_offset_offset =
415                   nir_iadd(&b, nir_imul_imm(&b, vbo_cnt, 16),
416                            nir_imul_imm(&b, nir_load_var(&b, vbo_idx), 8));
417                nir_ssa_def *vbo_over_data =
418                   nir_load_ssbo(&b, 2, 32, param_buf, vbo_offset_offset, .align_mul = 4);
419                nir_ssa_def *stream_offset = nir_iadd(
420                   &b, stream_base, nir_iand_imm(&b, nir_channel(&b, vbo_over_data, 0), 0x7FFF));
421                nir_ssa_def *stream_data =
422                   nir_load_ssbo(&b, 4, 32, stream_buf, stream_offset, .align_mul = 4);
423 
424                nir_ssa_def *va = nir_pack_64_2x32(&b, nir_channels(&b, stream_data, 0x3));
425                nir_ssa_def *size = nir_channel(&b, stream_data, 2);
426                nir_ssa_def *stride = nir_channel(&b, stream_data, 3);
427 
428                nir_ssa_def *vs_state_offset = nir_ubfe(&b, nir_channel(&b, vbo_over_data, 0), nir_imm_int(&b, 16), nir_imm_int(&b, 15));
429                va = nir_iadd(&b, va, nir_u2u64(&b, vs_state_offset));
430 
431                nir_ssa_def *dyn_stride = nir_test_mask(&b, nir_channel(&b, vbo_over_data, 0), DGC_DYNAMIC_STRIDE);
432                nir_ssa_def *old_stride =
433                   nir_ubfe(&b, nir_channel(&b, nir_load_var(&b, vbo_data), 1), nir_imm_int(&b, 16),
434                            nir_imm_int(&b, 14));
435                stride = nir_bcsel(&b, dyn_stride, stride, old_stride);
436 
437                nir_ssa_def *use_per_attribute_vb_descs =
438                   nir_test_mask(&b, nir_channel(&b, vbo_over_data, 0), 1u << 31);
439                nir_variable *num_records = nir_variable_create(b.shader, nir_var_shader_temp,
440                                                                glsl_uint_type(), "num_records");
441                nir_store_var(&b, num_records, size, 0x1);
442 
443                nir_push_if(&b, use_per_attribute_vb_descs);
444                {
445                   nir_ssa_def *attrib_end = nir_ubfe(&b, nir_channel(&b, vbo_over_data, 1),
446                                                      nir_imm_int(&b, 16), nir_imm_int(&b, 16));
447                   nir_ssa_def *attrib_index_offset =
448                      nir_ubfe(&b, nir_channel(&b, vbo_over_data, 1), nir_imm_int(&b, 0),
449                               nir_imm_int(&b, 16));
450 
451                   nir_push_if(&b, nir_ult(&b, nir_load_var(&b, num_records), attrib_end));
452                   {
453                      nir_store_var(&b, num_records, nir_imm_int(&b, 0), 0x1);
454                   }
455                   nir_push_else(&b, NULL);
456                   nir_push_if(&b, nir_ieq_imm(&b, stride, 0));
457                   {
458                      nir_store_var(&b, num_records, nir_imm_int(&b, 1), 0x1);
459                   }
460                   nir_push_else(&b, NULL);
461                   {
462                      nir_ssa_def *r = nir_iadd(
463                         &b,
464                         nir_iadd_imm(
465                            &b,
466                            nir_udiv(&b, nir_isub(&b, nir_load_var(&b, num_records), attrib_end),
467                                     stride),
468                            1),
469                         attrib_index_offset);
470                      nir_store_var(&b, num_records, r, 0x1);
471                   }
472                   nir_pop_if(&b, NULL);
473                   nir_pop_if(&b, NULL);
474 
475                   nir_ssa_def *convert_cond =
476                      nir_ine_imm(&b, nir_load_var(&b, num_records), 0);
477                   if (dev->physical_device->rad_info.gfx_level == GFX9)
478                      convert_cond = nir_imm_bool(&b, false);
479                   else if (dev->physical_device->rad_info.gfx_level != GFX8)
480                      convert_cond =
481                         nir_iand(&b, convert_cond, nir_ieq_imm(&b, stride, 0));
482 
483                   nir_ssa_def *new_records = nir_iadd(
484                      &b, nir_imul(&b, nir_iadd_imm(&b, nir_load_var(&b, num_records), -1), stride),
485                      attrib_end);
486                   new_records =
487                      nir_bcsel(&b, convert_cond, new_records, nir_load_var(&b, num_records));
488                   nir_store_var(&b, num_records, new_records, 0x1);
489                }
490                nir_push_else(&b, NULL);
491                {
492                   if (dev->physical_device->rad_info.gfx_level != GFX8) {
493                      nir_push_if(&b, nir_ine_imm(&b, stride, 0));
494                      {
495                         nir_ssa_def *r = nir_iadd(&b, nir_load_var(&b, num_records),
496                                                   nir_iadd_imm(&b, stride, -1));
497                         nir_store_var(&b, num_records, nir_udiv(&b, r, stride), 0x1);
498                      }
499                      nir_pop_if(&b, NULL);
500                   }
501                }
502                nir_pop_if(&b, NULL);
503 
504                nir_ssa_def *rsrc_word3 = nir_channel(&b, nir_load_var(&b, vbo_data), 3);
505                if (dev->physical_device->rad_info.gfx_level >= GFX10) {
506                   nir_ssa_def *oob_select = nir_bcsel(
507                      &b, nir_ieq_imm(&b, stride, 0), nir_imm_int(&b, V_008F0C_OOB_SELECT_RAW),
508                      nir_imm_int(&b, V_008F0C_OOB_SELECT_STRUCTURED));
509                   rsrc_word3 = nir_iand_imm(&b, rsrc_word3, C_008F0C_OOB_SELECT);
510                   rsrc_word3 = nir_ior(&b, rsrc_word3, nir_ishl_imm(&b, oob_select, 28));
511                }
512 
513                nir_ssa_def *va_hi = nir_iand_imm(&b, nir_unpack_64_2x32_split_y(&b, va), 0xFFFF);
514                stride = nir_iand_imm(&b, stride, 0x3FFF);
515                nir_ssa_def *new_vbo_data[4] = {nir_unpack_64_2x32_split_x(&b, va),
516                                                nir_ior(&b, nir_ishl_imm(&b, stride, 16), va_hi),
517                                                nir_load_var(&b, num_records), rsrc_word3};
518                nir_store_var(&b, vbo_data, nir_vec(&b, new_vbo_data, 4), 0xf);
519             }
520             nir_pop_if(&b, NULL);
521 
522             /* On GFX9, it seems bounds checking is disabled if both
523              * num_records and stride are zero. This doesn't seem necessary on GFX8, GFX10 and
524              * GFX10.3 but it doesn't hurt.
525              */
526             nir_ssa_def *num_records = nir_channel(&b, nir_load_var(&b, vbo_data), 2);
527             nir_ssa_def *buf_va = nir_iand_imm(
528                &b, nir_pack_64_2x32(&b, nir_channels(&b, nir_load_var(&b, vbo_data), 0x3)),
529                (1ull << 48) - 1ull);
530             nir_push_if(&b,
531                         nir_ior(&b, nir_ieq_imm(&b, num_records, 0), nir_ieq_imm(&b, buf_va, 0)));
532             {
533                nir_ssa_def *use_dynamic_vertex_input =
534                   nir_test_mask(&b, load_param8(&b, vbo_cnt), DGC_DYNAMIC_VERTEX_INPUT);
535 
536                nir_push_if(&b, use_dynamic_vertex_input);
537                {
538                   nir_ssa_def *new_vbo_data[4] = {
539                      nir_imm_int(&b, 0), nir_imm_int(&b, S_008F04_STRIDE(16)), nir_imm_int(&b, 0),
540                      nir_channel(&b, nir_load_var(&b, vbo_data), 3)};
541                   nir_store_var(&b, vbo_data, nir_vec(&b, new_vbo_data, 4), 0xf);
542                }
543                nir_push_else(&b, NULL);
544                {
545                   nir_ssa_def *new_vbo_data[4] = {nir_imm_int(&b, 0), nir_imm_int(&b, 0),
546                                                   nir_imm_int(&b, 0), nir_imm_int(&b, 0)};
547                   nir_store_var(&b, vbo_data, nir_vec(&b, new_vbo_data, 4), 0xf);
548                }
549                nir_pop_if(&b, NULL);
550             }
551             nir_pop_if(&b, NULL);
552 
553             nir_ssa_def *upload_off = nir_iadd(&b, nir_load_var(&b, upload_offset), vbo_offset);
554             nir_store_ssbo(&b, nir_load_var(&b, vbo_data), cmd_buf.descriptor, upload_off, .access = ACCESS_NON_READABLE);
555             nir_store_var(&b, vbo_idx, nir_iadd_imm(&b, nir_load_var(&b, vbo_idx), 1), 0x1);
556          }
557          nir_pop_loop(&b, NULL);
558          nir_ssa_def *packet[3] = {
559             nir_imm_int(&b, PKT3(PKT3_SET_SH_REG, 1, 0)), load_param16(&b, vbo_reg),
560             nir_iadd(&b, load_param32(&b, upload_addr), nir_load_var(&b, upload_offset))};
561 
562          dgc_emit(&b, &cmd_buf, nir_vec(&b, packet, 3));
563 
564          nir_store_var(&b, upload_offset,
565                        nir_iadd(&b, nir_load_var(&b, upload_offset), nir_imul_imm(&b, vbo_cnt, 16)),
566                        0x1);
567       }
568       nir_pop_if(&b, NULL);
569 
570 
571       nir_ssa_def *push_const_mask = load_param64(&b, push_constant_mask);
572       nir_push_if(&b, nir_ine_imm(&b, push_const_mask, 0));
573       {
574          nir_ssa_def *const_copy = nir_ine_imm(&b, load_param8(&b, const_copy), 0);
575          nir_ssa_def *const_copy_size = load_param16(&b, const_copy_size);
576          nir_ssa_def *const_copy_words = nir_ushr_imm(&b, const_copy_size, 2);
577          const_copy_words = nir_bcsel(&b, const_copy, const_copy_words, nir_imm_int(&b, 0));
578 
579          nir_variable *idx =
580             nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), "const_copy_idx");
581          nir_store_var(&b, idx, nir_imm_int(&b, 0), 0x1);
582 
583          nir_ssa_def *param_buf = radv_meta_load_descriptor(&b, 0, DGC_DESC_PARAMS);
584          nir_ssa_def *param_offset = nir_imul_imm(&b, vbo_cnt, 24);
585          nir_ssa_def *param_offset_offset = nir_iadd_imm(&b, param_offset, MESA_VULKAN_SHADER_STAGES * 12);
586          nir_ssa_def *param_const_offset = nir_iadd_imm(&b, param_offset, MAX_PUSH_CONSTANTS_SIZE + MESA_VULKAN_SHADER_STAGES * 12);
587          nir_push_loop(&b);
588          {
589             nir_ssa_def *cur_idx = nir_load_var(&b, idx);
590             nir_push_if(&b, nir_uge(&b, cur_idx, const_copy_words));
591             {
592                nir_jump(&b, nir_jump_break);
593             }
594             nir_pop_if(&b, NULL);
595 
596             nir_variable *data = nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), "copy_data");
597 
598             nir_ssa_def *update = nir_iand(&b, push_const_mask, nir_ishl(&b, nir_imm_int64(&b, 1), cur_idx));
599             update = nir_bcsel(
600                &b, nir_ult(&b, cur_idx, nir_imm_int(&b, 64 /* bits in push_const_mask */)), update,
601                nir_imm_int64(&b, 0));
602 
603             nir_push_if(&b, nir_ine_imm(&b, update, 0));
604             {
605                nir_ssa_def *stream_offset = nir_load_ssbo(
606                   &b, 1, 32, param_buf,
607                   nir_iadd(&b, param_offset_offset, nir_ishl_imm(&b, cur_idx, 2)), .align_mul = 4);
608                nir_ssa_def *new_data = nir_load_ssbo(&b, 1, 32, stream_buf, nir_iadd(&b, stream_base, stream_offset), .align_mul = 4);
609                nir_store_var(&b, data, new_data, 0x1);
610             }
611             nir_push_else(&b, NULL);
612             {
613                nir_store_var(
614                   &b, data,
615                   nir_load_ssbo(&b, 1, 32, param_buf,
616                                 nir_iadd(&b, param_const_offset, nir_ishl_imm(&b, cur_idx, 2)),
617                                 .align_mul = 4),
618                   0x1);
619             }
620             nir_pop_if(&b, NULL);
621 
622             nir_store_ssbo(
623                &b, nir_load_var(&b, data), cmd_buf.descriptor,
624                nir_iadd(&b, nir_load_var(&b, upload_offset), nir_ishl_imm(&b, cur_idx, 2)),
625                .access = ACCESS_NON_READABLE);
626 
627             nir_store_var(&b, idx, nir_iadd_imm(&b, cur_idx, 1), 0x1);
628          }
629          nir_pop_loop(&b, NULL);
630 
631          nir_variable *shader_idx =
632             nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), "shader_idx");
633          nir_store_var(&b, shader_idx, nir_imm_int(&b, 0), 0x1);
634          nir_ssa_def *shader_cnt = load_param16(&b, push_constant_shader_cnt);
635 
636          nir_push_loop(&b);
637          {
638             nir_ssa_def *cur_shader_idx = nir_load_var(&b, shader_idx);
639             nir_push_if(&b, nir_uge(&b, cur_shader_idx, shader_cnt));
640             {
641                nir_jump(&b, nir_jump_break);
642             }
643             nir_pop_if(&b, NULL);
644 
645             nir_ssa_def *reg_info = nir_load_ssbo(&b, 3, 32, param_buf, nir_iadd(&b, param_offset, nir_imul_imm(&b, cur_shader_idx, 12)), .align_mul = 4);
646             nir_ssa_def *upload_sgpr = nir_ubfe(&b, nir_channel(&b, reg_info, 0), nir_imm_int(&b, 0), nir_imm_int(&b, 16));
647             nir_ssa_def *inline_sgpr = nir_ubfe(&b, nir_channel(&b, reg_info, 0), nir_imm_int(&b, 16), nir_imm_int(&b, 16));
648             nir_ssa_def *inline_mask = nir_pack_64_2x32(&b, nir_channels(&b, reg_info, 0x6));
649 
650             nir_push_if(&b, nir_ine_imm(&b, upload_sgpr, 0));
651             {
652                nir_ssa_def *pkt[3] = {
653                   nir_imm_int(&b, PKT3(PKT3_SET_SH_REG, 1, 0)),
654                   upload_sgpr,
655                   nir_iadd(&b, load_param32(&b, upload_addr), nir_load_var(&b, upload_offset))
656                };
657 
658                dgc_emit(&b, &cmd_buf, nir_vec(&b, pkt, 3));
659             }
660             nir_pop_if(&b, NULL);
661 
662             nir_push_if(&b, nir_ine_imm(&b, inline_sgpr, 0));
663             {
664                nir_ssa_def *inline_len = nir_bit_count(&b, inline_mask);
665                nir_store_var(&b, idx, nir_imm_int(&b, 0), 0x1);
666 
667                nir_ssa_def *pkt[2] = {
668                   nir_pkt3(&b, PKT3_SET_SH_REG, inline_len),
669                   inline_sgpr
670                };
671 
672                dgc_emit(&b, &cmd_buf, nir_vec(&b, pkt, 2));
673 
674                nir_push_loop(&b);
675                {
676                   nir_ssa_def *cur_idx = nir_load_var(&b, idx);
677                   nir_push_if(&b,
678                               nir_uge(&b, cur_idx, nir_imm_int(&b, 64 /* bits in inline_mask */)));
679                   {
680                      nir_jump(&b, nir_jump_break);
681                   }
682                   nir_pop_if(&b, NULL);
683 
684                   nir_ssa_def *l = nir_ishl(&b, nir_imm_int64(&b, 1), cur_idx);
685                   nir_push_if(&b, nir_ieq_imm(&b, nir_iand(&b, l, inline_mask), 0));
686                   {
687                      nir_store_var(&b, idx, nir_iadd_imm(&b, cur_idx, 1), 0x1);
688                      nir_jump(&b, nir_jump_continue);
689                   }
690                   nir_pop_if(&b, NULL);
691 
692                   nir_variable *data = nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), "copy_data");
693 
694                   nir_ssa_def *update = nir_iand(&b, push_const_mask, nir_ishl(&b, nir_imm_int64(&b, 1), cur_idx));
695                   update = nir_bcsel(
696                      &b, nir_ult(&b, cur_idx, nir_imm_int(&b, 64 /* bits in push_const_mask */)),
697                      update, nir_imm_int64(&b, 0));
698 
699                   nir_push_if(&b, nir_ine_imm(&b, update, 0));
700                   {
701                      nir_ssa_def *stream_offset = nir_load_ssbo(
702                         &b, 1, 32, param_buf,
703                         nir_iadd(&b, param_offset_offset, nir_ishl_imm(&b, cur_idx, 2)),
704                         .align_mul = 4);
705                      nir_ssa_def *new_data = nir_load_ssbo(&b, 1, 32, stream_buf, nir_iadd(&b, stream_base, stream_offset), .align_mul = 4);
706                      nir_store_var(&b, data, new_data, 0x1);
707                   }
708                   nir_push_else(&b, NULL);
709                   {
710                      nir_store_var(&b, data,
711                                    nir_load_ssbo(&b, 1, 32, param_buf,
712                                                  nir_iadd(&b, param_const_offset,
713                                                           nir_ishl_imm(&b, cur_idx, 2)),
714                                                  .align_mul = 4),
715                                    0x1);
716                   }
717                   nir_pop_if(&b, NULL);
718 
719                   dgc_emit(&b, &cmd_buf, nir_load_var(&b, data));
720 
721                   nir_store_var(&b, idx, nir_iadd_imm(&b, cur_idx, 1), 0x1);
722                }
723                nir_pop_loop(&b, NULL);
724             }
725             nir_pop_if(&b, NULL);
726             nir_store_var(&b, shader_idx, nir_iadd_imm(&b, cur_shader_idx, 1), 0x1);
727          }
728          nir_pop_loop(&b, NULL);
729       }
730       nir_pop_if(&b, 0);
731 
732       nir_push_if(&b, nir_ieq_imm(&b, load_param16(&b, emit_state), 1));
733       {
734          nir_ssa_def *stream_offset = nir_iadd(&b, load_param16(&b, state_offset), stream_base);
735          nir_ssa_def *state = nir_load_ssbo(&b, 1, 32, stream_buf, stream_offset, .align_mul = 4);
736          state = nir_iand_imm(&b, state, 1);
737 
738          nir_ssa_def *reg =
739             nir_ior(&b, load_param32(&b, pa_su_sc_mode_cntl_base), nir_ishl_imm(&b, state, 2));
740 
741          nir_ssa_def *cmd_values[3] = {
742             nir_imm_int(&b, PKT3(PKT3_SET_CONTEXT_REG, 1, 0)),
743             nir_imm_int(&b, (R_028814_PA_SU_SC_MODE_CNTL - SI_CONTEXT_REG_OFFSET) >> 2), reg};
744 
745          dgc_emit(&b, &cmd_buf, nir_vec(&b, cmd_values, 3));
746       }
747       nir_pop_if(&b, NULL);
748 
749       nir_ssa_def *scissor_count = load_param16(&b, scissor_count);
750       nir_push_if(&b, nir_ine_imm(&b, scissor_count, 0));
751       {
752          nir_ssa_def *scissor_offset = load_param16(&b, scissor_offset);
753          nir_variable *idx = nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(),
754                                                  "scissor_copy_idx");
755          nir_store_var(&b, idx, nir_imm_int(&b, 0), 1);
756 
757          nir_push_loop(&b);
758          {
759             nir_ssa_def *cur_idx = nir_load_var(&b, idx);
760             nir_push_if(&b, nir_uge(&b, cur_idx, scissor_count));
761             {
762                nir_jump(&b, nir_jump_break);
763             }
764             nir_pop_if(&b, NULL);
765 
766             nir_ssa_def *param_buf = radv_meta_load_descriptor(&b, 0, DGC_DESC_PARAMS);
767             nir_ssa_def *param_offset = nir_iadd(&b, scissor_offset, nir_imul_imm(&b, cur_idx, 4));
768             nir_ssa_def *value = nir_load_ssbo(&b, 1, 32, param_buf, param_offset, .align_mul = 4);
769 
770             dgc_emit(&b, &cmd_buf, value);
771 
772             nir_store_var(&b, idx, nir_iadd_imm(&b, cur_idx, 1), 1);
773          }
774          nir_pop_loop(&b, NULL);
775       }
776       nir_pop_if(&b, NULL);
777 
778       nir_push_if(&b, nir_ieq_imm(&b, load_param16(&b, draw_indexed), 0));
779       {
780          nir_ssa_def *vtx_base_sgpr = load_param16(&b, vtx_base_sgpr);
781          nir_ssa_def *stream_offset =
782             nir_iadd(&b, load_param16(&b, draw_params_offset), stream_base);
783 
784          nir_ssa_def *draw_data0 =
785             nir_load_ssbo(&b, 4, 32, stream_buf, stream_offset, .align_mul = 4);
786          nir_ssa_def *vertex_count = nir_channel(&b, draw_data0, 0);
787          nir_ssa_def *instance_count = nir_channel(&b, draw_data0, 1);
788          nir_ssa_def *vertex_offset = nir_channel(&b, draw_data0, 2);
789          nir_ssa_def *first_instance = nir_channel(&b, draw_data0, 3);
790 
791          nir_push_if(&b, nir_iand(&b, nir_ine_imm(&b, vertex_count, 0), nir_ine_imm(&b, instance_count, 0)));
792          {
793             dgc_emit_userdata_vertex(&b, &cmd_buf, vtx_base_sgpr, vertex_offset, first_instance, sequence_id);
794             dgc_emit_instance_count(&b, &cmd_buf, instance_count);
795             dgc_emit_draw(&b, &cmd_buf, vertex_count);
796          }
797          nir_pop_if(&b, 0);
798       }
799       nir_push_else(&b, NULL);
800       {
801          nir_variable *index_size_var =
802             nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), "index_size");
803          nir_store_var(&b, index_size_var, load_param16(&b, base_index_size), 0x1);
804          nir_variable *max_index_count_var =
805             nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), "max_index_count");
806          nir_store_var(&b, max_index_count_var, load_param32(&b, max_index_count), 0x1);
807 
808          nir_ssa_def *bind_index_buffer = nir_ieq_imm(&b, nir_load_var(&b, index_size_var), 0);
809          nir_push_if(&b, bind_index_buffer);
810          {
811             nir_ssa_def *index_stream_offset =
812                nir_iadd(&b, load_param16(&b, index_buffer_offset), stream_base);
813             nir_ssa_def *data =
814                nir_load_ssbo(&b, 4, 32, stream_buf, index_stream_offset, .align_mul = 4);
815 
816             nir_ssa_def *vk_index_type = nir_channel(&b, data, 3);
817             nir_ssa_def *index_type = nir_bcsel(
818                &b, nir_ieq(&b, vk_index_type, load_param32(&b, ibo_type_32)),
819                nir_imm_int(&b, V_028A7C_VGT_INDEX_32), nir_imm_int(&b, V_028A7C_VGT_INDEX_16));
820             index_type = nir_bcsel(&b, nir_ieq(&b, vk_index_type, load_param32(&b, ibo_type_8)),
821                                    nir_imm_int(&b, V_028A7C_VGT_INDEX_8), index_type);
822 
823             nir_ssa_def *index_size = nir_iand_imm(
824                &b, nir_ushr(&b, nir_imm_int(&b, 0x142), nir_imul_imm(&b, index_type, 4)), 0xf);
825             nir_store_var(&b, index_size_var, index_size, 0x1);
826 
827             nir_ssa_def *max_index_count = nir_udiv(&b, nir_channel(&b, data, 2), index_size);
828             nir_store_var(&b, max_index_count_var, max_index_count, 0x1);
829 
830             nir_ssa_def *cmd_values[3 + 2 + 3];
831 
832             if (dev->physical_device->rad_info.gfx_level >= GFX9) {
833                unsigned opcode = PKT3_SET_UCONFIG_REG_INDEX;
834                if (dev->physical_device->rad_info.gfx_level < GFX9 ||
835                    (dev->physical_device->rad_info.gfx_level == GFX9 &&
836                     dev->physical_device->rad_info.me_fw_version < 26))
837                   opcode = PKT3_SET_UCONFIG_REG;
838                cmd_values[0] = nir_imm_int(&b, PKT3(opcode, 1, 0));
839                cmd_values[1] = nir_imm_int(
840                   &b, (R_03090C_VGT_INDEX_TYPE - CIK_UCONFIG_REG_OFFSET) >> 2 | (2u << 28));
841                cmd_values[2] = index_type;
842             } else {
843                cmd_values[0] = nir_imm_int(&b, PKT3(PKT3_INDEX_TYPE, 0, 0));
844                cmd_values[1] = index_type;
845                cmd_values[2] = nir_imm_int(&b, PKT3_NOP_PAD);
846             }
847 
848             nir_ssa_def *addr_upper = nir_channel(&b, data, 1);
849             addr_upper = nir_ishr_imm(&b, nir_ishl_imm(&b, addr_upper, 16), 16);
850 
851             cmd_values[3] = nir_imm_int(&b, PKT3(PKT3_INDEX_BASE, 1, 0));
852             cmd_values[4] = nir_channel(&b, data, 0);
853             cmd_values[5] = addr_upper;
854             cmd_values[6] = nir_imm_int(&b, PKT3(PKT3_INDEX_BUFFER_SIZE, 0, 0));
855             cmd_values[7] = max_index_count;
856 
857             dgc_emit(&b, &cmd_buf, nir_vec(&b, cmd_values, 8));
858          }
859          nir_pop_if(&b, NULL);
860 
861          nir_ssa_def *index_size = nir_load_var(&b, index_size_var);
862          nir_ssa_def *max_index_count = nir_load_var(&b, max_index_count_var);
863          nir_ssa_def *vtx_base_sgpr = load_param16(&b, vtx_base_sgpr);
864          nir_ssa_def *stream_offset =
865             nir_iadd(&b, load_param16(&b, draw_params_offset), stream_base);
866 
867          index_size =
868             nir_bcsel(&b, bind_index_buffer, nir_load_var(&b, index_size_var), index_size);
869          max_index_count = nir_bcsel(&b, bind_index_buffer, nir_load_var(&b, max_index_count_var),
870                                      max_index_count);
871          nir_ssa_def *draw_data0 =
872             nir_load_ssbo(&b, 4, 32, stream_buf, stream_offset, .align_mul = 4);
873          nir_ssa_def *draw_data1 = nir_load_ssbo(
874             &b, 1, 32, stream_buf, nir_iadd_imm(&b, stream_offset, 16), .align_mul = 4);
875          nir_ssa_def *index_count = nir_channel(&b, draw_data0, 0);
876          nir_ssa_def *instance_count = nir_channel(&b, draw_data0, 1);
877          nir_ssa_def *first_index = nir_channel(&b, draw_data0, 2);
878          nir_ssa_def *vertex_offset = nir_channel(&b, draw_data0, 3);
879          nir_ssa_def *first_instance = nir_channel(&b, draw_data1, 0);
880 
881          nir_push_if(&b, nir_iand(&b, nir_ine_imm(&b, index_count, 0), nir_ine_imm(&b, instance_count, 0)));
882          {
883             dgc_emit_userdata_vertex(&b, &cmd_buf, vtx_base_sgpr, vertex_offset, first_instance, sequence_id);
884             dgc_emit_instance_count(&b, &cmd_buf, instance_count);
885             dgc_emit_draw_indexed(&b, &cmd_buf, first_index, index_count,
886                                        max_index_count);
887          }
888          nir_pop_if(&b, 0);
889       }
890       nir_pop_if(&b, NULL);
891 
892       /* Pad the cmdbuffer if we did not use the whole stride */
893       nir_push_if(&b, nir_ine(&b, nir_load_var(&b, cmd_buf.offset), cmd_buf_end));
894       {
895          nir_ssa_def *cnt = nir_isub(&b, cmd_buf_end, nir_load_var(&b, cmd_buf.offset));
896          cnt = nir_ushr_imm(&b, cnt, 2);
897          cnt = nir_iadd_imm(&b, cnt, -2);
898          nir_ssa_def *pkt = nir_pkt3(&b, PKT3_NOP, cnt);
899 
900          dgc_emit(&b, &cmd_buf, pkt);
901       }
902       nir_pop_if(&b, NULL);
903    }
904    nir_pop_if(&b, NULL);
905 
906    build_dgc_buffer_tail(&b, sequence_count);
907    return b.shader;
908 }
909 
910 void
911 radv_device_finish_dgc_prepare_state(struct radv_device *device)
912 {
913    radv_DestroyPipeline(radv_device_to_handle(device), device->meta_state.dgc_prepare.pipeline,
914                         &device->meta_state.alloc);
915    radv_DestroyPipelineLayout(radv_device_to_handle(device),
916                               device->meta_state.dgc_prepare.p_layout, &device->meta_state.alloc);
917    device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device),
918                                                         device->meta_state.dgc_prepare.ds_layout,
919                                                         &device->meta_state.alloc);
920 }
921 
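/* Create the descriptor set layout, pipeline layout (push constants are
 * struct radv_dgc_params) and compute pipeline used to prepare generated
 * command buffers.
 */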
922 VkResult
923 radv_device_init_dgc_prepare_state(struct radv_device *device)
924 {
925    VkResult result;
926    nir_shader *cs = build_dgc_prepare_shader(device);
927 
928    VkDescriptorSetLayoutCreateInfo ds_create_info = {
929       .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
930       .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
931       .bindingCount = DGC_NUM_DESCS,
932       .pBindings = (VkDescriptorSetLayoutBinding[]){
933          {.binding = DGC_DESC_STREAM,
934           .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
935           .descriptorCount = 1,
936           .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
937           .pImmutableSamplers = NULL},
938          {.binding = DGC_DESC_PREPARE,
939           .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
940           .descriptorCount = 1,
941           .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
942           .pImmutableSamplers = NULL},
943          {.binding = DGC_DESC_PARAMS,
944           .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
945           .descriptorCount = 1,
946           .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
947           .pImmutableSamplers = NULL},
948          {.binding = DGC_DESC_COUNT,
949           .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
950           .descriptorCount = 1,
951           .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
952           .pImmutableSamplers = NULL},
953       }};
954 
955    result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
956                                            &device->meta_state.alloc,
957                                            &device->meta_state.dgc_prepare.ds_layout);
958    if (result != VK_SUCCESS)
959       goto cleanup;
960 
961    const VkPipelineLayoutCreateInfo leaf_pl_create_info = {
962       .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
963       .setLayoutCount = 1,
964       .pSetLayouts = &device->meta_state.dgc_prepare.ds_layout,
965       .pushConstantRangeCount = 1,
966       .pPushConstantRanges =
967          &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(struct radv_dgc_params)},
968    };
969 
970    result = radv_CreatePipelineLayout(radv_device_to_handle(device), &leaf_pl_create_info,
971                                       &device->meta_state.alloc,
972                                       &device->meta_state.dgc_prepare.p_layout);
973    if (result != VK_SUCCESS)
974       goto cleanup;
975 
976    VkPipelineShaderStageCreateInfo shader_stage = {
977       .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
978       .stage = VK_SHADER_STAGE_COMPUTE_BIT,
979       .module = vk_shader_module_handle_from_nir(cs),
980       .pName = "main",
981       .pSpecializationInfo = NULL,
982    };
983 
984    VkComputePipelineCreateInfo pipeline_info = {
985       .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
986       .stage = shader_stage,
987       .flags = 0,
988       .layout = device->meta_state.dgc_prepare.p_layout,
989    };
990 
991    result = radv_CreateComputePipelines(
992       radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
993       &pipeline_info, &device->meta_state.alloc, &device->meta_state.dgc_prepare.pipeline);
994    if (result != VK_SUCCESS)
995       goto cleanup;
996 
997 cleanup:
998    ralloc_free(cs);
999    return result;
1000 }
1001 
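/* Translate the NV indirect command tokens into the offsets and masks that
 * the DGC prepare shader consumes.
 */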
1002 VkResult
1003 radv_CreateIndirectCommandsLayoutNV(VkDevice _device,
1004                                     const VkIndirectCommandsLayoutCreateInfoNV *pCreateInfo,
1005                                     const VkAllocationCallbacks *pAllocator,
1006                                     VkIndirectCommandsLayoutNV *pIndirectCommandsLayout)
1007 {
1008    RADV_FROM_HANDLE(radv_device, device, _device);
1009    struct radv_indirect_command_layout *layout;
1010 
1011    size_t size =
1012       sizeof(*layout) + pCreateInfo->tokenCount * sizeof(VkIndirectCommandsLayoutTokenNV);
1013 
1014    layout =
1015       vk_zalloc2(&device->vk.alloc, pAllocator, size, alignof(struct radv_indirect_command_layout),
1016                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1017    if (!layout)
1018       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1019 
1020    vk_object_base_init(&device->vk, &layout->base, VK_OBJECT_TYPE_INDIRECT_COMMANDS_LAYOUT_NV);
1021 
1022    layout->input_stride = pCreateInfo->pStreamStrides[0];
1023    layout->token_count = pCreateInfo->tokenCount;
1024    typed_memcpy(layout->tokens, pCreateInfo->pTokens, pCreateInfo->tokenCount);
1025 
1026    layout->ibo_type_32 = VK_INDEX_TYPE_UINT32;
1027    layout->ibo_type_8 = VK_INDEX_TYPE_UINT8_EXT;
1028 
1029    for (unsigned i = 0; i < pCreateInfo->tokenCount; ++i) {
1030       switch (pCreateInfo->pTokens[i].tokenType) {
1031       case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_NV:
1032          layout->draw_params_offset = pCreateInfo->pTokens[i].offset;
1033          break;
1034       case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_NV:
1035          layout->indexed = true;
1036          layout->draw_params_offset = pCreateInfo->pTokens[i].offset;
1037          break;
1038       case VK_INDIRECT_COMMANDS_TOKEN_TYPE_INDEX_BUFFER_NV:
1039          layout->binds_index_buffer = true;
1040          layout->index_buffer_offset = pCreateInfo->pTokens[i].offset;
1041          /* 16-bit is implied if we find no match. */
1042          for (unsigned j = 0; j < pCreateInfo->pTokens[i].indexTypeCount; j++) {
1043             if (pCreateInfo->pTokens[i].pIndexTypes[j] == VK_INDEX_TYPE_UINT32)
1044                layout->ibo_type_32 = pCreateInfo->pTokens[i].pIndexTypeValues[j];
1045             else if (pCreateInfo->pTokens[i].pIndexTypes[j] == VK_INDEX_TYPE_UINT8_EXT)
1046                layout->ibo_type_8 = pCreateInfo->pTokens[i].pIndexTypeValues[j];
1047          }
1048          break;
1049       case VK_INDIRECT_COMMANDS_TOKEN_TYPE_VERTEX_BUFFER_NV:
1050          layout->bind_vbo_mask |= 1u << pCreateInfo->pTokens[i].vertexBindingUnit;
1051          layout->vbo_offsets[pCreateInfo->pTokens[i].vertexBindingUnit] =
1052             pCreateInfo->pTokens[i].offset;
1053          if (pCreateInfo->pTokens[i].vertexDynamicStride)
1054             layout->vbo_offsets[pCreateInfo->pTokens[i].vertexBindingUnit] |= DGC_DYNAMIC_STRIDE;
1055          break;
1056       case VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_NV:
1057          for (unsigned j = pCreateInfo->pTokens[i].pushconstantOffset / 4, k = 0;
1058               k < pCreateInfo->pTokens[i].pushconstantSize / 4; ++j, ++k) {
1059             layout->push_constant_mask |= 1ull << j;
1060             layout->push_constant_offsets[j] = pCreateInfo->pTokens[i].offset + k * 4;
1061          }
1062          break;
1063       case VK_INDIRECT_COMMANDS_TOKEN_TYPE_STATE_FLAGS_NV:
1064          layout->binds_state = true;
1065          layout->state_offset = pCreateInfo->pTokens[i].offset;
1066          break;
1067       default:
1068          unreachable("Unhandled token type");
1069       }
1070    }
1071    if (!layout->indexed)
1072       layout->binds_index_buffer = false;
1073 
1074    *pIndirectCommandsLayout = radv_indirect_command_layout_to_handle(layout);
1075    return VK_SUCCESS;
1076 }
1077 
1078 void
1079 radv_DestroyIndirectCommandsLayoutNV(VkDevice _device,
1080                                      VkIndirectCommandsLayoutNV indirectCommandsLayout,
1081                                      const VkAllocationCallbacks *pAllocator)
1082 {
1083    RADV_FROM_HANDLE(radv_device, device, _device);
1084    VK_FROM_HANDLE(radv_indirect_command_layout, layout, indirectCommandsLayout);
1085 
1086    if (!layout)
1087       return;
1088 
1089    vk_object_base_finish(&layout->base);
1090    vk_free2(&device->vk.alloc, pAllocator, layout);
1091 }
1092 
1093 void
1094 radv_GetGeneratedCommandsMemoryRequirementsNV(
1095    VkDevice _device, const VkGeneratedCommandsMemoryRequirementsInfoNV *pInfo,
1096    VkMemoryRequirements2 *pMemoryRequirements)
1097 {
1098    RADV_FROM_HANDLE(radv_device, device, _device);
1099    VK_FROM_HANDLE(radv_indirect_command_layout, layout, pInfo->indirectCommandsLayout);
1100    VK_FROM_HANDLE(radv_pipeline, pipeline, pInfo->pipeline);
1101    struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);
1102 
1103    uint32_t cmd_stride, upload_stride;
1104    radv_get_sequence_size(layout, graphics_pipeline, &cmd_stride, &upload_stride);
1105 
1106    VkDeviceSize cmd_buf_size = radv_align_cmdbuf_size(cmd_stride * pInfo->maxSequencesCount);
1107    VkDeviceSize upload_buf_size = upload_stride * pInfo->maxSequencesCount;
1108 
1109    pMemoryRequirements->memoryRequirements.memoryTypeBits =
1110       device->physical_device->memory_types_32bit;
1111    pMemoryRequirements->memoryRequirements.alignment = 256;
1112    pMemoryRequirements->memoryRequirements.size =
1113       align(cmd_buf_size + upload_buf_size, pMemoryRequirements->memoryRequirements.alignment);
1114 }
1115 
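/* The preprocess buffer holds the generated command buffer followed by the
 * per-sequence upload data, so report space for both. The memory types are
 * restricted to 32-bit addressable heaps because the prepare shader is only
 * passed a 32-bit upload address.
 */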
1116 void
1117 radv_CmdPreprocessGeneratedCommandsNV(VkCommandBuffer commandBuffer,
1118                                       const VkGeneratedCommandsInfoNV *pGeneratedCommandsInfo)
1119 {
1120    /* Can't do anything here as we depend on some dynamic state in some cases that we only know
1121     * at draw time. */
1122 }
1123 
1124 /* Always need to call this directly before draw due to dependence on bound state. */
1125 void
1126 radv_prepare_dgc(struct radv_cmd_buffer *cmd_buffer,
1127                  const VkGeneratedCommandsInfoNV *pGeneratedCommandsInfo)
1128 {
1129    VK_FROM_HANDLE(radv_indirect_command_layout, layout,
1130                   pGeneratedCommandsInfo->indirectCommandsLayout);
1131    VK_FROM_HANDLE(radv_pipeline, pipeline, pGeneratedCommandsInfo->pipeline);
1132    VK_FROM_HANDLE(radv_buffer, prep_buffer, pGeneratedCommandsInfo->preprocessBuffer);
1133    struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);
1134    struct radv_meta_saved_state saved_state;
1135    struct radv_buffer token_buffer;
1136 
1137    uint32_t cmd_stride, upload_stride;
1138    radv_get_sequence_size(layout, graphics_pipeline, &cmd_stride, &upload_stride);
1139 
   unsigned cmd_buf_size =
      radv_align_cmdbuf_size(cmd_stride * pGeneratedCommandsInfo->sequencesCount);

   unsigned vb_size = layout->bind_vbo_mask ? util_bitcount(graphics_pipeline->vb_desc_usage_mask) * 24 : 0;
   unsigned const_size = graphics_pipeline->base.push_constant_size +
                         16 * graphics_pipeline->base.dynamic_offset_count +
                         sizeof(layout->push_constant_offsets) + ARRAY_SIZE(graphics_pipeline->base.shaders) * 12;
   if (!layout->push_constant_mask)
      const_size = 0;

   unsigned scissor_size = (8 + 2 * cmd_buffer->state.dynamic.scissor.count) * 4;
   if (!layout->binds_state || !cmd_buffer->state.dynamic.scissor.count ||
       !cmd_buffer->device->physical_device->rad_info.has_gfx9_scissor_bug)
      scissor_size = 0;

   unsigned upload_size = MAX2(vb_size + const_size + scissor_size, 16);

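   /* Allocate the parameter block from the command buffer's upload BO and wrap
    * it in a temporary radv_buffer so it can be bound as a storage buffer below. */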
   void *upload_data;
   unsigned upload_offset;
   if (!radv_cmd_buffer_upload_alloc(cmd_buffer, upload_size, &upload_offset, &upload_data)) {
      cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY;
      return;
   }

   void *upload_data_base = upload_data;

   radv_buffer_init(&token_buffer, cmd_buffer->device, cmd_buffer->upload.upload_bo, upload_size,
                    upload_offset);

   uint64_t upload_addr = radv_buffer_get_va(prep_buffer->bo) + prep_buffer->offset +
                          pGeneratedCommandsInfo->preprocessOffset;

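   /* User SGPR that receives the base vertex, plus flags telling the prepare
    * shader whether the draw ID and base instance are used. */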
   uint16_t vtx_base_sgpr =
      (cmd_buffer->state.graphics_pipeline->vtx_base_sgpr - SI_SH_REG_OFFSET) >> 2;
   if (cmd_buffer->state.graphics_pipeline->uses_drawid)
      vtx_base_sgpr |= DGC_USES_DRAWID;
   if (cmd_buffer->state.graphics_pipeline->uses_baseinstance)
      vtx_base_sgpr |= DGC_USES_BASEINSTANCE;

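   /* User SGPR that receives the vertex buffer descriptor table pointer. */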
   uint16_t vbo_sgpr =
      ((radv_lookup_user_sgpr(&graphics_pipeline->base, MESA_SHADER_VERTEX, AC_UD_VS_VERTEX_BUFFERS)->sgpr_idx * 4 +
        graphics_pipeline->base.user_data_0[MESA_SHADER_VERTEX]) -
       SI_SH_REG_OFFSET) >>
      2;
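   /* Push constants for the DGC prepare shader; the VBO and push-constant
    * related fields are filled in below. */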
   struct radv_dgc_params params = {
      .cmd_buf_stride = cmd_stride,
      .cmd_buf_size = cmd_buf_size,
      .upload_addr = (uint32_t)upload_addr,
      .upload_stride = upload_stride,
      .sequence_count = pGeneratedCommandsInfo->sequencesCount,
      .stream_stride = layout->input_stride,
      .draw_indexed = layout->indexed,
      .draw_params_offset = layout->draw_params_offset,
      .base_index_size =
         layout->binds_index_buffer ? 0 : radv_get_vgt_index_size(cmd_buffer->state.index_type),
      .vtx_base_sgpr = vtx_base_sgpr,
      .max_index_count = cmd_buffer->state.max_index_count,
      .index_buffer_offset = layout->index_buffer_offset,
      .vbo_reg = vbo_sgpr,
      .ibo_type_32 = layout->ibo_type_32,
      .ibo_type_8 = layout->ibo_type_8,
      .emit_state = layout->binds_state,
      .pa_su_sc_mode_cntl_base = radv_get_pa_su_sc_mode_cntl(cmd_buffer) & C_028814_FACE,
      .state_offset = layout->state_offset,
   };

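   /* Upload the currently bound vertex descriptors, followed by one pair of
    * 32-bit info words per attribute that the prepare shader uses to patch
    * descriptors for the bindings the layout overrides. */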
   if (layout->bind_vbo_mask) {
      radv_write_vertex_descriptors(cmd_buffer, graphics_pipeline, true, upload_data);

      uint32_t *vbo_info = (uint32_t *)((char *)upload_data + graphics_pipeline->vb_desc_alloc_size);

      struct radv_shader *vs_shader = radv_get_shader(&graphics_pipeline->base, MESA_SHADER_VERTEX);
      const struct radv_vs_input_state *vs_state =
         vs_shader->info.vs.dynamic_inputs ? &cmd_buffer->state.dynamic_vs_input : NULL;
      uint32_t mask = graphics_pipeline->vb_desc_usage_mask;
      unsigned idx = 0;
      while (mask) {
         unsigned i = u_bit_scan(&mask);
         unsigned binding =
            vs_state ? cmd_buffer->state.dynamic_vs_input.bindings[i]
                     : (graphics_pipeline->use_per_attribute_vb_descs ? graphics_pipeline->attrib_bindings[i] : i);
         uint32_t attrib_end =
            vs_state ? vs_state->offsets[i] + vs_state->format_sizes[i] : graphics_pipeline->attrib_ends[i];

         params.vbo_bind_mask |= ((layout->bind_vbo_mask >> binding) & 1u) << idx;
         vbo_info[2 * idx] = ((graphics_pipeline->use_per_attribute_vb_descs ? 1u : 0u) << 31) |
                             (vs_state ? vs_state->offsets[i] << 16 : 0) |
                             layout->vbo_offsets[binding];
         vbo_info[2 * idx + 1] = graphics_pipeline->attrib_index_offset[i] | (attrib_end << 16);
         ++idx;
      }
      params.vbo_cnt = idx | (vs_state ? DGC_DYNAMIC_VERTEX_INPUT : 0);
      upload_data = (char *)upload_data + vb_size;
   }

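   /* For each shader stage record where push constants go (pointer SGPR and/or
    * inline constant SGPRs), then copy the layout's push-constant offsets, the
    * current push-constant values and the dynamic buffer descriptors. */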
   if (layout->push_constant_mask) {
      uint32_t *desc = upload_data;
      upload_data = (char *)upload_data + ARRAY_SIZE(graphics_pipeline->base.shaders) * 12;

      unsigned idx = 0;
      for (unsigned i = 0; i < ARRAY_SIZE(graphics_pipeline->base.shaders); ++i) {
         if (!graphics_pipeline->base.shaders[i])
            continue;

         struct radv_userdata_locations *locs = &graphics_pipeline->base.shaders[i]->info.user_sgprs_locs;
         if (locs->shader_data[AC_UD_PUSH_CONSTANTS].sgpr_idx >= 0)
            params.const_copy = 1;

         if (locs->shader_data[AC_UD_PUSH_CONSTANTS].sgpr_idx >= 0 ||
             locs->shader_data[AC_UD_INLINE_PUSH_CONSTANTS].sgpr_idx >= 0) {
            unsigned upload_sgpr = 0;
            unsigned inline_sgpr = 0;

            if (locs->shader_data[AC_UD_PUSH_CONSTANTS].sgpr_idx >= 0) {
               upload_sgpr =
                  (graphics_pipeline->base.user_data_0[i] + 4 * locs->shader_data[AC_UD_PUSH_CONSTANTS].sgpr_idx -
                   SI_SH_REG_OFFSET) >>
                  2;
            }

            if (locs->shader_data[AC_UD_INLINE_PUSH_CONSTANTS].sgpr_idx >= 0) {
               inline_sgpr = (graphics_pipeline->base.user_data_0[i] +
                              4 * locs->shader_data[AC_UD_INLINE_PUSH_CONSTANTS].sgpr_idx -
                              SI_SH_REG_OFFSET) >>
                             2;
               desc[idx * 3 + 1] = graphics_pipeline->base.shaders[i]->info.inline_push_constant_mask;
               desc[idx * 3 + 2] = graphics_pipeline->base.shaders[i]->info.inline_push_constant_mask >> 32;
            }
            desc[idx * 3] = upload_sgpr | (inline_sgpr << 16);
            ++idx;
         }
      }

      params.push_constant_shader_cnt = idx;

      params.const_copy_size = graphics_pipeline->base.push_constant_size +
                               16 * graphics_pipeline->base.dynamic_offset_count;
      params.push_constant_mask = layout->push_constant_mask;

      memcpy(upload_data, layout->push_constant_offsets, sizeof(layout->push_constant_offsets));
      upload_data = (char *)upload_data + sizeof(layout->push_constant_offsets);

      memcpy(upload_data, cmd_buffer->push_constants, graphics_pipeline->base.push_constant_size);
      upload_data = (char *)upload_data + graphics_pipeline->base.push_constant_size;

      struct radv_descriptor_state *descriptors_state =
         radv_get_descriptors_state(cmd_buffer, pGeneratedCommandsInfo->pipelineBindPoint);
      memcpy(upload_data, descriptors_state->dynamic_buffers, 16 * graphics_pipeline->base.dynamic_offset_count);
      upload_data = (char *)upload_data + 16 * graphics_pipeline->base.dynamic_offset_count;
   }

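   /* GFX9 scissor bug workaround: pre-record the scissor packets here so they
    * can be replayed from the generated command stream. */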
   if (scissor_size) {
      params.scissor_offset = (char *)upload_data - (char *)upload_data_base;
      params.scissor_count = scissor_size / 4;

      struct radeon_cmdbuf scissor_cs = {
         .buf = upload_data,
         .cdw = 0,
         .max_dw = scissor_size / 4
      };

      radv_write_scissors(cmd_buffer, &scissor_cs);
      assert(scissor_cs.cdw * 4 == scissor_size);
      upload_data = (char *)upload_data + scissor_size;
   }

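   /* Bind the prepare shader's inputs: the parameter block, the preprocess
    * (output) buffer, the application's input stream and, if present, the
    * sequences count buffer. */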
   VkWriteDescriptorSet ds_writes[5];
   VkDescriptorBufferInfo buf_info[ARRAY_SIZE(ds_writes)];
   int ds_cnt = 0;
   buf_info[ds_cnt] = (VkDescriptorBufferInfo){.buffer = radv_buffer_to_handle(&token_buffer),
                                               .offset = 0,
                                               .range = upload_size};
   ds_writes[ds_cnt] = (VkWriteDescriptorSet){.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                              .dstBinding = DGC_DESC_PARAMS,
                                              .dstArrayElement = 0,
                                              .descriptorCount = 1,
                                              .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
                                              .pBufferInfo = &buf_info[ds_cnt]};
   ++ds_cnt;

   buf_info[ds_cnt] = (VkDescriptorBufferInfo){.buffer = pGeneratedCommandsInfo->preprocessBuffer,
                                               .offset = pGeneratedCommandsInfo->preprocessOffset,
                                               .range = pGeneratedCommandsInfo->preprocessSize};
   ds_writes[ds_cnt] = (VkWriteDescriptorSet){.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                              .dstBinding = DGC_DESC_PREPARE,
                                              .dstArrayElement = 0,
                                              .descriptorCount = 1,
                                              .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
                                              .pBufferInfo = &buf_info[ds_cnt]};
   ++ds_cnt;

   if (pGeneratedCommandsInfo->streamCount > 0) {
      buf_info[ds_cnt] =
         (VkDescriptorBufferInfo){.buffer = pGeneratedCommandsInfo->pStreams[0].buffer,
                                  .offset = pGeneratedCommandsInfo->pStreams[0].offset,
                                  .range = VK_WHOLE_SIZE};
      ds_writes[ds_cnt] =
         (VkWriteDescriptorSet){.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                .dstBinding = DGC_DESC_STREAM,
                                .dstArrayElement = 0,
                                .descriptorCount = 1,
                                .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
                                .pBufferInfo = &buf_info[ds_cnt]};
      ++ds_cnt;
   }

   if (pGeneratedCommandsInfo->sequencesCountBuffer != VK_NULL_HANDLE) {
      buf_info[ds_cnt] =
         (VkDescriptorBufferInfo){.buffer = pGeneratedCommandsInfo->sequencesCountBuffer,
                                  .offset = pGeneratedCommandsInfo->sequencesCountOffset,
                                  .range = VK_WHOLE_SIZE};
      ds_writes[ds_cnt] =
         (VkWriteDescriptorSet){.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                .dstBinding = DGC_DESC_COUNT,
                                .dstArrayElement = 0,
                                .descriptorCount = 1,
                                .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
                                .pBufferInfo = &buf_info[ds_cnt]};
      ++ds_cnt;
      params.sequence_count = UINT32_MAX;
   }

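   /* Save the compute state we are about to clobber and run the DGC prepare
    * pipeline with the parameters and descriptors gathered above. */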
   radv_meta_save(
      &saved_state, cmd_buffer,
      RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS);

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
                        cmd_buffer->device->meta_state.dgc_prepare.pipeline);

   radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
                         cmd_buffer->device->meta_state.dgc_prepare.p_layout,
                         VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(params), &params);

   radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
                                 cmd_buffer->device->meta_state.dgc_prepare.p_layout, 0, ds_cnt,
                                 ds_writes);

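   /* Dispatch enough workgroups to cover every sequence; the divide by 64
    * assumes the prepare shader uses a 64-invocation workgroup. */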
   unsigned block_count = MAX2(1, round_up_u32(pGeneratedCommandsInfo->sequencesCount, 64));
   radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);

   radv_buffer_finish(&token_buffer);
   radv_meta_restore(&saved_state, cmd_buffer);

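   /* Wait for the prepare dispatch and invalidate caches before the generated
    * commands are executed. */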
   cmd_buffer->state.flush_bits |=
      RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_L2;
}