#include "zink_compiler.h"
#include "zink_context.h"
#include "zink_program.h"
#include "zink_query.h"
#include "zink_resource.h"
#include "zink_screen.h"
#include "zink_state.h"
#include "zink_surface.h"
#include "zink_inlines.h"

#include "tgsi/tgsi_from_mesa.h"
#include "util/hash_table.h"
#include "util/u_debug.h"
#include "util/u_helpers.h"
#include "util/u_inlines.h"
#include "util/u_prim.h"
#include "util/u_prim_restart.h"

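/* Emit barriers for the transform feedback counter buffers before a draw.
 * The counter buffer is always written at the TRANSFORM_FEEDBACK stage; if it
 * already holds a valid count (i.e., xfb was previously paused), it will also
 * be read back when xfb resumes or by an indirect draw, so the read access
 * and DRAW_INDIRECT stage are added per the VK_EXT_transform_feedback spec
 * (quoted below).
 */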
static void
zink_emit_xfb_counter_barrier(struct zink_context *ctx)
{
   for (unsigned i = 0; i < ctx->num_so_targets; i++) {
      struct zink_so_target *t = zink_so_target(ctx->so_targets[i]);
      if (!t)
         continue;
      struct zink_resource *res = zink_resource(t->counter_buffer);
      VkAccessFlags access = VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT;
      VkPipelineStageFlags stage = VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT;
      if (t->counter_buffer_valid) {
         /* Between the pause and resume there needs to be a memory barrier for the counter buffers
          * with a source access of VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT
          * at pipeline stage VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT
          * to a destination access of VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT
          * at pipeline stage VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT.
          *
          * - from VK_EXT_transform_feedback spec
          */
         access |= VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT;
         stage |= VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT;
      }
      zink_resource_buffer_barrier(ctx, res, access, stage);
      res->obj->unordered_read = false;
   }
}

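/* Bind all current stream output targets on the command buffer. Unused slots
 * are pointed at a dummy buffer so the binding call stays valid; real targets
 * have their counter invalidated if the backing resource was rebound since
 * the last use.
 */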
static void
zink_emit_stream_output_targets(struct pipe_context *pctx)
{
   struct zink_context *ctx = zink_context(pctx);
   struct zink_batch *batch = &ctx->batch;
   VkBuffer buffers[PIPE_MAX_SO_OUTPUTS] = {0};
   VkDeviceSize buffer_offsets[PIPE_MAX_SO_OUTPUTS] = {0};
   VkDeviceSize buffer_sizes[PIPE_MAX_SO_OUTPUTS] = {0};

   for (unsigned i = 0; i < ctx->num_so_targets; i++) {
      struct zink_so_target *t = (struct zink_so_target *)ctx->so_targets[i];
      if (!t) {
         /* no need to reference this or anything */
         buffers[i] = zink_resource(ctx->dummy_xfb_buffer)->obj->buffer;
         buffer_offsets[i] = 0;
         buffer_sizes[i] = sizeof(uint8_t);
         continue;
      }
      struct zink_resource *res = zink_resource(t->base.buffer);
      if (!res->so_valid)
         /* resource has been rebound */
         t->counter_buffer_valid = false;
      buffers[i] = res->obj->buffer;
      zink_batch_reference_resource_rw(batch, res, true);
      buffer_offsets[i] = t->base.buffer_offset;
      buffer_sizes[i] = t->base.buffer_size;
      res->so_valid = true;
      util_range_add(t->base.buffer, &res->valid_buffer_range, t->base.buffer_offset,
                     t->base.buffer_offset + t->base.buffer_size);
   }

   VKCTX(CmdBindTransformFeedbackBuffersEXT)(batch->state->cmdbuf, 0, ctx->num_so_targets,
                                             buffers, buffer_offsets,
                                             buffer_sizes);
   ctx->dirty_so_targets = false;
}

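/* helper: emit a buffer barrier and mark the resource as having an ordered read */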
ALWAYS_INLINE static void
check_buffer_barrier(struct zink_context *ctx, struct pipe_resource *pres, VkAccessFlags flags, VkPipelineStageFlags pipeline)
{
   struct zink_resource *res = zink_resource(pres);
   zink_resource_buffer_barrier(ctx, res, flags, pipeline);
   res->obj->unordered_read = false;
}

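/* Synchronize the buffers a draw reads outside of shaders: the index buffer
 * is read at VERTEX_INPUT, indirect command/count buffers at DRAW_INDIRECT.
 */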
ALWAYS_INLINE static void
barrier_draw_buffers(struct zink_context *ctx, const struct pipe_draw_info *dinfo,
                     const struct pipe_draw_indirect_info *dindirect, struct pipe_resource *index_buffer)
{
   if (index_buffer)
      check_buffer_barrier(ctx, index_buffer, VK_ACCESS_INDEX_READ_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT);
   if (dindirect && dindirect->buffer) {
      check_buffer_barrier(ctx, dindirect->buffer,
                           VK_ACCESS_INDIRECT_COMMAND_READ_BIT, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT);
      if (dindirect->indirect_draw_count)
         check_buffer_barrier(ctx, dindirect->indirect_draw_count,
                              VK_ACCESS_INDIRECT_COMMAND_READ_BIT, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT);
   }
}

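/* Bind all vertex buffers for the current vertex elements state, substituting
 * a dummy buffer for unbound slots. The DYNAMIC_STATE template parameter
 * selects the binding path: dynamic-stride binding via
 * vkCmdBindVertexBuffers2EXT, fully dynamic vertex input via
 * vkCmdSetVertexInputEXT, or the baseline vkCmdBindVertexBuffers.
 */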
template <zink_dynamic_state DYNAMIC_STATE>
static void
zink_bind_vertex_buffers(struct zink_batch *batch, struct zink_context *ctx)
{
   VkBuffer buffers[PIPE_MAX_ATTRIBS];
   VkDeviceSize buffer_offsets[PIPE_MAX_ATTRIBS];
   VkDeviceSize buffer_strides[PIPE_MAX_ATTRIBS];
   struct zink_vertex_elements_state *elems = ctx->element_state;
   struct zink_screen *screen = zink_screen(ctx->base.screen);

   if (!elems->hw_state.num_bindings)
      return;

   for (unsigned i = 0; i < elems->hw_state.num_bindings; i++) {
      struct pipe_vertex_buffer *vb = ctx->vertex_buffers + ctx->element_state->binding_map[i];
      assert(vb);
      if (vb->buffer.resource) {
         struct zink_resource *res = zink_resource(vb->buffer.resource);
         assert(res->obj->buffer);
         buffers[i] = res->obj->buffer;
         buffer_offsets[i] = vb->buffer_offset;
         buffer_strides[i] = vb->stride;
         if (DYNAMIC_STATE == ZINK_DYNAMIC_VERTEX_INPUT)
            elems->hw_state.dynbindings[i].stride = vb->stride;
      } else {
         buffers[i] = zink_resource(ctx->dummy_vertex_buffer)->obj->buffer;
         buffer_offsets[i] = 0;
         buffer_strides[i] = 0;
         if (DYNAMIC_STATE == ZINK_DYNAMIC_VERTEX_INPUT)
            elems->hw_state.dynbindings[i].stride = 0;
      }
   }

   if (DYNAMIC_STATE != ZINK_NO_DYNAMIC_STATE && DYNAMIC_STATE != ZINK_DYNAMIC_VERTEX_INPUT)
      VKCTX(CmdBindVertexBuffers2EXT)(batch->state->cmdbuf, 0,
                                      elems->hw_state.num_bindings,
                                      buffers, buffer_offsets, NULL, buffer_strides);
   else
      VKSCR(CmdBindVertexBuffers)(batch->state->cmdbuf, 0,
                                  elems->hw_state.num_bindings,
                                  buffers, buffer_offsets);

   if (DYNAMIC_STATE == ZINK_DYNAMIC_VERTEX_INPUT)
      VKCTX(CmdSetVertexInputEXT)(batch->state->cmdbuf,
                                  elems->hw_state.num_bindings, elems->hw_state.dynbindings,
                                  elems->hw_state.num_attribs, elems->hw_state.dynattribs);

   ctx->vertex_buffers_dirty = false;
}

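/* Bind the single interleaved vertex buffer from a pipe_vertex_state object
 * (the display-list fast path) and program the matching dynamic vertex input
 * state; this path requires VK_EXT_vertex_input_dynamic_state.
 */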
static void
zink_bind_vertex_state(struct zink_batch *batch, struct zink_context *ctx,
                       struct pipe_vertex_state *vstate, uint32_t partial_velem_mask)
{
   if (!vstate->input.vbuffer.buffer.resource)
      return;

   const struct zink_vertex_elements_hw_state *hw_state = zink_vertex_state_mask(vstate, partial_velem_mask, true);
   assert(hw_state);

   struct zink_resource *res = zink_resource(vstate->input.vbuffer.buffer.resource);
   zink_batch_resource_usage_set(&ctx->batch, res, false);
   VkDeviceSize offset = vstate->input.vbuffer.buffer_offset;
   VKCTX(CmdBindVertexBuffers)(batch->state->cmdbuf, 0,
                               hw_state->num_bindings,
                               &res->obj->buffer, &offset);

   VKCTX(CmdSetVertexInputEXT)(batch->state->cmdbuf,
                               hw_state->num_bindings, hw_state->dynbindings,
                               hw_state->num_attribs, hw_state->dynattribs);
}

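/* Look up (or create) the zink_gfx_program for the currently bound shader
 * stages and fold its variant hash into the pipeline state's final hash. The
 * program cache is keyed on the shader pointers; stages marked dirty are
 * re-validated so shader variants stay in sync with their keys.
 */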
static void
update_gfx_program(struct zink_context *ctx)
{
   if (ctx->last_vertex_stage_dirty) {
      enum pipe_shader_type pstage = pipe_shader_type_from_mesa(ctx->last_vertex_stage->nir->info.stage);
      ctx->dirty_shader_stages |= BITFIELD_BIT(pstage);
      memcpy(&ctx->gfx_pipeline_state.shader_keys.key[pstage].key.vs_base,
             &ctx->gfx_pipeline_state.shader_keys.last_vertex.key.vs_base,
             sizeof(struct zink_vs_key_base));
      ctx->last_vertex_stage_dirty = false;
   }
   unsigned bits = BITFIELD_MASK(PIPE_SHADER_COMPUTE);
   if (ctx->gfx_dirty) {
      struct zink_gfx_program *prog = NULL;

      struct hash_table *ht = &ctx->program_cache[ctx->shader_stages >> 2];
      const uint32_t hash = ctx->gfx_hash;
      struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(ht, hash, ctx->gfx_stages);
      if (entry) {
         prog = (struct zink_gfx_program*)entry->data;
         u_foreach_bit(stage, prog->stages_present & ~ctx->dirty_shader_stages)
            ctx->gfx_pipeline_state.modules[stage] = prog->modules[stage]->shader;
         /* ensure variants are always updated if keys have changed since last use */
         ctx->dirty_shader_stages |= prog->stages_present;
      } else {
         ctx->dirty_shader_stages |= bits;
         prog = zink_create_gfx_program(ctx, ctx->gfx_stages, ctx->gfx_pipeline_state.dyn_state2.vertices_per_patch);
         _mesa_hash_table_insert_pre_hashed(ht, hash, prog->shaders, prog);
      }
      zink_update_gfx_program(ctx, prog);
      if (prog && prog != ctx->curr_program)
         zink_batch_reference_program(&ctx->batch, &prog->base);
      if (ctx->curr_program)
         ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
      ctx->curr_program = prog;
      ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
      ctx->gfx_dirty = false;
   } else if (ctx->dirty_shader_stages & bits) {
      /* remove old hash */
      ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
      zink_update_gfx_program(ctx, ctx->curr_program);
      /* apply new hash */
      ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
   }
   ctx->dirty_shader_stages &= ~bits;
}

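/* gl_DrawID has no direct Vulkan equivalent outside of multidraw, so when a
 * shader reads it the value is passed in via push constants.
 */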
ALWAYS_INLINE static void
update_drawid(struct zink_context *ctx, unsigned draw_id)
{
   VKCTX(CmdPushConstants)(ctx->batch.state->cmdbuf, ctx->curr_program->base.layout, VK_SHADER_STAGE_VERTEX_BIT,
                           offsetof(struct zink_gfx_push_constant, draw_id), sizeof(unsigned),
                           &draw_id);
}

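/* Indexed draw path for a temporary index buffer rewritten on the CPU,
 * presumably because the rewritten buffer already starts at the draw's first
 * index; firstIndex is therefore always 0 here.
 */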
ALWAYS_INLINE static void
draw_indexed_need_index_buffer_unref(struct zink_context *ctx,
                                     const struct pipe_draw_info *dinfo,
                                     const struct pipe_draw_start_count_bias *draws,
                                     unsigned num_draws,
                                     unsigned draw_id,
                                     bool needs_drawid)
{
   VkCommandBuffer cmdbuf = ctx->batch.state->cmdbuf;
   if (dinfo->increment_draw_id && needs_drawid) {
      for (unsigned i = 0; i < num_draws; i++) {
         update_drawid(ctx, draw_id);
         VKCTX(CmdDrawIndexed)(cmdbuf,
            draws[i].count, dinfo->instance_count,
            0, draws[i].index_bias, dinfo->start_instance);
         draw_id++;
      }
   } else {
      if (needs_drawid)
         update_drawid(ctx, draw_id);
      for (unsigned i = 0; i < num_draws; i++)
         VKCTX(CmdDrawIndexed)(cmdbuf,
            draws[i].count, dinfo->instance_count,
            0, draws[i].index_bias, dinfo->start_instance);
   }
}

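/* Indexed draw dispatch: uses vkCmdDrawMultiIndexedEXT when VK_EXT_multi_draw
 * is available and gl_DrawID doesn't need to change per draw; otherwise loops
 * over plain vkCmdDrawIndexed calls.
 */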
template <zink_multidraw HAS_MULTIDRAW>
ALWAYS_INLINE static void
draw_indexed(struct zink_context *ctx,
             const struct pipe_draw_info *dinfo,
             const struct pipe_draw_start_count_bias *draws,
             unsigned num_draws,
             unsigned draw_id,
             bool needs_drawid)
{
   VkCommandBuffer cmdbuf = ctx->batch.state->cmdbuf;
   if (dinfo->increment_draw_id && needs_drawid) {
      for (unsigned i = 0; i < num_draws; i++) {
         update_drawid(ctx, draw_id);
         VKCTX(CmdDrawIndexed)(cmdbuf,
            draws[i].count, dinfo->instance_count,
            draws[i].start, draws[i].index_bias, dinfo->start_instance);
         draw_id++;
      }
   } else {
      if (needs_drawid)
         update_drawid(ctx, draw_id);
      if (HAS_MULTIDRAW) {
         VKCTX(CmdDrawMultiIndexedEXT)(cmdbuf, num_draws, (const VkMultiDrawIndexedInfoEXT*)draws,
                                       dinfo->instance_count,
                                       dinfo->start_instance, sizeof(struct pipe_draw_start_count_bias),
                                       dinfo->index_bias_varies ? NULL : &draws[0].index_bias);
      } else {
         for (unsigned i = 0; i < num_draws; i++)
            VKCTX(CmdDrawIndexed)(cmdbuf,
               draws[i].count, dinfo->instance_count,
               draws[i].start, draws[i].index_bias, dinfo->start_instance);
      }
   }
}

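/* Non-indexed counterpart of draw_indexed(), with the same multidraw and
 * gl_DrawID handling.
 */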
template <zink_multidraw HAS_MULTIDRAW>
ALWAYS_INLINE static void
draw(struct zink_context *ctx,
     const struct pipe_draw_info *dinfo,
     const struct pipe_draw_start_count_bias *draws,
     unsigned num_draws,
     unsigned draw_id,
     bool needs_drawid)
{
   VkCommandBuffer cmdbuf = ctx->batch.state->cmdbuf;
   if (dinfo->increment_draw_id && needs_drawid) {
      for (unsigned i = 0; i < num_draws; i++) {
         update_drawid(ctx, draw_id);
         VKCTX(CmdDraw)(cmdbuf, draws[i].count, dinfo->instance_count, draws[i].start, dinfo->start_instance);
         draw_id++;
      }
   } else {
      if (needs_drawid)
         update_drawid(ctx, draw_id);
      if (HAS_MULTIDRAW)
         VKCTX(CmdDrawMultiEXT)(cmdbuf, num_draws, (const VkMultiDrawInfoEXT*)draws,
                                dinfo->instance_count, dinfo->start_instance,
                                sizeof(struct pipe_draw_start_count_bias));
      else {
         for (unsigned i = 0; i < num_draws; i++)
            VKCTX(CmdDraw)(cmdbuf, draws[i].count, dinfo->instance_count, draws[i].start, dinfo->start_instance);
      }
   }
}

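/* Flush all pending descriptor barriers for the given pipeline type. The
 * pending set is swapped with its double-buffered counterpart up front so
 * that resources needing a barrier on every draw can be re-queued while the
 * old set is being iterated.
 */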
static void
update_barriers(struct zink_context *ctx, bool is_compute,
                struct pipe_resource *index, struct pipe_resource *indirect, struct pipe_resource *indirect_draw_count)
{
   if (!ctx->need_barriers[is_compute]->entries)
      return;
   struct set *need_barriers = ctx->need_barriers[is_compute];
   ctx->barrier_set_idx[is_compute] = !ctx->barrier_set_idx[is_compute];
   ctx->need_barriers[is_compute] = &ctx->update_barriers[is_compute][ctx->barrier_set_idx[is_compute]];
   set_foreach(need_barriers, he) {
      struct zink_resource *res = (struct zink_resource *)he->key;
      if (res->bind_count[is_compute]) {
         VkPipelineStageFlagBits pipeline = is_compute ? VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT : res->gfx_barrier;
         if (res->base.b.target == PIPE_BUFFER)
            zink_resource_buffer_barrier(ctx, res, res->barrier_access[is_compute], pipeline);
         else {
            VkImageLayout layout = zink_descriptor_util_image_layout_eval(ctx, res, is_compute);
            if (layout != res->layout)
               zink_resource_image_barrier(ctx, res, layout, res->barrier_access[is_compute], pipeline);
         }
         if (zink_resource_access_is_write(res->barrier_access[is_compute]))
            res->obj->unordered_read = res->obj->unordered_write = false;
         else
            res->obj->unordered_read = false;
         /* always barrier on draw if this resource has either multiple image write binds or
          * image write binds and image read binds
          */
         if (res->write_bind_count[is_compute] && res->bind_count[is_compute] > 1)
            _mesa_set_add_pre_hashed(ctx->need_barriers[is_compute], he->hash, res);
      }
      _mesa_set_remove(need_barriers, he);
      if (!need_barriers->entries)
         break;
   }
}

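/* Validate the gfx program and bind the resulting pipeline if it (or the
 * batch) changed; returns whether a new pipeline was bound so the caller
 * knows to re-emit non-dynamic state.
 */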
template <bool BATCH_CHANGED>
static bool
update_gfx_pipeline(struct zink_context *ctx, struct zink_batch_state *bs, enum pipe_prim_type mode)
{
   VkPipeline prev_pipeline = ctx->gfx_pipeline_state.pipeline;
   update_gfx_program(ctx);
   VkPipeline pipeline = zink_get_gfx_pipeline(ctx, ctx->curr_program, &ctx->gfx_pipeline_state, mode);
   bool pipeline_changed = prev_pipeline != pipeline;
   if (BATCH_CHANGED || pipeline_changed)
      VKCTX(CmdBindPipeline)(bs->cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
   return pipeline_changed;
}

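/* CPU-side emulation of conditional rendering for drivers without
 * VK_EXT_conditional_rendering: read back the query result and drop the draw
 * when the condition fails. Returns false if the draw should be skipped or
 * was replayed on a new batch with the render condition disabled.
 */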
static bool
hack_conditional_render(struct pipe_context *pctx,
                        const struct pipe_draw_info *dinfo,
                        unsigned drawid_offset,
                        const struct pipe_draw_indirect_info *dindirect,
                        const struct pipe_draw_start_count_bias *draws,
                        unsigned num_draws)
{
   struct zink_context *ctx = zink_context(pctx);
   struct zink_batch_state *bs = ctx->batch.state;
   static bool warned;
   if (!warned) {
      fprintf(stderr, "ZINK: warning, this is cpu-based conditional rendering, say bye-bye to fps\n");
      warned = true;
   }
   if (!zink_check_conditional_render(ctx))
      return false;
   if (bs != ctx->batch.state) {
      bool prev = ctx->render_condition_active;
      ctx->render_condition_active = false;
      zink_select_draw_vbo(ctx);
      pctx->draw_vbo(pctx, dinfo, drawid_offset, dindirect, draws, num_draws);
      ctx->render_condition_active = prev;
      return false;
   }
   return true;
}

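/* The main draw entrypoint. The template parameters bake multidraw support,
 * the dynamic-state level, batch-changed, and the draw-state path into
 * specialized variants so the per-draw hot path only contains the checks it
 * needs: barriers and streamout setup are emitted before the renderpass,
 * dynamic state and descriptors are updated, and the draw is dispatched
 * through one of the direct/indirect/transform-feedback paths below.
 */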
template <zink_multidraw HAS_MULTIDRAW, zink_dynamic_state DYNAMIC_STATE, bool BATCH_CHANGED, bool DRAW_STATE>
void
zink_draw(struct pipe_context *pctx,
          const struct pipe_draw_info *dinfo,
          unsigned drawid_offset,
          const struct pipe_draw_indirect_info *dindirect,
          const struct pipe_draw_start_count_bias *draws,
          unsigned num_draws,
          struct pipe_vertex_state *vstate,
          uint32_t partial_velem_mask)
{
   if (!dindirect && (!draws[0].count || !dinfo->instance_count))
      return;

   struct zink_context *ctx = zink_context(pctx);
   struct zink_screen *screen = zink_screen(pctx->screen);
   struct zink_rasterizer_state *rast_state = ctx->rast_state;
   struct zink_depth_stencil_alpha_state *dsa_state = ctx->dsa_state;
   struct zink_batch *batch = &ctx->batch;
   struct zink_so_target *so_target =
      dindirect && dindirect->count_from_stream_output ?
         zink_so_target(dindirect->count_from_stream_output) : NULL;
   VkBuffer counter_buffers[PIPE_MAX_SO_OUTPUTS];
   VkDeviceSize counter_buffer_offsets[PIPE_MAX_SO_OUTPUTS];
   bool need_index_buffer_unref = false;
   bool mode_changed = ctx->gfx_pipeline_state.gfx_prim_mode != dinfo->mode;
   bool reads_drawid = ctx->shader_reads_drawid;
   bool reads_basevertex = ctx->shader_reads_basevertex;
   unsigned work_count = ctx->batch.work_count;
   enum pipe_prim_type mode = (enum pipe_prim_type)dinfo->mode;

   if (unlikely(!screen->info.have_EXT_conditional_rendering)) {
      if (!hack_conditional_render(pctx, dinfo, drawid_offset, dindirect, draws, num_draws))
         return;
   }

   if (ctx->memory_barrier)
      zink_flush_memory_barrier(ctx, false);

   if (unlikely(ctx->buffer_rebind_counter < screen->buffer_rebind_counter)) {
      ctx->buffer_rebind_counter = screen->buffer_rebind_counter;
      zink_rebind_all_buffers(ctx);
   }

   if (unlikely(ctx->image_rebind_counter < screen->image_rebind_counter)) {
      ctx->image_rebind_counter = screen->image_rebind_counter;
      zink_rebind_all_images(ctx);
   }

   unsigned index_offset = 0;
   unsigned index_size = dinfo->index_size;
   struct pipe_resource *index_buffer = NULL;
   if (index_size > 0) {
      if (dinfo->has_user_indices) {
         if (!util_upload_index_buffer(pctx, dinfo, &draws[0], &index_buffer, &index_offset, 4)) {
            debug_printf("util_upload_index_buffer() failed\n");
            return;
         }
         /* this will have extra refs from tc */
         if (screen->threaded)
            zink_batch_reference_resource_move(batch, zink_resource(index_buffer));
         else
            zink_batch_reference_resource(batch, zink_resource(index_buffer));
      } else {
         index_buffer = dinfo->index.resource;
         zink_batch_reference_resource_rw(batch, zink_resource(index_buffer), false);
      }
      assert(index_size <= 4 && index_size != 3);
      assert(index_size != 1 || screen->info.have_EXT_index_type_uint8);
   }

   bool have_streamout = !!ctx->num_so_targets;
   if (have_streamout) {
      zink_emit_xfb_counter_barrier(ctx);
      if (ctx->dirty_so_targets) {
         /* have to loop here and below because barriers must be emitted out of renderpass,
          * but xfb buffers can't be bound before the renderpass is active to avoid
          * breaking from recursion
          */
         for (unsigned i = 0; i < ctx->num_so_targets; i++) {
            struct zink_so_target *t = (struct zink_so_target *)ctx->so_targets[i];
            if (t) {
               struct zink_resource *res = zink_resource(t->base.buffer);
               zink_resource_buffer_barrier(ctx, res,
                                            VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT, VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT);
               res->obj->unordered_read = res->obj->unordered_write = false;
            }
         }
      }
   }

   barrier_draw_buffers(ctx, dinfo, dindirect, index_buffer);
   /* this may re-emit draw buffer barriers, but such synchronization is harmless */
   update_barriers(ctx, false, index_buffer, dindirect ? dindirect->buffer : NULL, dindirect ? dindirect->indirect_draw_count : NULL);

   /* ensure synchronization between doing streamout with counter buffer
    * and using counter buffer for indirect draw
    */
   if (so_target && so_target->counter_buffer_valid) {
      struct zink_resource *res = zink_resource(so_target->counter_buffer);
      zink_resource_buffer_barrier(ctx, res,
                                   VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT,
                                   VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT);
      res->obj->unordered_read = false;
   }

   zink_query_update_gs_states(ctx, dinfo->was_line_loop);

   if (unlikely(zink_debug & ZINK_DEBUG_SYNC)) {
      zink_batch_no_rp(ctx);
      VkMemoryBarrier mb;
      mb.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
      mb.pNext = NULL;
      mb.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT;
      mb.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT;
      VKSCR(CmdPipelineBarrier)(ctx->batch.state->cmdbuf,
                                VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                                VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                                0, 1, &mb, 0, NULL, 0, NULL);
   }

   zink_batch_rp(ctx);
   /* check dead swapchain */
   if (unlikely(!ctx->batch.in_rp))
      return;

   if (BATCH_CHANGED)
      zink_update_descriptor_refs(ctx, false);

   /* these must be after renderpass start to avoid issues with recursion */
   bool drawid_broken = false;
   if (reads_drawid && (!dindirect || !dindirect->buffer))
      drawid_broken = (drawid_offset != 0 ||
                      (!HAS_MULTIDRAW && num_draws > 1) ||
                      (HAS_MULTIDRAW && num_draws > 1 && !dinfo->increment_draw_id));
   if (drawid_broken != zink_get_last_vertex_key(ctx)->push_drawid)
      zink_set_last_vertex_key(ctx)->push_drawid = drawid_broken;
   if (mode_changed) {
      bool points_changed = false;
      if (mode == PIPE_PRIM_POINTS) {
         ctx->gfx_pipeline_state.has_points++;
         points_changed = true;
      } else if (ctx->gfx_pipeline_state.gfx_prim_mode == PIPE_PRIM_POINTS) {
         ctx->gfx_pipeline_state.has_points--;
         points_changed = true;
      }
      if (points_changed && ctx->rast_state->base.point_quad_rasterization)
         zink_set_fs_point_coord_key(ctx);
   }
   ctx->gfx_pipeline_state.gfx_prim_mode = mode;

   if (index_size) {
      const VkIndexType index_type[3] = {
         VK_INDEX_TYPE_UINT8_EXT,
         VK_INDEX_TYPE_UINT16,
         VK_INDEX_TYPE_UINT32,
      };
      struct zink_resource *res = zink_resource(index_buffer);
      VKCTX(CmdBindIndexBuffer)(batch->state->cmdbuf, res->obj->buffer, index_offset, index_type[index_size >> 1]);
   }
   if (DYNAMIC_STATE < ZINK_DYNAMIC_STATE2) {
      if (ctx->gfx_pipeline_state.dyn_state2.primitive_restart != dinfo->primitive_restart)
         ctx->gfx_pipeline_state.dirty = true;
      ctx->gfx_pipeline_state.dyn_state2.primitive_restart = dinfo->primitive_restart;
   }

   if (have_streamout && ctx->dirty_so_targets)
      zink_emit_stream_output_targets(pctx);

   bool pipeline_changed = update_gfx_pipeline<BATCH_CHANGED>(ctx, batch->state, mode);

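   /* Translate the gallium viewport transform into a VkViewport: the origin is
    * translate - scale with a height of 2 * scale (a negative height gives the
    * GL-style Y flip), while the depth range comes from the z translate/scale
    * with clip_halfz selecting between the [0,1] and [-1,1] conventions. When
    * half-pixel centers are disabled, the dxvk-derived offset below nudges the
    * viewport to compensate.
    */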
   if (BATCH_CHANGED || ctx->vp_state_changed || (DYNAMIC_STATE == ZINK_NO_DYNAMIC_STATE && pipeline_changed)) {
      VkViewport viewports[PIPE_MAX_VIEWPORTS];
      for (unsigned i = 0; i < ctx->vp_state.num_viewports; i++) {
         VkViewport viewport = {
            ctx->vp_state.viewport_states[i].translate[0] - ctx->vp_state.viewport_states[i].scale[0],
            ctx->vp_state.viewport_states[i].translate[1] - ctx->vp_state.viewport_states[i].scale[1],
            MAX2(ctx->vp_state.viewport_states[i].scale[0] * 2, 1),
            ctx->vp_state.viewport_states[i].scale[1] * 2,
            CLAMP(ctx->rast_state->base.clip_halfz ?
                  ctx->vp_state.viewport_states[i].translate[2] :
                  ctx->vp_state.viewport_states[i].translate[2] - ctx->vp_state.viewport_states[i].scale[2],
                  0, 1),
            CLAMP(ctx->vp_state.viewport_states[i].translate[2] + ctx->vp_state.viewport_states[i].scale[2],
                  0, 1)
         };
         if (!ctx->rast_state->base.half_pixel_center) {
            /* magic constant value from dxvk */
            float cf = 0.5f - (1.0f / 128.0f);
            viewport.x += cf;
            if (viewport.height < 0)
               viewport.y += cf;
            else
               viewport.y -= cf;
         }
         viewports[i] = viewport;
      }
      if (DYNAMIC_STATE != ZINK_NO_DYNAMIC_STATE)
         VKCTX(CmdSetViewportWithCountEXT)(batch->state->cmdbuf, ctx->vp_state.num_viewports, viewports);
      else
         VKCTX(CmdSetViewport)(batch->state->cmdbuf, 0, ctx->vp_state.num_viewports, viewports);
   }
   if (BATCH_CHANGED || ctx->scissor_changed || ctx->vp_state_changed || (DYNAMIC_STATE == ZINK_NO_DYNAMIC_STATE && pipeline_changed)) {
      VkRect2D scissors[PIPE_MAX_VIEWPORTS];
      if (ctx->rast_state->base.scissor) {
         for (unsigned i = 0; i < ctx->vp_state.num_viewports; i++) {
            scissors[i].offset.x = ctx->vp_state.scissor_states[i].minx;
            scissors[i].offset.y = ctx->vp_state.scissor_states[i].miny;
            scissors[i].extent.width = ctx->vp_state.scissor_states[i].maxx - ctx->vp_state.scissor_states[i].minx;
            scissors[i].extent.height = ctx->vp_state.scissor_states[i].maxy - ctx->vp_state.scissor_states[i].miny;
         }
      } else {
         for (unsigned i = 0; i < ctx->vp_state.num_viewports; i++) {
            scissors[i].offset.x = 0;
            scissors[i].offset.y = 0;
            scissors[i].extent.width = ctx->fb_state.width;
            scissors[i].extent.height = ctx->fb_state.height;
         }
      }
      if (DYNAMIC_STATE != ZINK_NO_DYNAMIC_STATE)
         VKCTX(CmdSetScissorWithCountEXT)(batch->state->cmdbuf, ctx->vp_state.num_viewports, scissors);
      else
         VKCTX(CmdSetScissor)(batch->state->cmdbuf, 0, ctx->vp_state.num_viewports, scissors);
   }
   ctx->vp_state_changed = false;
   ctx->scissor_changed = false;

   if (BATCH_CHANGED || ctx->stencil_ref_changed) {
      VKCTX(CmdSetStencilReference)(batch->state->cmdbuf, VK_STENCIL_FACE_FRONT_BIT,
                                    ctx->stencil_ref.ref_value[0]);
      VKCTX(CmdSetStencilReference)(batch->state->cmdbuf, VK_STENCIL_FACE_BACK_BIT,
                                    ctx->stencil_ref.ref_value[1]);
      ctx->stencil_ref_changed = false;
   }

   if (DYNAMIC_STATE != ZINK_NO_DYNAMIC_STATE && (BATCH_CHANGED || ctx->dsa_state_changed)) {
      VKCTX(CmdSetDepthBoundsTestEnableEXT)(batch->state->cmdbuf, dsa_state->hw_state.depth_bounds_test);
      if (dsa_state->hw_state.depth_bounds_test)
         VKCTX(CmdSetDepthBounds)(batch->state->cmdbuf,
                                  dsa_state->hw_state.min_depth_bounds,
                                  dsa_state->hw_state.max_depth_bounds);
      VKCTX(CmdSetDepthTestEnableEXT)(batch->state->cmdbuf, dsa_state->hw_state.depth_test);
      if (dsa_state->hw_state.depth_test)
         VKCTX(CmdSetDepthCompareOpEXT)(batch->state->cmdbuf, dsa_state->hw_state.depth_compare_op);
      VKCTX(CmdSetDepthWriteEnableEXT)(batch->state->cmdbuf, dsa_state->hw_state.depth_write);
      VKCTX(CmdSetStencilTestEnableEXT)(batch->state->cmdbuf, dsa_state->hw_state.stencil_test);
      if (dsa_state->hw_state.stencil_test) {
         VKCTX(CmdSetStencilOpEXT)(batch->state->cmdbuf, VK_STENCIL_FACE_FRONT_BIT,
                                   dsa_state->hw_state.stencil_front.failOp,
                                   dsa_state->hw_state.stencil_front.passOp,
                                   dsa_state->hw_state.stencil_front.depthFailOp,
                                   dsa_state->hw_state.stencil_front.compareOp);
         VKCTX(CmdSetStencilOpEXT)(batch->state->cmdbuf, VK_STENCIL_FACE_BACK_BIT,
                                   dsa_state->hw_state.stencil_back.failOp,
                                   dsa_state->hw_state.stencil_back.passOp,
                                   dsa_state->hw_state.stencil_back.depthFailOp,
                                   dsa_state->hw_state.stencil_back.compareOp);
         if (dsa_state->base.stencil[1].enabled) {
            VKCTX(CmdSetStencilWriteMask)(batch->state->cmdbuf, VK_STENCIL_FACE_FRONT_BIT, dsa_state->hw_state.stencil_front.writeMask);
            VKCTX(CmdSetStencilWriteMask)(batch->state->cmdbuf, VK_STENCIL_FACE_BACK_BIT, dsa_state->hw_state.stencil_back.writeMask);
            VKCTX(CmdSetStencilCompareMask)(batch->state->cmdbuf, VK_STENCIL_FACE_FRONT_BIT, dsa_state->hw_state.stencil_front.compareMask);
            VKCTX(CmdSetStencilCompareMask)(batch->state->cmdbuf, VK_STENCIL_FACE_BACK_BIT, dsa_state->hw_state.stencil_back.compareMask);
         } else {
            VKCTX(CmdSetStencilWriteMask)(batch->state->cmdbuf, VK_STENCIL_FACE_FRONT_AND_BACK, dsa_state->hw_state.stencil_front.writeMask);
            VKCTX(CmdSetStencilCompareMask)(batch->state->cmdbuf, VK_STENCIL_FACE_FRONT_AND_BACK, dsa_state->hw_state.stencil_front.compareMask);
         }
      } else {
         VKCTX(CmdSetStencilWriteMask)(batch->state->cmdbuf, VK_STENCIL_FACE_FRONT_AND_BACK, dsa_state->hw_state.stencil_front.writeMask);
         VKCTX(CmdSetStencilCompareMask)(batch->state->cmdbuf, VK_STENCIL_FACE_FRONT_AND_BACK, dsa_state->hw_state.stencil_front.compareMask);
      }
   }
   ctx->dsa_state_changed = false;

   bool rast_state_changed = ctx->rast_state_changed;
   if (DYNAMIC_STATE != ZINK_NO_DYNAMIC_STATE && (BATCH_CHANGED || rast_state_changed)) {
      VKCTX(CmdSetFrontFaceEXT)(batch->state->cmdbuf, (VkFrontFace)ctx->gfx_pipeline_state.dyn_state1.front_face);
      VKCTX(CmdSetCullModeEXT)(batch->state->cmdbuf, ctx->gfx_pipeline_state.dyn_state1.cull_mode);
   }
   if ((BATCH_CHANGED || rast_state_changed) &&
       screen->info.have_EXT_line_rasterization && rast_state->base.line_stipple_enable)
      VKCTX(CmdSetLineStippleEXT)(batch->state->cmdbuf, rast_state->base.line_stipple_factor, rast_state->base.line_stipple_pattern);

   if (BATCH_CHANGED || ctx->rast_state_changed) {
      enum pipe_prim_type reduced_prim = ctx->last_vertex_stage->reduced_prim;
      if (reduced_prim == PIPE_PRIM_MAX)
         reduced_prim = u_reduced_prim(mode);

      bool depth_bias = false;
      switch (reduced_prim) {
      case PIPE_PRIM_POINTS:
         depth_bias = rast_state->offset_point;
         break;

      case PIPE_PRIM_LINES:
         depth_bias = rast_state->offset_line;
         break;

      case PIPE_PRIM_TRIANGLES:
         depth_bias = rast_state->offset_tri;
         break;

      default:
         unreachable("unexpected reduced prim");
      }

      VKCTX(CmdSetLineWidth)(batch->state->cmdbuf, rast_state->line_width);
      if (depth_bias) {
         if (rast_state->base.offset_units_unscaled) {
            VKCTX(CmdSetDepthBias)(batch->state->cmdbuf, rast_state->offset_units * ctx->depth_bias_scale_factor, rast_state->offset_clamp, rast_state->offset_scale);
         } else {
            VKCTX(CmdSetDepthBias)(batch->state->cmdbuf, rast_state->offset_units, rast_state->offset_clamp, rast_state->offset_scale);
         }
      } else {
         VKCTX(CmdSetDepthBias)(batch->state->cmdbuf, 0.0f, 0.0f, 0.0f);
      }
   }
   ctx->rast_state_changed = false;

   if (DYNAMIC_STATE != ZINK_NO_DYNAMIC_STATE) {
      if (ctx->sample_locations_changed) {
         VkSampleLocationsInfoEXT loc;
         zink_init_vk_sample_locations(ctx, &loc);
         VKCTX(CmdSetSampleLocationsEXT)(batch->state->cmdbuf, &loc);
      }
      ctx->sample_locations_changed = false;
   }

   if ((BATCH_CHANGED || ctx->blend_state_changed) &&
       ctx->gfx_pipeline_state.blend_state->need_blend_constants) {
      VKCTX(CmdSetBlendConstants)(batch->state->cmdbuf, ctx->blend_constants);
   }
   ctx->blend_state_changed = false;

   if (DRAW_STATE)
      zink_bind_vertex_state(batch, ctx, vstate, partial_velem_mask);
   else if (BATCH_CHANGED || ctx->vertex_buffers_dirty) {
      if (DYNAMIC_STATE == ZINK_DYNAMIC_VERTEX_INPUT || ctx->gfx_pipeline_state.uses_dynamic_stride)
         zink_bind_vertex_buffers<DYNAMIC_STATE>(batch, ctx);
      else
         zink_bind_vertex_buffers<ZINK_NO_DYNAMIC_STATE>(batch, ctx);
   }

   if (BATCH_CHANGED) {
      ctx->pipeline_changed[0] = false;
      zink_select_draw_vbo(ctx);
   }

   if (DYNAMIC_STATE != ZINK_NO_DYNAMIC_STATE && (BATCH_CHANGED || mode_changed)) {
      VKCTX(CmdSetPrimitiveTopologyEXT)(batch->state->cmdbuf, zink_primitive_topology(mode));
   }

   if (DYNAMIC_STATE >= ZINK_DYNAMIC_STATE2 && (BATCH_CHANGED || ctx->primitive_restart != dinfo->primitive_restart)) {
      VKCTX(CmdSetPrimitiveRestartEnableEXT)(batch->state->cmdbuf, dinfo->primitive_restart);
      ctx->primitive_restart = dinfo->primitive_restart;
   }

   if (DYNAMIC_STATE >= ZINK_DYNAMIC_STATE2 && (BATCH_CHANGED || ctx->rasterizer_discard_changed)) {
      VKCTX(CmdSetRasterizerDiscardEnableEXT)(batch->state->cmdbuf, ctx->gfx_pipeline_state.dyn_state2.rasterizer_discard);
      ctx->rasterizer_discard_changed = false;
   }

   if (zink_program_has_descriptors(&ctx->curr_program->base))
      screen->descriptors_update(ctx, false);

   if (ctx->di.any_bindless_dirty &&
       /* some apps (d3dretrace) call MakeTextureHandleResidentARB randomly */
       zink_program_has_descriptors(&ctx->curr_program->base) &&
       ctx->curr_program->base.dd->bindless)
      zink_descriptors_update_bindless(ctx);

   if (reads_basevertex) {
      unsigned draw_mode_is_indexed = index_size > 0;
      VKCTX(CmdPushConstants)(batch->state->cmdbuf, ctx->curr_program->base.layout, VK_SHADER_STAGE_VERTEX_BIT,
                              offsetof(struct zink_gfx_push_constant, draw_mode_is_indexed), sizeof(unsigned),
                              &draw_mode_is_indexed);
   }
   if (ctx->curr_program->shaders[PIPE_SHADER_TESS_CTRL] && ctx->curr_program->shaders[PIPE_SHADER_TESS_CTRL]->is_generated) {
      VKCTX(CmdPushConstants)(batch->state->cmdbuf, ctx->curr_program->base.layout, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
                              offsetof(struct zink_gfx_push_constant, default_inner_level), sizeof(float) * 6,
                              &ctx->tess_levels[0]);
   }

   if (have_streamout) {
      for (unsigned i = 0; i < ctx->num_so_targets; i++) {
         struct zink_so_target *t = zink_so_target(ctx->so_targets[i]);
         counter_buffers[i] = VK_NULL_HANDLE;
         if (t) {
            struct zink_resource *res = zink_resource(t->counter_buffer);
            t->stride = ctx->last_vertex_stage->sinfo.so_info.stride[i] * sizeof(uint32_t);
            zink_batch_reference_resource_rw(batch, res, true);
            res->obj->unordered_read = res->obj->unordered_write = false;
            if (t->counter_buffer_valid) {
               counter_buffers[i] = res->obj->buffer;
               counter_buffer_offsets[i] = t->counter_buffer_offset;
            }
         }
      }
      VKCTX(CmdBeginTransformFeedbackEXT)(batch->state->cmdbuf, 0, ctx->num_so_targets, counter_buffers, counter_buffer_offsets);
   }

   bool needs_drawid = reads_drawid && zink_get_last_vertex_key(ctx)->push_drawid;
   work_count += num_draws;
   if (index_size > 0) {
      if (dindirect && dindirect->buffer) {
         assert(num_draws == 1);
         if (needs_drawid)
            update_drawid(ctx, drawid_offset);
         struct zink_resource *indirect = zink_resource(dindirect->buffer);
         zink_batch_reference_resource_rw(batch, indirect, false);
         if (dindirect->indirect_draw_count) {
            struct zink_resource *indirect_draw_count = zink_resource(dindirect->indirect_draw_count);
            zink_batch_reference_resource_rw(batch, indirect_draw_count, false);
            VKCTX(CmdDrawIndexedIndirectCount)(batch->state->cmdbuf, indirect->obj->buffer, dindirect->offset,
                                               indirect_draw_count->obj->buffer, dindirect->indirect_draw_count_offset,
                                               dindirect->draw_count, dindirect->stride);
         } else
            VKCTX(CmdDrawIndexedIndirect)(batch->state->cmdbuf, indirect->obj->buffer, dindirect->offset, dindirect->draw_count, dindirect->stride);
      } else {
         if (need_index_buffer_unref)
            draw_indexed_need_index_buffer_unref(ctx, dinfo, draws, num_draws, drawid_offset, needs_drawid);
         else
            draw_indexed<HAS_MULTIDRAW>(ctx, dinfo, draws, num_draws, drawid_offset, needs_drawid);
      }
   } else {
      if (so_target && screen->info.tf_props.transformFeedbackDraw) {
         /* GTF-GL46.gtf40.GL3Tests.transform_feedback2.transform_feedback2_api attempts a bogus xfb
          * draw using a streamout target that has no data;
          * to avoid hanging the gpu, reject any such draws
          */
         if (so_target->counter_buffer_valid) {
            if (needs_drawid)
               update_drawid(ctx, drawid_offset);
            zink_batch_reference_resource_rw(batch, zink_resource(so_target->base.buffer), false);
            zink_batch_reference_resource_rw(batch, zink_resource(so_target->counter_buffer), true);
            VKCTX(CmdDrawIndirectByteCountEXT)(batch->state->cmdbuf, dinfo->instance_count, dinfo->start_instance,
                                               zink_resource(so_target->counter_buffer)->obj->buffer, so_target->counter_buffer_offset, 0,
                                               MIN2(so_target->stride, screen->info.tf_props.maxTransformFeedbackBufferDataStride));
         }
      } else if (dindirect && dindirect->buffer) {
         assert(num_draws == 1);
         if (needs_drawid)
            update_drawid(ctx, drawid_offset);
         struct zink_resource *indirect = zink_resource(dindirect->buffer);
         zink_batch_reference_resource_rw(batch, indirect, false);
         if (dindirect->indirect_draw_count) {
            struct zink_resource *indirect_draw_count = zink_resource(dindirect->indirect_draw_count);
            zink_batch_reference_resource_rw(batch, indirect_draw_count, false);
            VKCTX(CmdDrawIndirectCount)(batch->state->cmdbuf, indirect->obj->buffer, dindirect->offset,
                                        indirect_draw_count->obj->buffer, dindirect->indirect_draw_count_offset,
                                        dindirect->draw_count, dindirect->stride);
         } else
            VKCTX(CmdDrawIndirect)(batch->state->cmdbuf, indirect->obj->buffer, dindirect->offset, dindirect->draw_count, dindirect->stride);
      } else {
         draw<HAS_MULTIDRAW>(ctx, dinfo, draws, num_draws, drawid_offset, needs_drawid);
      }
   }

   if (have_streamout) {
      for (unsigned i = 0; i < ctx->num_so_targets; i++) {
         struct zink_so_target *t = zink_so_target(ctx->so_targets[i]);
         if (t) {
            counter_buffers[i] = zink_resource(t->counter_buffer)->obj->buffer;
            counter_buffer_offsets[i] = t->counter_buffer_offset;
            t->counter_buffer_valid = true;
         }
      }
      VKCTX(CmdEndTransformFeedbackEXT)(batch->state->cmdbuf, 0, ctx->num_so_targets, counter_buffers, counter_buffer_offsets);
   }
   batch->has_work = true;
   batch->last_was_compute = false;
   ctx->batch.work_count = work_count;
   /* flush if there's >30k draws queued */
   if (unlikely(work_count >= 30000) || ctx->oom_flush)
      pctx->flush(pctx, NULL, 0);
}

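/* thin wrapper that instantiates the zink_draw template for the function tables */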
template <zink_multidraw HAS_MULTIDRAW, zink_dynamic_state DYNAMIC_STATE, bool BATCH_CHANGED>
static void
zink_draw_vbo(struct pipe_context *pctx,
              const struct pipe_draw_info *info,
              unsigned drawid_offset,
              const struct pipe_draw_indirect_info *indirect,
              const struct pipe_draw_start_count_bias *draws,
              unsigned num_draws)
{
   zink_draw<HAS_MULTIDRAW, DYNAMIC_STATE, BATCH_CHANGED, false>(pctx, info, drawid_offset, indirect, draws, num_draws, NULL, 0);
}

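/* Display-list draw entrypoint: synthesize a pipe_draw_info for the vertex
 * state object, temporarily swap in its vertex elements state, and route
 * through zink_draw with DRAW_STATE = true.
 */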
template <zink_multidraw HAS_MULTIDRAW, zink_dynamic_state DYNAMIC_STATE, bool BATCH_CHANGED>
static void
zink_draw_vertex_state(struct pipe_context *pctx,
                       struct pipe_vertex_state *vstate,
                       uint32_t partial_velem_mask,
                       struct pipe_draw_vertex_state_info info,
                       const struct pipe_draw_start_count_bias *draws,
                       unsigned num_draws)
{
   struct pipe_draw_info dinfo = {};

   dinfo.mode = info.mode;
   dinfo.index_size = 4;
   dinfo.instance_count = 1;
   dinfo.index.resource = vstate->input.indexbuf;
   struct zink_context *ctx = zink_context(pctx);
   struct zink_resource *res = zink_resource(vstate->input.vbuffer.buffer.resource);
   zink_resource_buffer_barrier(ctx, res, VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
                                VK_PIPELINE_STAGE_VERTEX_INPUT_BIT);
   res->obj->unordered_read = false;
   struct zink_vertex_elements_hw_state *hw_state = ctx->gfx_pipeline_state.element_state;
   ctx->gfx_pipeline_state.element_state = &((struct zink_vertex_state*)vstate)->velems.hw_state;

   zink_draw<HAS_MULTIDRAW, DYNAMIC_STATE, BATCH_CHANGED, true>(pctx, &dinfo, 0, NULL, draws, num_draws, vstate, partial_velem_mask);
   ctx->gfx_pipeline_state.element_state = hw_state;

   if (info.take_vertex_state_ownership)
      pipe_vertex_state_reference(&vstate, NULL);
}

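/* Compute dispatch entrypoint: flushes pending barriers, updates descriptors
 * and the compute pipeline, then records either vkCmdDispatch or
 * vkCmdDispatchIndirect outside of any renderpass.
 */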
template <bool BATCH_CHANGED>
static void
zink_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info)
{
   struct zink_context *ctx = zink_context(pctx);
   struct zink_screen *screen = zink_screen(pctx->screen);
   struct zink_batch *batch = &ctx->batch;

   if (ctx->render_condition_active)
      zink_start_conditional_render(ctx);

   if (info->indirect) {
      /*
         VK_ACCESS_INDIRECT_COMMAND_READ_BIT specifies read access to indirect command data read as
         part of an indirect build, trace, drawing or dispatching command. Such access occurs in the
         VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT pipeline stage.

         - Chapter 7. Synchronization and Cache Control
       */
      check_buffer_barrier(ctx, info->indirect, VK_ACCESS_INDIRECT_COMMAND_READ_BIT, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT);
   }

   update_barriers(ctx, true, NULL, info->indirect, NULL);
   if (ctx->memory_barrier)
      zink_flush_memory_barrier(ctx, true);

   if (unlikely(zink_debug & ZINK_DEBUG_SYNC)) {
      zink_batch_no_rp(ctx);
      VkMemoryBarrier mb;
      mb.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
      mb.pNext = NULL;
      mb.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT;
      mb.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT;
      VKSCR(CmdPipelineBarrier)(ctx->batch.state->cmdbuf,
                                VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                                VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                                0, 1, &mb, 0, NULL, 0, NULL);
   }

   if (zink_program_has_descriptors(&ctx->curr_compute->base))
      screen->descriptors_update(ctx, true);
   if (ctx->di.any_bindless_dirty && ctx->curr_compute->base.dd->bindless)
      zink_descriptors_update_bindless(ctx);

   zink_program_update_compute_pipeline_state(ctx, ctx->curr_compute, info->block);
   VkPipeline prev_pipeline = ctx->compute_pipeline_state.pipeline;

   if (BATCH_CHANGED) {
      zink_update_descriptor_refs(ctx, true);
      zink_batch_reference_program(&ctx->batch, &ctx->curr_compute->base);
   }
   if (ctx->dirty_shader_stages & BITFIELD_BIT(PIPE_SHADER_COMPUTE)) {
      /* update inlinable constants */
      zink_update_compute_program(ctx);
      ctx->dirty_shader_stages &= ~BITFIELD_BIT(PIPE_SHADER_COMPUTE);
   }

   VkPipeline pipeline = zink_get_compute_pipeline(screen, ctx->curr_compute,
                                                   &ctx->compute_pipeline_state);

   if (prev_pipeline != pipeline || BATCH_CHANGED)
      VKCTX(CmdBindPipeline)(batch->state->cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
   if (BATCH_CHANGED) {
      ctx->pipeline_changed[1] = false;
      zink_select_launch_grid(ctx);
   }

   if (BITSET_TEST(ctx->compute_stage->nir->info.system_values_read, SYSTEM_VALUE_WORK_DIM))
      VKCTX(CmdPushConstants)(batch->state->cmdbuf, ctx->curr_compute->base.layout, VK_SHADER_STAGE_COMPUTE_BIT,
                              offsetof(struct zink_cs_push_constant, work_dim), sizeof(uint32_t),
                              &info->work_dim);

   batch->work_count++;
   zink_batch_no_rp(ctx);
   if (info->indirect) {
      VKCTX(CmdDispatchIndirect)(batch->state->cmdbuf, zink_resource(info->indirect)->obj->buffer, info->indirect_offset);
      zink_batch_reference_resource_rw(batch, zink_resource(info->indirect), false);
   } else
      VKCTX(CmdDispatch)(batch->state->cmdbuf, info->grid[0], info->grid[1], info->grid[2]);
   batch->has_work = true;
   batch->last_was_compute = true;
   /* flush if there's >30k dispatches queued */
   if (unlikely(ctx->batch.work_count >= 30000) || ctx->oom_flush)
      pctx->flush(pctx, NULL, 0);
}

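/* The templated init_* helpers below instantiate every zink_draw_vbo /
 * zink_draw_vertex_state specialization and store them in function tables
 * indexed by [multidraw][dynamic-state level][batch changed].
 */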
template <zink_multidraw HAS_MULTIDRAW, zink_dynamic_state DYNAMIC_STATE, bool BATCH_CHANGED>
static void
init_batch_changed_functions(struct zink_context *ctx, pipe_draw_vbo_func draw_vbo_array[2][4][2], pipe_draw_vertex_state_func draw_state_array[2][4][2])
{
   draw_vbo_array[HAS_MULTIDRAW][DYNAMIC_STATE][BATCH_CHANGED] = zink_draw_vbo<HAS_MULTIDRAW, DYNAMIC_STATE, BATCH_CHANGED>;
   draw_state_array[HAS_MULTIDRAW][DYNAMIC_STATE][BATCH_CHANGED] = zink_draw_vertex_state<HAS_MULTIDRAW, DYNAMIC_STATE, BATCH_CHANGED>;
}

template <zink_multidraw HAS_MULTIDRAW, zink_dynamic_state DYNAMIC_STATE>
static void
init_dynamic_state_functions(struct zink_context *ctx, pipe_draw_vbo_func draw_vbo_array[2][4][2], pipe_draw_vertex_state_func draw_state_array[2][4][2])
{
   init_batch_changed_functions<HAS_MULTIDRAW, DYNAMIC_STATE, false>(ctx, draw_vbo_array, draw_state_array);
   init_batch_changed_functions<HAS_MULTIDRAW, DYNAMIC_STATE, true>(ctx, draw_vbo_array, draw_state_array);
}

template <zink_multidraw HAS_MULTIDRAW>
static void
init_multidraw_functions(struct zink_context *ctx, pipe_draw_vbo_func draw_vbo_array[2][4][2], pipe_draw_vertex_state_func draw_state_array[2][4][2])
{
   init_dynamic_state_functions<HAS_MULTIDRAW, ZINK_NO_DYNAMIC_STATE>(ctx, draw_vbo_array, draw_state_array);
   init_dynamic_state_functions<HAS_MULTIDRAW, ZINK_DYNAMIC_STATE>(ctx, draw_vbo_array, draw_state_array);
   init_dynamic_state_functions<HAS_MULTIDRAW, ZINK_DYNAMIC_STATE2>(ctx, draw_vbo_array, draw_state_array);
   init_dynamic_state_functions<HAS_MULTIDRAW, ZINK_DYNAMIC_VERTEX_INPUT>(ctx, draw_vbo_array, draw_state_array);
}

static void
init_all_draw_functions(struct zink_context *ctx, pipe_draw_vbo_func draw_vbo_array[2][4][2], pipe_draw_vertex_state_func draw_state_array[2][4][2])
{
   init_multidraw_functions<ZINK_NO_MULTIDRAW>(ctx, draw_vbo_array, draw_state_array);
   init_multidraw_functions<ZINK_MULTIDRAW>(ctx, draw_vbo_array, draw_state_array);
}

template <bool BATCH_CHANGED>
static void
init_grid_batch_changed_functions(struct zink_context *ctx)
{
   ctx->launch_grid[BATCH_CHANGED] = zink_launch_grid<BATCH_CHANGED>;
}

static void
init_all_grid_functions(struct zink_context *ctx)
{
   init_grid_batch_changed_functions<false>(ctx);
   init_grid_batch_changed_functions<true>(ctx);
}

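/* stub entrypoints installed while no vertex/compute shader is bound;
 * reaching any of these indicates a state-tracker bug
 */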
static void
zink_invalid_draw_vbo(struct pipe_context *pipe,
                      const struct pipe_draw_info *dinfo,
                      unsigned drawid_offset,
                      const struct pipe_draw_indirect_info *dindirect,
                      const struct pipe_draw_start_count_bias *draws,
                      unsigned num_draws)
{
   unreachable("vertex shader not bound");
}

static void
zink_invalid_draw_vertex_state(struct pipe_context *pipe,
                               struct pipe_vertex_state *vstate,
                               uint32_t partial_velem_mask,
                               struct pipe_draw_vertex_state_info info,
                               const struct pipe_draw_start_count_bias *draws,
                               unsigned num_draws)
{
   unreachable("vertex shader not bound");
}

static void
zink_invalid_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info)
{
   unreachable("compute shader not bound");
}

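/* Program-cache hash/compare functions, specialized per stage combination.
 * STAGE_MASK encodes the optional stages as bits (GS = 1, TCS = 2, TES = 4);
 * VS and FS are always present. Combinations with TCS but no TES are
 * impossible in GL, hence the gaps below.
 */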
template <unsigned STAGE_MASK>
static uint32_t
hash_gfx_program(const void *key)
{
   const struct zink_shader **shaders = (const struct zink_shader**)key;
   uint32_t base_hash = shaders[PIPE_SHADER_VERTEX]->hash ^ shaders[PIPE_SHADER_FRAGMENT]->hash;
   if (STAGE_MASK == 0) //VS+FS
      return base_hash;
   if (STAGE_MASK == 1) //VS+GS+FS
      return base_hash ^ shaders[PIPE_SHADER_GEOMETRY]->hash;
   /* VS+TCS+FS isn't a thing */
   /* VS+TCS+GS+FS isn't a thing */
   if (STAGE_MASK == 4) //VS+TES+FS
      return base_hash ^ shaders[PIPE_SHADER_TESS_EVAL]->hash;
   if (STAGE_MASK == 5) //VS+TES+GS+FS
      return base_hash ^ shaders[PIPE_SHADER_GEOMETRY]->hash ^ shaders[PIPE_SHADER_TESS_EVAL]->hash;
   if (STAGE_MASK == 6) //VS+TCS+TES+FS
      return base_hash ^ shaders[PIPE_SHADER_TESS_CTRL]->hash ^ shaders[PIPE_SHADER_TESS_EVAL]->hash;

   /* all stages */
   return base_hash ^ shaders[PIPE_SHADER_GEOMETRY]->hash ^ shaders[PIPE_SHADER_TESS_CTRL]->hash ^ shaders[PIPE_SHADER_TESS_EVAL]->hash;
}

template <unsigned STAGE_MASK>
static bool
equals_gfx_program(const void *a, const void *b)
{
   const void **sa = (const void**)a;
   const void **sb = (const void**)b;
   if (STAGE_MASK == 0) //VS+FS
      return !memcmp(a, b, sizeof(void*) * 2);
   if (STAGE_MASK == 1) //VS+GS+FS
      return !memcmp(a, b, sizeof(void*) * 3);
   /* VS+TCS+FS isn't a thing */
   /* VS+TCS+GS+FS isn't a thing */
   if (STAGE_MASK == 4) //VS+TES+FS
      return sa[PIPE_SHADER_TESS_EVAL] == sb[PIPE_SHADER_TESS_EVAL] && !memcmp(a, b, sizeof(void*) * 2);
   if (STAGE_MASK == 5) //VS+TES+GS+FS
      return sa[PIPE_SHADER_TESS_EVAL] == sb[PIPE_SHADER_TESS_EVAL] && !memcmp(a, b, sizeof(void*) * 3);
   if (STAGE_MASK == 6) //VS+TCS+TES+FS
      return !memcmp(&sa[PIPE_SHADER_TESS_CTRL], &sb[PIPE_SHADER_TESS_CTRL], sizeof(void*) * 2) &&
             !memcmp(a, b, sizeof(void*) * 2);

   /* all stages */
   return !memcmp(a, b, sizeof(void*) * ZINK_SHADER_COUNT);
}

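/* Select the dispatch-table variants matching the device's extension support,
 * copy them into the context, and seed the per-stage-mask program caches.
 */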
extern "C"
void
zink_init_draw_functions(struct zink_context *ctx, struct zink_screen *screen)
{
   pipe_draw_vbo_func draw_vbo_array[2][4]  //multidraw, zink_dynamic_state
                                    [2];    //batch changed
   pipe_draw_vertex_state_func draw_state_array[2][4]  //multidraw, zink_dynamic_state
                                               [2];    //batch changed
   zink_dynamic_state dynamic;
   if (screen->info.have_EXT_extended_dynamic_state) {
      if (screen->info.have_EXT_extended_dynamic_state2) {
         if (screen->info.have_EXT_vertex_input_dynamic_state)
            dynamic = ZINK_DYNAMIC_VERTEX_INPUT;
         else
            dynamic = ZINK_DYNAMIC_STATE2;
      } else {
         dynamic = ZINK_DYNAMIC_STATE;
      }
   } else {
      dynamic = ZINK_NO_DYNAMIC_STATE;
   }
   init_all_draw_functions(ctx, draw_vbo_array, draw_state_array);
   memcpy(ctx->draw_vbo, &draw_vbo_array[screen->info.have_EXT_multi_draw]
                                        [dynamic],
                                        sizeof(ctx->draw_vbo));
   memcpy(ctx->draw_state, &draw_state_array[screen->info.have_EXT_multi_draw]
                                            [dynamic],
                                            sizeof(ctx->draw_state));

   /* Bind a fake draw_vbo, so that draw_vbo isn't NULL, which would skip
    * initialization of callbacks in upper layers (such as u_threaded_context).
    */
   ctx->base.draw_vbo = zink_invalid_draw_vbo;
   ctx->base.draw_vertex_state = zink_invalid_draw_vertex_state;

   _mesa_hash_table_init(&ctx->program_cache[0], ctx, hash_gfx_program<0>, equals_gfx_program<0>);
   _mesa_hash_table_init(&ctx->program_cache[1], ctx, hash_gfx_program<1>, equals_gfx_program<1>);
   _mesa_hash_table_init(&ctx->program_cache[2], ctx, hash_gfx_program<2>, equals_gfx_program<2>);
   _mesa_hash_table_init(&ctx->program_cache[3], ctx, hash_gfx_program<3>, equals_gfx_program<3>);
   _mesa_hash_table_init(&ctx->program_cache[4], ctx, hash_gfx_program<4>, equals_gfx_program<4>);
   _mesa_hash_table_init(&ctx->program_cache[5], ctx, hash_gfx_program<5>, equals_gfx_program<5>);
   _mesa_hash_table_init(&ctx->program_cache[6], ctx, hash_gfx_program<6>, equals_gfx_program<6>);
   _mesa_hash_table_init(&ctx->program_cache[7], ctx, hash_gfx_program<7>, equals_gfx_program<7>);
}

void
zink_init_grid_functions(struct zink_context *ctx)
{
   init_all_grid_functions(ctx);
   /* Bind a fake launch_grid, so that launch_grid isn't NULL, which would skip
    * initialization of callbacks in upper layers (such as u_threaded_context).
    */
   ctx->base.launch_grid = zink_invalid_launch_grid;
}