#include "zink_batch.h"
#include "zink_compiler.h"
#include "zink_context.h"
#include "zink_descriptors.h"
#include "zink_program.h"
#include "zink_program_state.hpp"
#include "zink_query.h"
#include "zink_resource.h"
#include "zink_screen.h"
#include "zink_state.h"
#include "zink_surface.h"
#include "zink_inlines.h"

#include "util/hash_table.h"
#include "util/u_cpu_detect.h"
#include "util/u_debug.h"
#include "util/u_helpers.h"
#include "util/u_inlines.h"
#include "util/u_prim.h"
#include "util/u_prim_restart.h"

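/* Emit a buffer barrier for each bound streamout counter buffer. If the counter
 * holds valid data from a previous pass, the barrier also covers the counter
 * read performed by indirect xfb draws (see the spec quote below).
 */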
static void
zink_emit_xfb_counter_barrier(struct zink_context *ctx)
{
   for (unsigned i = 0; i < ctx->num_so_targets; i++) {
      struct zink_so_target *t = zink_so_target(ctx->so_targets[i]);
      if (!t)
         continue;
      struct zink_resource *res = zink_resource(t->counter_buffer);
      VkAccessFlags access = VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT;
      VkPipelineStageFlags stage = VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT;
      if (t->counter_buffer_valid) {
         /* Between the pause and resume there needs to be a memory barrier for the counter buffers
          * with a source access of VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT
          * at pipeline stage VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT
          * to a destination access of VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT
          * at pipeline stage VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT.
          *
          * - from VK_EXT_transform_feedback spec
          */
         access |= VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT;
         stage |= VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT;
      }
      zink_screen(ctx->base.screen)->buffer_barrier(ctx, res, access, stage);
      if (!ctx->unordered_blitting)
         res->obj->unordered_read = false;
   }
}

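/* Bind all streamout targets for the current draw. Missing targets are aliased
 * to a 1-byte dummy buffer so the binding array stays dense, and targets whose
 * resource was rebound since the last pass have their counter data invalidated.
 */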
static void
zink_emit_stream_output_targets(struct pipe_context *pctx)
{
   struct zink_context *ctx = zink_context(pctx);
   struct zink_batch *batch = &ctx->batch;
   VkBuffer buffers[PIPE_MAX_SO_BUFFERS] = {0};
   VkDeviceSize buffer_offsets[PIPE_MAX_SO_BUFFERS] = {0};
   VkDeviceSize buffer_sizes[PIPE_MAX_SO_BUFFERS] = {0};

   for (unsigned i = 0; i < ctx->num_so_targets; i++) {
      struct zink_so_target *t = (struct zink_so_target *)ctx->so_targets[i];
      if (!t) {
         /* no need to reference this or anything */
         buffers[i] = zink_resource(ctx->dummy_xfb_buffer)->obj->buffer;
         buffer_offsets[i] = 0;
         buffer_sizes[i] = sizeof(uint8_t);
         continue;
      }
      struct zink_resource *res = zink_resource(t->base.buffer);
      if (!res->so_valid)
         /* resource has been rebound */
         t->counter_buffer_valid = false;
      buffers[i] = res->obj->buffer;
      zink_batch_reference_resource_rw(batch, res, true);
      buffer_offsets[i] = t->base.buffer_offset;
      buffer_sizes[i] = t->base.buffer_size;
      res->so_valid = true;
      if (!ctx->unordered_blitting) {
         res->obj->unordered_read = res->obj->unordered_write = false;
         res->obj->access = VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT;
         res->obj->access_stage = VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT;
      }
      util_range_add(t->base.buffer, &res->valid_buffer_range, t->base.buffer_offset,
                     t->base.buffer_offset + t->base.buffer_size);
   }

   VKCTX(CmdBindTransformFeedbackBuffersEXT)(batch->state->cmdbuf, 0, ctx->num_so_targets,
                                             buffers, buffer_offsets,
                                             buffer_sizes);
   ctx->dirty_so_targets = false;
}

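/* Helpers to synchronize draw-time buffer reads: index buffers are read at the
 * VERTEX_INPUT stage, indirect parameter buffers at DRAW_INDIRECT. Any barrier
 * also forces subsequent access out of the unordered-submit path.
 */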
ALWAYS_INLINE static void
check_buffer_barrier(struct zink_context *ctx, struct pipe_resource *pres, VkAccessFlags flags, VkPipelineStageFlags pipeline)
{
   struct zink_resource *res = zink_resource(pres);
   zink_screen(ctx->base.screen)->buffer_barrier(ctx, res, flags, pipeline);
   if (!ctx->unordered_blitting)
      res->obj->unordered_read = false;
}

ALWAYS_INLINE static void
barrier_draw_buffers(struct zink_context *ctx, const struct pipe_draw_info *dinfo,
                     const struct pipe_draw_indirect_info *dindirect, struct pipe_resource *index_buffer)
{
   if (index_buffer)
      check_buffer_barrier(ctx, index_buffer, VK_ACCESS_INDEX_READ_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT);
   if (dindirect && dindirect->buffer) {
      check_buffer_barrier(ctx, dindirect->buffer,
                           VK_ACCESS_INDIRECT_COMMAND_READ_BIT, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT);
      if (dindirect->indirect_draw_count)
         check_buffer_barrier(ctx, dindirect->indirect_draw_count,
                              VK_ACCESS_INDIRECT_COMMAND_READ_BIT, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT);
   }
}

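/* DGC (VK_NV_device_generated_commands) variant of vertex buffer binding:
 * instead of recording vkCmdBindVertexBuffers, this writes one
 * VkBindVertexBufferIndirectCommandNV token per binding using the buffer's
 * device address.
 */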
static void
bind_vertex_buffers_dgc(struct zink_context *ctx)
{
   struct zink_vertex_elements_state *elems = ctx->element_state;

   ctx->vertex_buffers_dirty = false;
   if (!elems->hw_state.num_bindings)
      return;
   for (unsigned i = 0; i < elems->hw_state.num_bindings; i++) {
      struct pipe_vertex_buffer *vb = ctx->vertex_buffers + ctx->element_state->hw_state.binding_map[i];
      assert(vb);
      VkBindVertexBufferIndirectCommandNV *ptr;
      VkIndirectCommandsLayoutTokenNV *token = zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_VERTEX_BUFFER_NV, (void**)&ptr);
      token->vertexBindingUnit = ctx->element_state->hw_state.binding_map[i];
      if (vb->buffer.resource) {
         struct zink_resource *res = zink_resource(vb->buffer.resource);
         assert(res->obj->bda);
         ptr->bufferAddress = res->obj->bda + vb->buffer_offset;
         ptr->size = res->base.b.width0;
         ptr->stride = ctx->element_state->hw_state.b.strides[i];
      } else {
         ptr->bufferAddress = 0;
         ptr->size = 0;
         ptr->stride = 0;
      }
   }
}

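/* Bind vertex buffers according to the vertex-elements CSO. Empty bindings are
 * pointed at a dummy buffer so the contiguous binding range [0, num_bindings)
 * is always valid; with dynamic vertex input the attribute/binding layout is
 * also set here via vkCmdSetVertexInputEXT.
 */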
template <zink_dynamic_state DYNAMIC_STATE>
static void
zink_bind_vertex_buffers(struct zink_batch *batch, struct zink_context *ctx)
{
   VkBuffer buffers[PIPE_MAX_ATTRIBS];
   VkDeviceSize buffer_offsets[PIPE_MAX_ATTRIBS];
   struct zink_vertex_elements_state *elems = ctx->element_state;
   struct zink_screen *screen = zink_screen(ctx->base.screen);

   for (unsigned i = 0; i < elems->hw_state.num_bindings; i++) {
      struct pipe_vertex_buffer *vb = ctx->vertex_buffers + elems->hw_state.binding_map[i];
      assert(vb);
      if (vb->buffer.resource) {
         struct zink_resource *res = zink_resource(vb->buffer.resource);
         assert(res->obj->buffer);
         buffers[i] = res->obj->buffer;
         buffer_offsets[i] = vb->buffer_offset;
      } else {
         buffers[i] = zink_resource(ctx->dummy_vertex_buffer)->obj->buffer;
         buffer_offsets[i] = 0;
      }
   }

   if (DYNAMIC_STATE != ZINK_NO_DYNAMIC_STATE &&
       DYNAMIC_STATE != ZINK_DYNAMIC_VERTEX_INPUT2 &&
       DYNAMIC_STATE != ZINK_DYNAMIC_VERTEX_INPUT) {
      if (elems->hw_state.num_bindings)
         VKCTX(CmdBindVertexBuffers2)(batch->state->cmdbuf, 0,
                                      elems->hw_state.num_bindings,
                                      buffers, buffer_offsets, NULL, elems->hw_state.b.strides);
   } else if (elems->hw_state.num_bindings)
      VKSCR(CmdBindVertexBuffers)(batch->state->cmdbuf, 0,
                                  elems->hw_state.num_bindings,
                                  buffers, buffer_offsets);

   if (DYNAMIC_STATE == ZINK_DYNAMIC_VERTEX_INPUT2 || DYNAMIC_STATE == ZINK_DYNAMIC_VERTEX_INPUT)
      VKCTX(CmdSetVertexInputEXT)(batch->state->cmdbuf,
                                  elems->hw_state.num_bindings, elems->hw_state.dynbindings,
                                  elems->hw_state.num_attribs, elems->hw_state.dynattribs);

   ctx->vertex_buffers_dirty = false;
}

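/* gl_DrawID is fed to shaders through a push constant; these helpers update it
 * for the direct and DGC paths respectively.
 */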
ALWAYS_INLINE static void
update_drawid(struct zink_context *ctx, unsigned draw_id)
{
   VKCTX(CmdPushConstants)(ctx->batch.state->cmdbuf, ctx->curr_program->base.layout, VK_SHADER_STAGE_ALL_GRAPHICS,
                      offsetof(struct zink_gfx_push_constant, draw_id), sizeof(unsigned),
                      &draw_id);
}

static void
update_drawid_dgc(struct zink_context *ctx, unsigned draw_id)
{
   uint32_t *ptr;
   VkIndirectCommandsLayoutTokenNV *token = zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_NV, (void**)&ptr);
   token->pushconstantOffset = offsetof(struct zink_gfx_push_constant, draw_id);
   token->pushconstantSize = sizeof(unsigned);
   *ptr = draw_id;
}

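/* The *_need_index_buffer_unref draw variants ignore draws[i].start and draw
 * from index 0, presumably because the start offset has already been applied
 * to the index buffer binding itself (e.g. after a rebased user-index upload).
 */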
ALWAYS_INLINE static void
draw_indexed_dgc_need_index_buffer_unref(struct zink_context *ctx,
                 const struct pipe_draw_info *dinfo,
                 const struct pipe_draw_start_count_bias *draws,
                 unsigned num_draws,
                 unsigned draw_id,
                 bool needs_drawid)
{
   if (dinfo->increment_draw_id && needs_drawid) {
      for (unsigned i = 0; i < num_draws; i++) {
         update_drawid_dgc(ctx, draw_id);
         VkDrawIndexedIndirectCommand *ptr, cmd = {
            draws[i].count, dinfo->instance_count, 0, draws[i].index_bias, dinfo->start_instance
         };
         zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_NV, (void**)&ptr);
         *ptr = cmd;
         draw_id++;
      }
   } else {
      if (needs_drawid)
         update_drawid_dgc(ctx, draw_id);
      for (unsigned i = 0; i < num_draws; i++) {
         VkDrawIndexedIndirectCommand *ptr, cmd = {
            draws[i].count, dinfo->instance_count, 0, draws[i].index_bias, dinfo->start_instance
         };
         zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_NV, (void**)&ptr);
         *ptr = cmd;
      }
   }
}

ALWAYS_INLINE static void
draw_indexed_need_index_buffer_unref(struct zink_context *ctx,
             const struct pipe_draw_info *dinfo,
             const struct pipe_draw_start_count_bias *draws,
             unsigned num_draws,
             unsigned draw_id,
             bool needs_drawid)
{
   VkCommandBuffer cmdbuf = ctx->batch.state->cmdbuf;
   if (dinfo->increment_draw_id && needs_drawid) {
      for (unsigned i = 0; i < num_draws; i++) {
         update_drawid(ctx, draw_id);
         VKCTX(CmdDrawIndexed)(cmdbuf,
            draws[i].count, dinfo->instance_count,
            0, draws[i].index_bias, dinfo->start_instance);
         draw_id++;
      }
   } else {
      if (needs_drawid)
         update_drawid(ctx, draw_id);
      for (unsigned i = 0; i < num_draws; i++)
         VKCTX(CmdDrawIndexed)(cmdbuf,
            draws[i].count, dinfo->instance_count,
            0, draws[i].index_bias, dinfo->start_instance);
   }
}

ALWAYS_INLINE static void
draw_indexed_dgc(struct zink_context *ctx,
                 const struct pipe_draw_info *dinfo,
                 const struct pipe_draw_start_count_bias *draws,
                 unsigned num_draws,
                 unsigned draw_id,
                 bool needs_drawid)
{
   if (dinfo->increment_draw_id && needs_drawid) {
      for (unsigned i = 0; i < num_draws; i++) {
         update_drawid_dgc(ctx, draw_id);
         VkDrawIndexedIndirectCommand *ptr, cmd = {
            draws[i].count, dinfo->instance_count, draws[i].start, draws[i].index_bias, dinfo->start_instance
         };
         zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_NV, (void**)&ptr);
         *ptr = cmd;
         draw_id++;
      }
   } else {
      if (needs_drawid)
         update_drawid_dgc(ctx, draw_id);
      for (unsigned i = 0; i < num_draws; i++) {
         VkDrawIndexedIndirectCommand *ptr, cmd = {
            draws[i].count, dinfo->instance_count, draws[i].start, draws[i].index_bias, dinfo->start_instance
         };
         zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_NV, (void**)&ptr);
         *ptr = cmd;
      }
   }
}

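/* Indexed direct draws. With VK_EXT_multi_draw the whole array of draws is
 * submitted in one vkCmdDrawMultiIndexedEXT call (pipe_draw_start_count_bias is
 * layout-compatible with VkMultiDrawIndexedInfoEXT, hence the cast); otherwise
 * each draw is emitted individually.
 */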
template <zink_multidraw HAS_MULTIDRAW>
ALWAYS_INLINE static void
draw_indexed(struct zink_context *ctx,
             const struct pipe_draw_info *dinfo,
             const struct pipe_draw_start_count_bias *draws,
             unsigned num_draws,
             unsigned draw_id,
             bool needs_drawid)
{
   VkCommandBuffer cmdbuf = ctx->batch.state->cmdbuf;
   if (dinfo->increment_draw_id && needs_drawid) {
      for (unsigned i = 0; i < num_draws; i++) {
         update_drawid(ctx, draw_id);
         VKCTX(CmdDrawIndexed)(cmdbuf,
            draws[i].count, dinfo->instance_count,
            draws[i].start, draws[i].index_bias, dinfo->start_instance);
         draw_id++;
      }
   } else {
      if (needs_drawid)
         update_drawid(ctx, draw_id);
      if (HAS_MULTIDRAW) {
         VKCTX(CmdDrawMultiIndexedEXT)(cmdbuf, num_draws, (const VkMultiDrawIndexedInfoEXT*)draws,
                                       dinfo->instance_count,
                                       dinfo->start_instance, sizeof(struct pipe_draw_start_count_bias),
                                       dinfo->index_bias_varies ? NULL : &draws[0].index_bias);
      } else {
         for (unsigned i = 0; i < num_draws; i++)
            VKCTX(CmdDrawIndexed)(cmdbuf,
               draws[i].count, dinfo->instance_count,
               draws[i].start, draws[i].index_bias, dinfo->start_instance);
      }
   }
}

ALWAYS_INLINE static void
draw_dgc(struct zink_context *ctx,
         const struct pipe_draw_info *dinfo,
         const struct pipe_draw_start_count_bias *draws,
         unsigned num_draws,
         unsigned draw_id,
         bool needs_drawid)
{
   if (dinfo->increment_draw_id && needs_drawid) {
      for (unsigned i = 0; i < num_draws; i++) {
         update_drawid_dgc(ctx, draw_id);
         VkDrawIndirectCommand *ptr, cmd = {
            draws[i].count, dinfo->instance_count, draws[i].start, dinfo->start_instance
         };
         zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_NV, (void**)&ptr);
         *ptr = cmd;
         draw_id++;
      }
   } else {
      if (needs_drawid)
         update_drawid_dgc(ctx, draw_id);
      for (unsigned i = 0; i < num_draws; i++) {
         VkDrawIndirectCommand *ptr, cmd = {
            draws[i].count, dinfo->instance_count, draws[i].start, dinfo->start_instance
         };
         zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_NV, (void**)&ptr);
         *ptr = cmd;
      }
   }
}

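/* Non-indexed counterpart of draw_indexed() above, again batching through
 * vkCmdDrawMultiEXT when VK_EXT_multi_draw is available.
 */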
template <zink_multidraw HAS_MULTIDRAW>
ALWAYS_INLINE static void
draw(struct zink_context *ctx,
     const struct pipe_draw_info *dinfo,
     const struct pipe_draw_start_count_bias *draws,
     unsigned num_draws,
     unsigned draw_id,
     bool needs_drawid)
{
   VkCommandBuffer cmdbuf = ctx->batch.state->cmdbuf;
   if (dinfo->increment_draw_id && needs_drawid) {
      for (unsigned i = 0; i < num_draws; i++) {
         update_drawid(ctx, draw_id);
         VKCTX(CmdDraw)(cmdbuf, draws[i].count, dinfo->instance_count, draws[i].start, dinfo->start_instance);
         draw_id++;
      }
   } else {
      if (needs_drawid)
         update_drawid(ctx, draw_id);
      if (HAS_MULTIDRAW)
         VKCTX(CmdDrawMultiEXT)(cmdbuf, num_draws, (const VkMultiDrawInfoEXT*)draws,
                                dinfo->instance_count, dinfo->start_instance,
                                sizeof(struct pipe_draw_start_count_bias));
      else {
         for (unsigned i = 0; i < num_draws; i++)
            VKCTX(CmdDraw)(cmdbuf, draws[i].count, dinfo->instance_count, draws[i].start, dinfo->start_instance);
      }
   }
}

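/* Update shader state and bind the resulting pipeline (or, with
 * EXT_shader_object, the individual shader stages). Returns true if the bound
 * pipeline changed, which gates re-emission of pipeline-static state below.
 */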
template <zink_dynamic_state DYNAMIC_STATE, bool BATCH_CHANGED>
static bool
update_gfx_pipeline(struct zink_context *ctx, struct zink_batch_state *bs, enum mesa_prim mode, bool can_dgc)
{
   VkPipeline prev_pipeline = ctx->gfx_pipeline_state.pipeline;
   const struct zink_screen *screen = zink_screen(ctx->base.screen);
   bool shaders_changed = ctx->gfx_dirty || ctx->dirty_gfx_stages;
   if (screen->optimal_keys && !ctx->is_generated_gs_bound)
      zink_gfx_program_update_optimal(ctx);
   else
      zink_gfx_program_update(ctx);
   bool pipeline_changed = false;
   VkPipeline pipeline = VK_NULL_HANDLE;
   if (!ctx->curr_program->base.uses_shobj) {
      if (screen->info.have_EXT_graphics_pipeline_library)
         pipeline = zink_get_gfx_pipeline<DYNAMIC_STATE, true>(ctx, ctx->curr_program, &ctx->gfx_pipeline_state, mode);
      else
         pipeline = zink_get_gfx_pipeline<DYNAMIC_STATE, false>(ctx, ctx->curr_program, &ctx->gfx_pipeline_state, mode);
   }
   if (pipeline) {
      pipeline_changed = prev_pipeline != pipeline;
      if (BATCH_CHANGED || pipeline_changed || ctx->shobj_draw) {
         ctx->dgc.last_prog = ctx->curr_program;
         if (unlikely(can_dgc && screen->info.nv_dgc_props.maxGraphicsShaderGroupCount == 1)) {
            VkBindShaderGroupIndirectCommandNV *ptr;
            zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_SHADER_GROUP_NV, (void**)&ptr);
            util_dynarray_append(&ctx->dgc.pipelines, VkPipeline, pipeline);
            /* zero-indexed -> base + group + num_pipelines-1 = base + num_pipelines */
            ptr->groupIndex = util_dynarray_num_elements(&ctx->dgc.pipelines, VkPipeline) + 1;
         } else {
            VKCTX(CmdBindPipeline)(bs->cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
         }
      }
      ctx->shobj_draw = false;
   } else {
      if (BATCH_CHANGED || shaders_changed || !ctx->shobj_draw) {
         VkShaderStageFlagBits stages[] = {
            VK_SHADER_STAGE_VERTEX_BIT,
            VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
            VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
            VK_SHADER_STAGE_GEOMETRY_BIT,
            VK_SHADER_STAGE_FRAGMENT_BIT,
         };
         /* always rebind all stages */
         VKCTX(CmdBindShadersEXT)(bs->cmdbuf, ZINK_GFX_SHADER_COUNT, stages, ctx->curr_program->objects);
         VKCTX(CmdSetDepthBiasEnable)(bs->cmdbuf, VK_TRUE);
         VKCTX(CmdSetTessellationDomainOriginEXT)(bs->cmdbuf, VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT);
         VKCTX(CmdSetSampleLocationsEnableEXT)(bs->cmdbuf, ctx->gfx_pipeline_state.sample_locations_enabled);
      }
      ctx->shobj_draw = true;
   }
   return pipeline_changed;
}

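/* Reduced primitive type as seen by the rasterizer: the shader pipeline can
 * override it (shader_rast_prim, e.g. a GS emitting points), and polygon fill
 * modes further demote triangles to lines or points in zink_rast_prim().
 */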
static enum mesa_prim
zink_prim_type(const struct zink_context *ctx,
               const struct pipe_draw_info *dinfo)
{
   if (ctx->gfx_pipeline_state.shader_rast_prim != MESA_PRIM_COUNT)
      return ctx->gfx_pipeline_state.shader_rast_prim;

   return u_reduced_prim((enum mesa_prim)dinfo->mode);
}

static enum mesa_prim
zink_rast_prim(const struct zink_context *ctx,
               const struct pipe_draw_info *dinfo)
{
   enum mesa_prim prim_type = zink_prim_type(ctx, dinfo);
   assert(prim_type != MESA_PRIM_COUNT);

   if (prim_type == MESA_PRIM_TRIANGLES &&
       ctx->rast_state->base.fill_front != PIPE_POLYGON_MODE_FILL) {
      switch (ctx->rast_state->base.fill_front) {
      case PIPE_POLYGON_MODE_POINT:
         return MESA_PRIM_POINTS;
      case PIPE_POLYGON_MODE_LINE:
         return MESA_PRIM_LINES;
      default:
         unreachable("unexpected polygon mode");
      }
   }

   return prim_type;
}

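/* Main draw entrypoint, specialized at compile time on:
 *  - HAS_MULTIDRAW: VK_EXT_multi_draw support
 *  - DYNAMIC_STATE: which VK_EXT_extended_dynamic_state* level is available
 *  - BATCH_CHANGED: first draw on a new batch state, forcing all dynamic
 *    state and descriptors to be re-emitted
 *  - DRAW_STATE: draw comes from pipe_vertex_state, in which case vertex
 *    buffers are bound by the caller
 */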
template <zink_multidraw HAS_MULTIDRAW, zink_dynamic_state DYNAMIC_STATE, bool BATCH_CHANGED, bool DRAW_STATE>
void
zink_draw(struct pipe_context *pctx,
          const struct pipe_draw_info *dinfo,
          unsigned drawid_offset,
          const struct pipe_draw_indirect_info *dindirect,
          const struct pipe_draw_start_count_bias *draws,
          unsigned num_draws,
          struct pipe_vertex_state *vstate,
          uint32_t partial_velem_mask)
{
   if (!dindirect && (!draws[0].count || !dinfo->instance_count))
      return;

   struct zink_context *ctx = zink_context(pctx);
   struct zink_screen *screen = zink_screen(pctx->screen);
   struct zink_rasterizer_state *rast_state = ctx->rast_state;
   struct zink_depth_stencil_alpha_state *dsa_state = ctx->dsa_state;
   struct zink_batch *batch = &ctx->batch;
   struct zink_so_target *so_target =
      dindirect && dindirect->count_from_stream_output ?
         zink_so_target(dindirect->count_from_stream_output) : NULL;
   VkBuffer counter_buffers[PIPE_MAX_SO_BUFFERS];
   VkDeviceSize counter_buffer_offsets[PIPE_MAX_SO_BUFFERS];
   bool need_index_buffer_unref = false;
   bool mode_changed = ctx->gfx_pipeline_state.gfx_prim_mode != dinfo->mode;
   bool reads_drawid = ctx->shader_reads_drawid;
   bool reads_basevertex = ctx->shader_reads_basevertex;
   unsigned work_count = ctx->batch.work_count;
   enum mesa_prim mode = (enum mesa_prim)dinfo->mode;

   if (ctx->memory_barrier && !ctx->blitting)
      zink_flush_memory_barrier(ctx, false);

   if (unlikely(ctx->buffer_rebind_counter < screen->buffer_rebind_counter && !ctx->blitting)) {
      ctx->buffer_rebind_counter = screen->buffer_rebind_counter;
      zink_rebind_all_buffers(ctx);
   }

   if (unlikely(ctx->image_rebind_counter < screen->image_rebind_counter && !ctx->blitting)) {
      ctx->image_rebind_counter = screen->image_rebind_counter;
      zink_rebind_all_images(ctx);
   }

   if (mode_changed)
      zink_flush_dgc_if_enabled(ctx);

   unsigned index_offset = 0;
   unsigned index_size = dinfo->index_size;
   struct pipe_resource *index_buffer = NULL;
   if (index_size > 0) {
      if (dinfo->has_user_indices) {
         if (!util_upload_index_buffer(pctx, dinfo, &draws[0], &index_buffer, &index_offset, 4)) {
            debug_printf("util_upload_index_buffer() failed\n");
            return;
         }
         /* this will have extra refs from tc */
         if (screen->threaded)
            zink_batch_reference_resource_move(batch, zink_resource(index_buffer));
         else
            zink_batch_reference_resource(batch, zink_resource(index_buffer));
      } else {
         index_buffer = dinfo->index.resource;
         zink_batch_reference_resource_rw(batch, zink_resource(index_buffer), false);
      }
      assert(index_size <= 4 && index_size != 3);
      assert(index_size != 1 || screen->info.have_EXT_index_type_uint8);
   }

   ctx->was_line_loop = dinfo->was_line_loop;

   bool have_streamout = !!ctx->num_so_targets;
   if (have_streamout) {
      zink_emit_xfb_counter_barrier(ctx);
      if (ctx->dirty_so_targets) {
         /* have to loop here and below because barriers must be emitted out of renderpass,
          * but xfb buffers can't be bound before the renderpass is active to avoid
          * breaking from recursion
          */
         for (unsigned i = 0; i < ctx->num_so_targets; i++) {
            struct zink_so_target *t = (struct zink_so_target *)ctx->so_targets[i];
            if (t) {
               struct zink_resource *res = zink_resource(t->base.buffer);
               zink_screen(ctx->base.screen)->buffer_barrier(ctx, res,
                                            VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT, VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT);
               if (!ctx->unordered_blitting)
                  res->obj->unordered_read = res->obj->unordered_write = false;
            }
         }
      }
   }

   barrier_draw_buffers(ctx, dinfo, dindirect, index_buffer);
   /* this may re-emit draw buffer barriers, but such synchronization is harmless */
   if (!ctx->blitting)
      zink_update_barriers(ctx, false, index_buffer, dindirect ? dindirect->buffer : NULL, dindirect ? dindirect->indirect_draw_count : NULL);

   bool can_dgc = false;
   if (unlikely(zink_debug & ZINK_DEBUG_DGC))
      can_dgc = !so_target && !ctx->num_so_targets && (!dindirect || !dindirect->buffer);

   /* ensure synchronization between doing streamout with counter buffer
    * and using counter buffer for indirect draw
    */
   if (so_target && so_target->counter_buffer_valid) {
      struct zink_resource *res = zink_resource(so_target->counter_buffer);
      zink_screen(ctx->base.screen)->buffer_barrier(ctx, res,
                                   VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT,
                                   VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT);
      if (!ctx->unordered_blitting)
         res->obj->unordered_read = false;
   }

   zink_query_update_gs_states(ctx);

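   /* ZINK_DEBUG=sync: split the renderpass and insert a full-pipeline memory
    * barrier between every draw to make missing synchronization visible
    */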
   if (unlikely(zink_debug & ZINK_DEBUG_SYNC)) {
      zink_batch_no_rp(ctx);
      VkMemoryBarrier mb;
      mb.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
      mb.pNext = NULL;
      mb.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT;
      mb.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT;
      VKSCR(CmdPipelineBarrier)(ctx->batch.state->cmdbuf,
                                VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                                VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                                0, 1, &mb, 0, NULL, 0, NULL);
   }

   zink_batch_rp(ctx);
   /* check dead swapchain */
   if (unlikely(!ctx->batch.in_rp))
      return;

   if (BATCH_CHANGED)
      zink_update_descriptor_refs(ctx, false);

   /* these must be after renderpass start to avoid issues with recursion */
   bool drawid_broken = false;
   if (reads_drawid && (!dindirect || !dindirect->buffer))
      drawid_broken = (drawid_offset != 0 ||
                      (!HAS_MULTIDRAW && num_draws > 1) ||
                      (HAS_MULTIDRAW && num_draws > 1 && !dinfo->increment_draw_id));
   if (drawid_broken != zink_get_last_vertex_key(ctx)->push_drawid)
      zink_set_last_vertex_key(ctx)->push_drawid = drawid_broken;

   bool rast_prim_changed = false;
   bool prim_changed = false;
   bool rast_state_changed = ctx->rast_state_changed;
   if (mode_changed || ctx->gfx_pipeline_state.modules_changed ||
       rast_state_changed) {
      enum mesa_prim rast_prim = zink_rast_prim(ctx, dinfo);
      if (rast_prim != ctx->gfx_pipeline_state.rast_prim) {
         bool points_changed =
            (ctx->gfx_pipeline_state.rast_prim == MESA_PRIM_POINTS) !=
            (rast_prim == MESA_PRIM_POINTS);

         prim_changed = ctx->gfx_pipeline_state.rast_prim != rast_prim;

         static bool rect_warned = false;
         if (DYNAMIC_STATE >= ZINK_DYNAMIC_STATE3 && rast_prim == MESA_PRIM_LINES && !rect_warned &&
             (VkLineRasterizationModeEXT)rast_state->hw_state.line_mode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT) {
            if (screen->info.line_rast_feats.rectangularLines)
               rect_warned = true;
            else
               warn_missing_feature(rect_warned, "rectangularLines");
         }

         ctx->gfx_pipeline_state.rast_prim = rast_prim;
         rast_prim_changed = true;

         if (points_changed && ctx->rast_state->base.point_quad_rasterization)
            zink_set_fs_point_coord_key(ctx);
      }
   }
   ctx->gfx_pipeline_state.gfx_prim_mode = mode;

   if ((mode_changed || prim_changed || rast_state_changed || ctx->gfx_pipeline_state.modules_changed)) {
      zink_set_primitive_emulation_keys(ctx);
   }

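   /* index_size is 1, 2, or 4, so index_size >> 1 maps to 0/1/2 for the
    * VkIndexType lookup below
    */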
   if (index_size) {
      const VkIndexType index_type[3] = {
         VK_INDEX_TYPE_UINT8_EXT,
         VK_INDEX_TYPE_UINT16,
         VK_INDEX_TYPE_UINT32,
      };
      struct zink_resource *res = zink_resource(index_buffer);
      if (unlikely(can_dgc)) {
         VkBindIndexBufferIndirectCommandNV *ptr;
         zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_INDEX_BUFFER_NV, (void**)&ptr);
         ptr->bufferAddress = res->obj->bda + index_offset;
         ptr->size = res->base.b.width0;
         ptr->indexType = index_type[index_size >> 1];
      } else {
         VKCTX(CmdBindIndexBuffer)(batch->state->cmdbuf, res->obj->buffer, index_offset, index_type[index_size >> 1]);
      }
   }
   if (DYNAMIC_STATE < ZINK_DYNAMIC_STATE2) {
      if (ctx->gfx_pipeline_state.dyn_state2.primitive_restart != dinfo->primitive_restart)
         ctx->gfx_pipeline_state.dirty = true;
      ctx->gfx_pipeline_state.dyn_state2.primitive_restart = dinfo->primitive_restart;
   }

   if (have_streamout && ctx->dirty_so_targets)
      zink_emit_stream_output_targets(pctx);

   bool pipeline_changed = update_gfx_pipeline<DYNAMIC_STATE, BATCH_CHANGED>(ctx, batch->state, mode, can_dgc);

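   /* gallium viewport state is (translate, scale) pairs; convert to VkViewport as
    *    x = translate[0] - scale[0], width  = scale[0] * 2 (clamped to >= 1)
    *    y = translate[1] - scale[1], height = scale[1] * 2 (may be negative to flip)
    * with depth derived from translate[2]/scale[2] according to clip_halfz
    */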
   if (BATCH_CHANGED || ctx->vp_state_changed || (DYNAMIC_STATE == ZINK_NO_DYNAMIC_STATE && pipeline_changed)) {
      VkViewport viewports[PIPE_MAX_VIEWPORTS];
      for (unsigned i = 0; i < ctx->vp_state.num_viewports; i++) {
         VkViewport viewport = {
            ctx->vp_state.viewport_states[i].translate[0] - ctx->vp_state.viewport_states[i].scale[0],
            ctx->vp_state.viewport_states[i].translate[1] - ctx->vp_state.viewport_states[i].scale[1],
            MAX2(ctx->vp_state.viewport_states[i].scale[0] * 2, 1),
            ctx->vp_state.viewport_states[i].scale[1] * 2,
            CLAMP(ctx->rast_state->base.clip_halfz ?
                  ctx->vp_state.viewport_states[i].translate[2] :
                  ctx->vp_state.viewport_states[i].translate[2] - ctx->vp_state.viewport_states[i].scale[2],
                  0, 1),
            CLAMP(ctx->vp_state.viewport_states[i].translate[2] + ctx->vp_state.viewport_states[i].scale[2],
                  0, 1)
         };
         if (!ctx->rast_state->base.half_pixel_center) {
             /* magic constant value from dxvk */
             float cf = 0.5f - (1.0f / 128.0f);
             viewport.x += cf;
             if (viewport.height < 0)
                viewport.y += cf;
             else
                viewport.y -= cf;
         }
         viewports[i] = viewport;
      }
      if (DYNAMIC_STATE != ZINK_NO_DYNAMIC_STATE)
         VKCTX(CmdSetViewportWithCount)(batch->state->cmdbuf, ctx->vp_state.num_viewports, viewports);
      else
         VKCTX(CmdSetViewport)(batch->state->cmdbuf, 0, ctx->vp_state.num_viewports, viewports);
   }
   if (BATCH_CHANGED || ctx->scissor_changed || ctx->vp_state_changed || (DYNAMIC_STATE == ZINK_NO_DYNAMIC_STATE && pipeline_changed)) {
      VkRect2D scissors[PIPE_MAX_VIEWPORTS];
      if (ctx->rast_state->base.scissor) {
         for (unsigned i = 0; i < ctx->vp_state.num_viewports; i++) {
            scissors[i].offset.x = ctx->vp_state.scissor_states[i].minx;
            scissors[i].offset.y = ctx->vp_state.scissor_states[i].miny;
            scissors[i].extent.width = ctx->vp_state.scissor_states[i].maxx - ctx->vp_state.scissor_states[i].minx;
            scissors[i].extent.height = ctx->vp_state.scissor_states[i].maxy - ctx->vp_state.scissor_states[i].miny;
         }
      } else {
         for (unsigned i = 0; i < ctx->vp_state.num_viewports; i++) {
            scissors[i].offset.x = 0;
            scissors[i].offset.y = 0;
            scissors[i].extent.width = ctx->fb_state.width;
            scissors[i].extent.height = ctx->fb_state.height;
         }
      }
      if (DYNAMIC_STATE != ZINK_NO_DYNAMIC_STATE)
         VKCTX(CmdSetScissorWithCount)(batch->state->cmdbuf, ctx->vp_state.num_viewports, scissors);
      else
         VKCTX(CmdSetScissor)(batch->state->cmdbuf, 0, ctx->vp_state.num_viewports, scissors);
   }
   ctx->vp_state_changed = false;
   ctx->scissor_changed = false;

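   /* stencil reference values, then (when dynamic state is available) the rest
    * of the depth/stencil state; if only one stencil side is enabled, the
    * front-side masks are applied to both faces
    */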
   if (BATCH_CHANGED || ctx->stencil_ref_changed) {
      VKCTX(CmdSetStencilReference)(batch->state->cmdbuf, VK_STENCIL_FACE_FRONT_BIT,
                               ctx->stencil_ref.ref_value[0]);
      VKCTX(CmdSetStencilReference)(batch->state->cmdbuf, VK_STENCIL_FACE_BACK_BIT,
                               ctx->stencil_ref.ref_value[1]);
      ctx->stencil_ref_changed = false;
   }

   if (DYNAMIC_STATE != ZINK_NO_DYNAMIC_STATE && (BATCH_CHANGED || ctx->dsa_state_changed)) {
      VKCTX(CmdSetDepthBoundsTestEnable)(batch->state->cmdbuf, dsa_state->hw_state.depth_bounds_test);
      if (dsa_state->hw_state.depth_bounds_test)
         VKCTX(CmdSetDepthBounds)(batch->state->cmdbuf,
                             dsa_state->hw_state.min_depth_bounds,
                             dsa_state->hw_state.max_depth_bounds);
      VKCTX(CmdSetDepthTestEnable)(batch->state->cmdbuf, dsa_state->hw_state.depth_test);
      VKCTX(CmdSetDepthCompareOp)(batch->state->cmdbuf, dsa_state->hw_state.depth_compare_op);
      VKCTX(CmdSetDepthWriteEnable)(batch->state->cmdbuf, dsa_state->hw_state.depth_write);
      VKCTX(CmdSetStencilTestEnable)(batch->state->cmdbuf, dsa_state->hw_state.stencil_test);
      if (dsa_state->hw_state.stencil_test) {
         VKCTX(CmdSetStencilOp)(batch->state->cmdbuf, VK_STENCIL_FACE_FRONT_BIT,
                                       dsa_state->hw_state.stencil_front.failOp,
                                       dsa_state->hw_state.stencil_front.passOp,
                                       dsa_state->hw_state.stencil_front.depthFailOp,
                                       dsa_state->hw_state.stencil_front.compareOp);
         VKCTX(CmdSetStencilOp)(batch->state->cmdbuf, VK_STENCIL_FACE_BACK_BIT,
                                       dsa_state->hw_state.stencil_back.failOp,
                                       dsa_state->hw_state.stencil_back.passOp,
                                       dsa_state->hw_state.stencil_back.depthFailOp,
                                       dsa_state->hw_state.stencil_back.compareOp);
         if (dsa_state->base.stencil[1].enabled) {
            VKCTX(CmdSetStencilWriteMask)(batch->state->cmdbuf, VK_STENCIL_FACE_FRONT_BIT, dsa_state->hw_state.stencil_front.writeMask);
            VKCTX(CmdSetStencilWriteMask)(batch->state->cmdbuf, VK_STENCIL_FACE_BACK_BIT, dsa_state->hw_state.stencil_back.writeMask);
            VKCTX(CmdSetStencilCompareMask)(batch->state->cmdbuf, VK_STENCIL_FACE_FRONT_BIT, dsa_state->hw_state.stencil_front.compareMask);
            VKCTX(CmdSetStencilCompareMask)(batch->state->cmdbuf, VK_STENCIL_FACE_BACK_BIT, dsa_state->hw_state.stencil_back.compareMask);
         } else {
            VKCTX(CmdSetStencilWriteMask)(batch->state->cmdbuf, VK_STENCIL_FACE_FRONT_AND_BACK, dsa_state->hw_state.stencil_front.writeMask);
            VKCTX(CmdSetStencilCompareMask)(batch->state->cmdbuf, VK_STENCIL_FACE_FRONT_AND_BACK, dsa_state->hw_state.stencil_front.compareMask);
         }
      } else {
         VKCTX(CmdSetStencilWriteMask)(batch->state->cmdbuf, VK_STENCIL_FACE_FRONT_AND_BACK, dsa_state->hw_state.stencil_front.writeMask);
         VKCTX(CmdSetStencilCompareMask)(batch->state->cmdbuf, VK_STENCIL_FACE_FRONT_AND_BACK, dsa_state->hw_state.stencil_front.compareMask);
         VKCTX(CmdSetStencilOp)(batch->state->cmdbuf, VK_STENCIL_FACE_FRONT_AND_BACK, VK_STENCIL_OP_KEEP, VK_STENCIL_OP_KEEP, VK_STENCIL_OP_KEEP, VK_COMPARE_OP_ALWAYS);
      }
   }
   ctx->dsa_state_changed = false;

   if (BATCH_CHANGED || rast_state_changed) {
      if (DYNAMIC_STATE != ZINK_NO_DYNAMIC_STATE) {
         VKCTX(CmdSetFrontFace)(batch->state->cmdbuf, (VkFrontFace)ctx->gfx_pipeline_state.dyn_state1.front_face);
         VKCTX(CmdSetCullMode)(batch->state->cmdbuf, ctx->gfx_pipeline_state.dyn_state1.cull_mode);
      }

      if (DYNAMIC_STATE >= ZINK_DYNAMIC_STATE3) {
         if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_RAST_STIPPLE))
            VKCTX(CmdSetLineStippleEXT)(batch->state->cmdbuf, rast_state->base.line_stipple_factor, rast_state->base.line_stipple_pattern);
         if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_RAST_CLIP))
            VKCTX(CmdSetDepthClipEnableEXT)(batch->state->cmdbuf, rast_state->hw_state.depth_clip);
         if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_RAST_CLAMP))
            VKCTX(CmdSetDepthClampEnableEXT)(batch->state->cmdbuf, rast_state->hw_state.depth_clamp);
         if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_RAST_POLYGON))
            VKCTX(CmdSetPolygonModeEXT)(batch->state->cmdbuf, (VkPolygonMode)rast_state->hw_state.polygon_mode);
         if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_RAST_HALFZ))
            VKCTX(CmdSetDepthClipNegativeOneToOneEXT)(batch->state->cmdbuf, !rast_state->hw_state.clip_halfz);
         if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_RAST_PV))
            VKCTX(CmdSetProvokingVertexModeEXT)(batch->state->cmdbuf,
                                                rast_state->hw_state.pv_last ?
                                                VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT :
                                                VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT);
         if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_RAST_LINE))
            VKCTX(CmdSetLineRasterizationModeEXT)(batch->state->cmdbuf, rast_state->dynamic_line_mode);
         if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_RAST_STIPPLE_ON))
            VKCTX(CmdSetLineStippleEnableEXT)(batch->state->cmdbuf, rast_state->hw_state.line_stipple_enable);
      }
   }
   if ((BATCH_CHANGED || ctx->sample_mask_changed) && screen->have_full_ds3) {
      VKCTX(CmdSetRasterizationSamplesEXT)(batch->state->cmdbuf, (VkSampleCountFlagBits)(ctx->gfx_pipeline_state.rast_samples + 1));
      VKCTX(CmdSetSampleMaskEXT)(batch->state->cmdbuf, (VkSampleCountFlagBits)(ctx->gfx_pipeline_state.rast_samples + 1), &ctx->gfx_pipeline_state.sample_mask);
      ctx->sample_mask_changed = false;
   }
   if ((BATCH_CHANGED || ctx->blend_state_changed)) {
      if (ctx->gfx_pipeline_state.blend_state) {
         if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_BLEND_A2C))
            VKCTX(CmdSetAlphaToCoverageEnableEXT)(batch->state->cmdbuf, ctx->gfx_pipeline_state.blend_state->alpha_to_coverage &&
                                                                        ctx->gfx_stages[MESA_SHADER_FRAGMENT]->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_DATA0));
         if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_BLEND_A21))
            VKCTX(CmdSetAlphaToOneEnableEXT)(batch->state->cmdbuf, ctx->gfx_pipeline_state.blend_state->alpha_to_one);
         if (ctx->fb_state.nr_cbufs) {
            if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_BLEND_ON))
               VKCTX(CmdSetColorBlendEnableEXT)(batch->state->cmdbuf, 0, ctx->fb_state.nr_cbufs, ctx->gfx_pipeline_state.blend_state->ds3.enables);
            if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_BLEND_WRITE))
               VKCTX(CmdSetColorWriteMaskEXT)(batch->state->cmdbuf, 0, ctx->fb_state.nr_cbufs, ctx->gfx_pipeline_state.blend_state->ds3.wrmask);
            if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_BLEND_EQ))
               VKCTX(CmdSetColorBlendEquationEXT)(batch->state->cmdbuf, 0, ctx->fb_state.nr_cbufs, ctx->gfx_pipeline_state.blend_state->ds3.eq);
         }
         if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_BLEND_LOGIC_ON))
            VKCTX(CmdSetLogicOpEnableEXT)(batch->state->cmdbuf, ctx->gfx_pipeline_state.blend_state->logicop_enable);
         if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_BLEND_LOGIC))
            VKCTX(CmdSetLogicOpEXT)(batch->state->cmdbuf, ctx->gfx_pipeline_state.blend_state->logicop_func);
      }
   }
   ctx->ds3_states = 0;

   if (BATCH_CHANGED ||
       /* only re-emit on non-batch change when actually drawing lines */
       ((ctx->line_width_changed || rast_prim_changed) && ctx->gfx_pipeline_state.rast_prim == MESA_PRIM_LINES)) {
      VKCTX(CmdSetLineWidth)(batch->state->cmdbuf, rast_state->line_width);
      ctx->line_width_changed = false;
   }

   if (BATCH_CHANGED || mode_changed ||
       ctx->gfx_pipeline_state.modules_changed ||
       rast_state_changed) {
      bool depth_bias =
         zink_prim_type(ctx, dinfo) == MESA_PRIM_TRIANGLES &&
         rast_state->offset_fill;

      if (depth_bias) {
         if (rast_state->base.offset_units_unscaled) {
            VKCTX(CmdSetDepthBias)(batch->state->cmdbuf, rast_state->offset_units * ctx->depth_bias_scale_factor, rast_state->offset_clamp, rast_state->offset_scale);
         } else {
            VKCTX(CmdSetDepthBias)(batch->state->cmdbuf, rast_state->offset_units, rast_state->offset_clamp, rast_state->offset_scale);
         }
      } else {
         VKCTX(CmdSetDepthBias)(batch->state->cmdbuf, 0.0f, 0.0f, 0.0f);
      }
   }
   ctx->rast_state_changed = false;

   if (DYNAMIC_STATE != ZINK_NO_DYNAMIC_STATE) {
      if (ctx->sample_locations_changed) {
         VkSampleLocationsInfoEXT loc;
         zink_init_vk_sample_locations(ctx, &loc);
         VKCTX(CmdSetSampleLocationsEXT)(batch->state->cmdbuf, &loc);
      }
      ctx->sample_locations_changed = false;
   }

   if (BATCH_CHANGED || ctx->blend_color_changed) {
      VKCTX(CmdSetBlendConstants)(batch->state->cmdbuf, ctx->blend_constants);
   }
   ctx->blend_state_changed = false;
   ctx->blend_color_changed = false;

   if (!DRAW_STATE) {
      if (BATCH_CHANGED || ctx->vertex_buffers_dirty) {
         if (unlikely(can_dgc))
            bind_vertex_buffers_dgc(ctx);
         else if (DYNAMIC_STATE == ZINK_DYNAMIC_VERTEX_INPUT || ctx->gfx_pipeline_state.uses_dynamic_stride)
            zink_bind_vertex_buffers<DYNAMIC_STATE>(batch, ctx);
         else
            zink_bind_vertex_buffers<ZINK_NO_DYNAMIC_STATE>(batch, ctx);
      }
   }

   if (BATCH_CHANGED) {
      ctx->pipeline_changed[0] = false;
      zink_select_draw_vbo(ctx);
   }

   if (DYNAMIC_STATE != ZINK_NO_DYNAMIC_STATE && (BATCH_CHANGED || mode_changed))
      VKCTX(CmdSetPrimitiveTopology)(batch->state->cmdbuf, zink_primitive_topology(mode));

   if (DYNAMIC_STATE >= ZINK_DYNAMIC_STATE2 && (BATCH_CHANGED || ctx->primitive_restart != dinfo->primitive_restart)) {
      VKCTX(CmdSetPrimitiveRestartEnable)(batch->state->cmdbuf, dinfo->primitive_restart);
      ctx->primitive_restart = dinfo->primitive_restart;
   }

   if (DYNAMIC_STATE >= ZINK_DYNAMIC_STATE2 && (BATCH_CHANGED || ctx->rasterizer_discard_changed)) {
      VKCTX(CmdSetRasterizerDiscardEnable)(batch->state->cmdbuf, ctx->gfx_pipeline_state.dyn_state2.rasterizer_discard);
      ctx->rasterizer_discard_changed = false;
   }

   if (zink_program_has_descriptors(&ctx->curr_program->base))
      zink_descriptors_update(ctx, false);

   if (ctx->di.any_bindless_dirty &&
       /* some apps (d3dretrace) call MakeTextureHandleResidentARB randomly */
       zink_program_has_descriptors(&ctx->curr_program->base) &&
       ctx->curr_program->base.dd.bindless)
      zink_descriptors_update_bindless(ctx);

   if (reads_basevertex) {
      unsigned draw_mode_is_indexed = index_size > 0;
      if (unlikely(can_dgc)) {
         uint32_t *ptr;
         VkIndirectCommandsLayoutTokenNV *token = zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_NV, (void**)&ptr);
         token->pushconstantOffset = offsetof(struct zink_gfx_push_constant, draw_mode_is_indexed);
         token->pushconstantSize = sizeof(unsigned);
         *ptr = draw_mode_is_indexed;
      } else {
         VKCTX(CmdPushConstants)(batch->state->cmdbuf, ctx->curr_program->base.layout, VK_SHADER_STAGE_ALL_GRAPHICS,
                           offsetof(struct zink_gfx_push_constant, draw_mode_is_indexed), sizeof(unsigned),
                           &draw_mode_is_indexed);
      }
   }
   if (ctx->curr_program->shaders[MESA_SHADER_TESS_CTRL] &&
       ctx->curr_program->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated) {
      if (unlikely(can_dgc)) {
         float *ptr;
         VkIndirectCommandsLayoutTokenNV *token = zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_NV, (void**)&ptr);
         token->pushconstantOffset = offsetof(struct zink_gfx_push_constant, default_inner_level);
         token->pushconstantSize = sizeof(float) * 6;
         memcpy(ptr, &ctx->tess_levels[0], sizeof(float) * 6);
      } else {
         VKCTX(CmdPushConstants)(batch->state->cmdbuf, ctx->curr_program->base.layout, VK_SHADER_STAGE_ALL_GRAPHICS,
                           offsetof(struct zink_gfx_push_constant, default_inner_level), sizeof(float) * 6,
                           &ctx->tess_levels[0]);
      }
   }

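   /* when line stipple/smooth is lowered in shaders, the lowering code reads
    * its parameters from push constants: the viewport scale, the stipple
    * pattern with the repeat factor packed into the upper 16 bits, and the
    * line width
    */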
   if (!screen->optimal_keys) {
      if (zink_get_fs_key(ctx)->lower_line_stipple ||
          zink_get_gs_key(ctx)->lower_gl_point ||
          zink_get_fs_key(ctx)->lower_line_smooth) {

         assert(zink_get_gs_key(ctx)->lower_line_stipple ==
                zink_get_fs_key(ctx)->lower_line_stipple);

         assert(zink_get_gs_key(ctx)->lower_line_smooth ==
                zink_get_fs_key(ctx)->lower_line_smooth);

         float viewport_scale[2] = {
            ctx->vp_state.viewport_states[0].scale[0],
            ctx->vp_state.viewport_states[0].scale[1]
         };
         VKCTX(CmdPushConstants)(batch->state->cmdbuf,
                                 ctx->curr_program->base.layout,
                                 VK_SHADER_STAGE_ALL_GRAPHICS,
                                 offsetof(struct zink_gfx_push_constant, viewport_scale),
                                 sizeof(float) * 2, &viewport_scale);

         uint32_t stipple = ctx->rast_state->base.line_stipple_pattern;
         stipple |= ctx->rast_state->base.line_stipple_factor << 16;
         VKCTX(CmdPushConstants)(batch->state->cmdbuf,
                                 ctx->curr_program->base.layout,
                                 VK_SHADER_STAGE_ALL_GRAPHICS,
                                 offsetof(struct zink_gfx_push_constant, line_stipple_pattern),
                                 sizeof(uint32_t), &stipple);

         if (ctx->gfx_pipeline_state.shader_keys.key[MESA_SHADER_FRAGMENT].key.fs.lower_line_smooth) {
            float line_width = ctx->rast_state->base.line_width;
            VKCTX(CmdPushConstants)(batch->state->cmdbuf,
                                    ctx->curr_program->base.layout,
                                    VK_SHADER_STAGE_ALL_GRAPHICS,
                                    offsetof(struct zink_gfx_push_constant, line_width),
                                    sizeof(uint32_t), &line_width);
         }
      }
   }

   if (have_streamout) {
      for (unsigned i = 0; i < ctx->num_so_targets; i++) {
         struct zink_so_target *t = zink_so_target(ctx->so_targets[i]);
         counter_buffers[i] = VK_NULL_HANDLE;
         if (t) {
            struct zink_resource *res = zink_resource(t->counter_buffer);
            t->stride = ctx->last_vertex_stage->sinfo.stride[i];
            zink_batch_reference_resource_rw(batch, res, true);
            if (!ctx->unordered_blitting)
               res->obj->unordered_read = res->obj->unordered_write = false;
            if (t->counter_buffer_valid) {
               counter_buffers[i] = res->obj->buffer;
               counter_buffer_offsets[i] = t->counter_buffer_offset;
            }
         }
      }
      VKCTX(CmdBeginTransformFeedbackEXT)(batch->state->cmdbuf, 0, ctx->num_so_targets, counter_buffers, counter_buffer_offsets);
   }

   bool marker = false;
   if (unlikely(zink_tracing)) {
      VkViewport viewport = {
         ctx->vp_state.viewport_states[0].translate[0] - ctx->vp_state.viewport_states[0].scale[0],
         ctx->vp_state.viewport_states[0].translate[1] - ctx->vp_state.viewport_states[0].scale[1],
         MAX2(ctx->vp_state.viewport_states[0].scale[0] * 2, 1),
         ctx->vp_state.viewport_states[0].scale[1] * 2,
         CLAMP(ctx->rast_state->base.clip_halfz ?
               ctx->vp_state.viewport_states[0].translate[2] :
               ctx->vp_state.viewport_states[0].translate[2] - ctx->vp_state.viewport_states[0].scale[2],
               0, 1),
         CLAMP(ctx->vp_state.viewport_states[0].translate[2] + ctx->vp_state.viewport_states[0].scale[2],
               0, 1)
      };
      if (ctx->blitting) {
         bool is_zs = util_format_is_depth_or_stencil(ctx->sampler_views[MESA_SHADER_FRAGMENT][0]->format);
         marker = zink_cmd_debug_marker_begin(ctx, VK_NULL_HANDLE, "u_blitter(%s->%s, %dx%d)",
                                              util_format_short_name(ctx->sampler_views[MESA_SHADER_FRAGMENT][0]->format),
                                              util_format_short_name((is_zs ? ctx->fb_state.zsbuf : ctx->fb_state.cbufs[0])->format),
                                              lround(viewport.width), lround(viewport.height));
      } else {
         marker = zink_cmd_debug_marker_begin(ctx, VK_NULL_HANDLE, "draw(%u cbufs|%s, %dx%d)",
                                              ctx->fb_state.nr_cbufs,
                                              ctx->fb_state.zsbuf ? "zsbuf" : "",
                                              lround(viewport.width), lround(viewport.height));
      }
   }

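   /* dispatch the draw: indexed vs non-indexed, direct vs indirect (optionally
    * with an indirect draw count), xfb-sized (vkCmdDrawIndirectByteCountEXT),
    * or DGC token emission
    */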
   bool needs_drawid = reads_drawid && zink_get_last_vertex_key(ctx)->push_drawid;
   work_count += num_draws;
   if (index_size > 0) {
      if (dindirect && dindirect->buffer) {
         assert(num_draws == 1);
         if (needs_drawid)
            update_drawid(ctx, drawid_offset);
         struct zink_resource *indirect = zink_resource(dindirect->buffer);
         zink_batch_reference_resource_rw(batch, indirect, false);
         if (dindirect->indirect_draw_count) {
             struct zink_resource *indirect_draw_count = zink_resource(dindirect->indirect_draw_count);
             zink_batch_reference_resource_rw(batch, indirect_draw_count, false);
             VKCTX(CmdDrawIndexedIndirectCount)(batch->state->cmdbuf, indirect->obj->buffer, dindirect->offset,
                                                indirect_draw_count->obj->buffer, dindirect->indirect_draw_count_offset,
                                                dindirect->draw_count, dindirect->stride);
         } else
            VKCTX(CmdDrawIndexedIndirect)(batch->state->cmdbuf, indirect->obj->buffer, dindirect->offset, dindirect->draw_count, dindirect->stride);
      } else {
         if (unlikely(can_dgc)) {
            if (need_index_buffer_unref)
               draw_indexed_dgc_need_index_buffer_unref(ctx, dinfo, draws, num_draws, drawid_offset, needs_drawid);
            else
               draw_indexed_dgc(ctx, dinfo, draws, num_draws, drawid_offset, needs_drawid);
         } else if (need_index_buffer_unref) {
            draw_indexed_need_index_buffer_unref(ctx, dinfo, draws, num_draws, drawid_offset, needs_drawid);
         } else {
            draw_indexed<HAS_MULTIDRAW>(ctx, dinfo, draws, num_draws, drawid_offset, needs_drawid);
         }
      }
   } else {
      if (so_target && screen->info.tf_props.transformFeedbackDraw) {
         /* GTF-GL46.gtf40.GL3Tests.transform_feedback2.transform_feedback2_api attempts a bogus xfb
          * draw using a streamout target that has no data;
          * to avoid hanging the gpu, reject any such draws
          */
         if (so_target->counter_buffer_valid) {
            if (needs_drawid)
               update_drawid(ctx, drawid_offset);
            zink_batch_reference_resource_rw(batch, zink_resource(so_target->base.buffer), false);
            zink_batch_reference_resource_rw(batch, zink_resource(so_target->counter_buffer), true);
            VKCTX(CmdDrawIndirectByteCountEXT)(batch->state->cmdbuf, dinfo->instance_count, dinfo->start_instance,
                                          zink_resource(so_target->counter_buffer)->obj->buffer, so_target->counter_buffer_offset, 0,
                                          MIN2(so_target->stride, screen->info.tf_props.maxTransformFeedbackBufferDataStride));
         }
      } else if (dindirect && dindirect->buffer) {
         assert(num_draws == 1);
         if (needs_drawid)
            update_drawid(ctx, drawid_offset);
         struct zink_resource *indirect = zink_resource(dindirect->buffer);
         zink_batch_reference_resource_rw(batch, indirect, false);
         if (dindirect->indirect_draw_count) {
             struct zink_resource *indirect_draw_count = zink_resource(dindirect->indirect_draw_count);
             zink_batch_reference_resource_rw(batch, indirect_draw_count, false);
             VKCTX(CmdDrawIndirectCount)(batch->state->cmdbuf, indirect->obj->buffer, dindirect->offset,
                                           indirect_draw_count->obj->buffer, dindirect->indirect_draw_count_offset,
                                           dindirect->draw_count, dindirect->stride);
         } else
            VKCTX(CmdDrawIndirect)(batch->state->cmdbuf, indirect->obj->buffer, dindirect->offset, dindirect->draw_count, dindirect->stride);
      } else {
         if (unlikely(can_dgc))
            draw_dgc(ctx, dinfo, draws, num_draws, drawid_offset, needs_drawid);
         else
            draw<HAS_MULTIDRAW>(ctx, dinfo, draws, num_draws, drawid_offset, needs_drawid);
      }
   }

   if (unlikely(zink_tracing))
      zink_cmd_debug_marker_end(ctx, batch->state->cmdbuf, marker);

   ctx->dgc.valid = can_dgc;
   if (have_streamout) {
      for (unsigned i = 0; i < ctx->num_so_targets; i++) {
         struct zink_so_target *t = zink_so_target(ctx->so_targets[i]);
         if (t) {
            counter_buffers[i] = zink_resource(t->counter_buffer)->obj->buffer;
            counter_buffer_offsets[i] = t->counter_buffer_offset;
            t->counter_buffer_valid = true;
         }
      }
      VKCTX(CmdEndTransformFeedbackEXT)(batch->state->cmdbuf, 0, ctx->num_so_targets, counter_buffers, counter_buffer_offsets);
   }

   batch->has_work = true;
   batch->last_was_compute = false;
   ctx->batch.work_count = work_count;
1124    /* flush if there's >100k draws */
1125    if (!ctx->unordered_blitting && (unlikely(work_count >= 30000) || ctx->oom_flush))
1126       pctx->flush(pctx, NULL, 0);
1127 }
1128 
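/* Thin wrapper instantiated once per (multidraw, dynamic-state, batch-changed)
 * combination; zink_init_draw_functions() selects the matching instantiation at
 * context creation so these capability checks never happen per-draw.
 */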
template <zink_multidraw HAS_MULTIDRAW, zink_dynamic_state DYNAMIC_STATE, bool BATCH_CHANGED>
static void
zink_draw_vbo(struct pipe_context *pctx,
              const struct pipe_draw_info *info,
              unsigned drawid_offset,
              const struct pipe_draw_indirect_info *indirect,
              const struct pipe_draw_start_count_bias *draws,
              unsigned num_draws)
{
   zink_draw<HAS_MULTIDRAW, DYNAMIC_STATE, BATCH_CHANGED, false>(pctx, info, drawid_offset, indirect, draws, num_draws, NULL, 0);
}

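/* Applies a vertex input state for the subset of vstate's vertex elements given
 * by partial_velem_mask. A worked example (values are illustrative): with
 * full_velem_mask = 0b1011 and partial_velem_mask = 0b1010, elements 1 and 3 are
 * selected; their compacted indices within the full mask are
 * popcount(0b1011 & 0b0001) = 1 and popcount(0b1011 & 0b0111) = 2, so
 * dynattribs[0] and dynattribs[1] copy hw_state.dynattribs[1] and [2],
 * relocated to locations 0 and 1.
 */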
template <util_popcnt HAS_POPCNT>
static void
zink_vertex_state_mask(struct zink_context *ctx, struct pipe_vertex_state *vstate, uint32_t partial_velem_mask)
{
   struct zink_vertex_state *zstate = (struct zink_vertex_state *)vstate;
   VkCommandBuffer cmdbuf = ctx->batch.state->cmdbuf;

   if (partial_velem_mask == vstate->input.full_velem_mask) {
      VKCTX(CmdSetVertexInputEXT)(cmdbuf,
                                 zstate->velems.hw_state.num_bindings, zstate->velems.hw_state.dynbindings,
                                 zstate->velems.hw_state.num_attribs, zstate->velems.hw_state.dynattribs);
      return;
   }

   VkVertexInputAttributeDescription2EXT dynattribs[PIPE_MAX_ATTRIBS];
   unsigned num_attribs = 0;
   u_foreach_bit(elem, vstate->input.full_velem_mask & partial_velem_mask) {
      unsigned idx = util_bitcount_fast<HAS_POPCNT>(vstate->input.full_velem_mask & BITFIELD_MASK(elem));
      dynattribs[num_attribs] = zstate->velems.hw_state.dynattribs[idx];
      dynattribs[num_attribs].location = num_attribs;
      num_attribs++;
   }

   VKCTX(CmdSetVertexInputEXT)(cmdbuf,
                               zstate->velems.hw_state.num_bindings, zstate->velems.hw_state.dynbindings,
                               num_attribs, dynattribs);
}

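/* Binds the vstate's single vertex buffer and applies its (possibly partial)
 * vertex input state. With ZINK_DEBUG_DGC set, the bind is recorded as a
 * VK_NV_device_generated_commands vertex-buffer token instead of being emitted
 * directly into the command buffer.
 */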
template <util_popcnt HAS_POPCNT>
static void
zink_bind_vertex_state(struct zink_context *ctx, struct pipe_vertex_state *vstate, uint32_t partial_velem_mask)
{
   struct zink_vertex_state *zstate = (struct zink_vertex_state *)vstate;
   VkCommandBuffer cmdbuf = ctx->batch.state->cmdbuf;
   if (!vstate->input.vbuffer.buffer.resource)
      return;

   zink_vertex_state_mask<HAS_POPCNT>(ctx, vstate, partial_velem_mask);

   struct zink_resource *res = zink_resource(vstate->input.vbuffer.buffer.resource);
   zink_batch_resource_usage_set(&ctx->batch, res, false, true);
   VkDeviceSize offset = vstate->input.vbuffer.buffer_offset;
   if (unlikely(zink_debug & ZINK_DEBUG_DGC)) {
      VkBindVertexBufferIndirectCommandNV *ptr;
      VkIndirectCommandsLayoutTokenNV *token = zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_VERTEX_BUFFER_NV, (void**)&ptr);
      token->vertexBindingUnit = 0;
      token->vertexDynamicStride = VK_FALSE;
      ptr->bufferAddress = res->obj->bda + offset;
      ptr->size = res->base.b.width0;
      ptr->stride = 0;
   } else {
      VKCTX(CmdBindVertexBuffers)(cmdbuf, 0,
                                 zstate->velems.hw_state.num_bindings,
                                 &res->obj->buffer, &offset);
   }
}

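/* Gallium draw_vertex_state entrypoint: synthesizes a pipe_draw_info with 32-bit
 * indices and a single instance, emits the vertex input barrier and bind, then
 * funnels into zink_draw with its final template flag set to true, passing
 * vstate and partial_velem_mask along so the context's own vertex buffer state
 * is left untouched.
 */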
template <zink_multidraw HAS_MULTIDRAW, zink_dynamic_state DYNAMIC_STATE, util_popcnt HAS_POPCNT, bool BATCH_CHANGED>
static void
zink_draw_vertex_state(struct pipe_context *pctx,
                       struct pipe_vertex_state *vstate,
                       uint32_t partial_velem_mask,
                       struct pipe_draw_vertex_state_info info,
                       const struct pipe_draw_start_count_bias *draws,
                       unsigned num_draws)
{
   struct pipe_draw_info dinfo = {};

   dinfo.mode = info.mode;
   dinfo.index_size = 4;
   dinfo.instance_count = 1;
   dinfo.index.resource = vstate->input.indexbuf;
   struct zink_context *ctx = zink_context(pctx);
   struct zink_resource *res = zink_resource(vstate->input.vbuffer.buffer.resource);
   zink_screen(ctx->base.screen)->buffer_barrier(ctx, res, VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
                                VK_PIPELINE_STAGE_VERTEX_INPUT_BIT);
   if (!ctx->unordered_blitting)
      res->obj->unordered_read = false;
   zink_bind_vertex_state<HAS_POPCNT>(ctx, vstate, partial_velem_mask);

   zink_draw<HAS_MULTIDRAW, DYNAMIC_STATE, BATCH_CHANGED, true>(pctx, &dinfo, 0, NULL, draws, num_draws, vstate, partial_velem_mask);
   /* ensure ctx->vertex_buffers gets rebound on next non-vstate draw */
   ctx->vertex_buffers_dirty = true;

   if (info.take_vertex_state_ownership)
      pipe_vertex_state_reference(&vstate, NULL);
}

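/* Compute dispatch path, mirroring the draw path above: emit pending barriers,
 * update and bind the compute pipeline (rebinding only when it or the batch
 * changed), update descriptors, then record either vkCmdDispatchIndirect or
 * vkCmdDispatch, with the same 30k-work-item flush heuristic as draws.
 */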
template <bool BATCH_CHANGED>
static void
zink_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info)
{
   struct zink_context *ctx = zink_context(pctx);
   struct zink_screen *screen = zink_screen(pctx->screen);
   struct zink_batch *batch = &ctx->batch;

   if (ctx->render_condition_active)
      zink_start_conditional_render(ctx);

   if (info->indirect) {
      /*
         VK_ACCESS_INDIRECT_COMMAND_READ_BIT specifies read access to indirect command data read as
         part of an indirect build, trace, drawing or dispatching command. Such access occurs in the
         VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT pipeline stage.

         - Chapter 7. Synchronization and Cache Control
       */
      check_buffer_barrier(ctx, info->indirect, VK_ACCESS_INDIRECT_COMMAND_READ_BIT, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT);
   }

   zink_update_barriers(ctx, true, NULL, info->indirect, NULL);
   if (ctx->memory_barrier)
      zink_flush_memory_barrier(ctx, true);

   if (unlikely(zink_debug & ZINK_DEBUG_SYNC)) {
      zink_batch_no_rp(ctx);
      VkMemoryBarrier mb;
      mb.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
      mb.pNext = NULL;
      mb.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT;
      mb.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT;
      VKSCR(CmdPipelineBarrier)(ctx->batch.state->cmdbuf,
                                VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                                VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                                0, 1, &mb, 0, NULL, 0, NULL);
   }

   zink_program_update_compute_pipeline_state(ctx, ctx->curr_compute, info);
   VkPipeline prev_pipeline = ctx->compute_pipeline_state.pipeline;

   if (BATCH_CHANGED) {
      zink_update_descriptor_refs(ctx, true);
   }
   if (ctx->compute_dirty) {
      /* update inlinable constants */
      zink_update_compute_program(ctx);
      ctx->compute_dirty = false;
   }

   VkPipeline pipeline = zink_get_compute_pipeline(screen, ctx->curr_compute,
                                               &ctx->compute_pipeline_state);

   if (prev_pipeline != pipeline || BATCH_CHANGED)
      VKCTX(CmdBindPipeline)(batch->state->cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
   if (BATCH_CHANGED) {
      ctx->pipeline_changed[1] = false;
      zink_select_launch_grid(ctx);
   }

   if (zink_program_has_descriptors(&ctx->curr_compute->base))
      zink_descriptors_update(ctx, true);
   if (ctx->di.any_bindless_dirty && ctx->curr_compute->base.dd.bindless)
      zink_descriptors_update_bindless(ctx);

   batch->work_count++;
   zink_batch_no_rp(ctx);
   if (!ctx->queries_disabled)
      zink_resume_cs_query(ctx);
   if (info->indirect) {
      VKCTX(CmdDispatchIndirect)(batch->state->cmdbuf, zink_resource(info->indirect)->obj->buffer, info->indirect_offset);
      zink_batch_reference_resource_rw(batch, zink_resource(info->indirect), false);
   } else
      VKCTX(CmdDispatch)(batch->state->cmdbuf, info->grid[0], info->grid[1], info->grid[2]);
   batch->has_work = true;
   batch->last_was_compute = true;
   /* flush if the batch has accumulated >30k work items or an oom flush is pending */
   if (!ctx->unordered_blitting && (unlikely(ctx->batch.work_count >= 30000) || ctx->oom_flush))
      pctx->flush(pctx, NULL, 0);
}

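/* The init_* helpers below expand the template parameter space one axis at a
 * time (batch-changed -> dynamic-state tier -> multidraw), filling the
 * draw_vbo_array / draw_state_array function tables with every instantiation.
 */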
template <zink_multidraw HAS_MULTIDRAW, zink_dynamic_state DYNAMIC_STATE, bool BATCH_CHANGED>
static void
init_batch_changed_functions(struct zink_context *ctx, pipe_draw_func draw_vbo_array[2][6][2], pipe_draw_vertex_state_func draw_state_array[2][6][2][2])
{
   draw_vbo_array[HAS_MULTIDRAW][DYNAMIC_STATE][BATCH_CHANGED] = zink_draw_vbo<HAS_MULTIDRAW, DYNAMIC_STATE, BATCH_CHANGED>;
   draw_state_array[HAS_MULTIDRAW][DYNAMIC_STATE][0][BATCH_CHANGED] = zink_draw_vertex_state<HAS_MULTIDRAW, DYNAMIC_STATE, POPCNT_NO, BATCH_CHANGED>;
   draw_state_array[HAS_MULTIDRAW][DYNAMIC_STATE][1][BATCH_CHANGED] = zink_draw_vertex_state<HAS_MULTIDRAW, DYNAMIC_STATE, POPCNT_YES, BATCH_CHANGED>;
}

template <zink_multidraw HAS_MULTIDRAW, zink_dynamic_state DYNAMIC_STATE>
static void
init_dynamic_state_functions(struct zink_context *ctx, pipe_draw_func draw_vbo_array[2][6][2], pipe_draw_vertex_state_func draw_state_array[2][6][2][2])
{
   init_batch_changed_functions<HAS_MULTIDRAW, DYNAMIC_STATE, false>(ctx, draw_vbo_array, draw_state_array);
   init_batch_changed_functions<HAS_MULTIDRAW, DYNAMIC_STATE, true>(ctx, draw_vbo_array, draw_state_array);
}

template <zink_multidraw HAS_MULTIDRAW>
static void
init_multidraw_functions(struct zink_context *ctx, pipe_draw_func draw_vbo_array[2][6][2], pipe_draw_vertex_state_func draw_state_array[2][6][2][2])
{
   init_dynamic_state_functions<HAS_MULTIDRAW, ZINK_NO_DYNAMIC_STATE>(ctx, draw_vbo_array, draw_state_array);
   init_dynamic_state_functions<HAS_MULTIDRAW, ZINK_DYNAMIC_STATE>(ctx, draw_vbo_array, draw_state_array);
   init_dynamic_state_functions<HAS_MULTIDRAW, ZINK_DYNAMIC_STATE2>(ctx, draw_vbo_array, draw_state_array);
   init_dynamic_state_functions<HAS_MULTIDRAW, ZINK_DYNAMIC_VERTEX_INPUT2>(ctx, draw_vbo_array, draw_state_array);
   init_dynamic_state_functions<HAS_MULTIDRAW, ZINK_DYNAMIC_STATE3>(ctx, draw_vbo_array, draw_state_array);
   init_dynamic_state_functions<HAS_MULTIDRAW, ZINK_DYNAMIC_VERTEX_INPUT>(ctx, draw_vbo_array, draw_state_array);
}

static void
init_all_draw_functions(struct zink_context *ctx, pipe_draw_func draw_vbo_array[2][6][2], pipe_draw_vertex_state_func draw_state_array[2][6][2][2])
{
   init_multidraw_functions<ZINK_NO_MULTIDRAW>(ctx, draw_vbo_array, draw_state_array);
   init_multidraw_functions<ZINK_MULTIDRAW>(ctx, draw_vbo_array, draw_state_array);
}

template <bool BATCH_CHANGED>
static void
init_grid_batch_changed_functions(struct zink_context *ctx)
{
   ctx->launch_grid[BATCH_CHANGED] = zink_launch_grid<BATCH_CHANGED>;
}

static void
init_all_grid_functions(struct zink_context *ctx)
{
   init_grid_batch_changed_functions<false>(ctx);
   init_grid_batch_changed_functions<true>(ctx);
}

static void
zink_invalid_draw_vbo(struct pipe_context *pipe,
                      const struct pipe_draw_info *dinfo,
                      unsigned drawid_offset,
                      const struct pipe_draw_indirect_info *dindirect,
                      const struct pipe_draw_start_count_bias *draws,
                      unsigned num_draws)
{
   unreachable("vertex shader not bound");
}

static void
zink_invalid_draw_vertex_state(struct pipe_context *pipe,
                               struct pipe_vertex_state *vstate,
                               uint32_t partial_velem_mask,
                               struct pipe_draw_vertex_state_info info,
                               const struct pipe_draw_start_count_bias *draws,
                               unsigned num_draws)
{
   unreachable("vertex shader not bound");
}

static void
zink_invalid_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info)
{
   unreachable("compute shader not bound");
}

#define STAGE_BASE 0
#define STAGE_BASE_GS (BITFIELD_BIT(MESA_SHADER_GEOMETRY) >> 1)
#define STAGE_BASE_TES (BITFIELD_BIT(MESA_SHADER_TESS_EVAL) >> 1)
#define STAGE_BASE_TES_GS ((BITFIELD_BIT(MESA_SHADER_TESS_EVAL) | BITFIELD_BIT(MESA_SHADER_GEOMETRY)) >> 1)
#define STAGE_BASE_TCS_TES ((BITFIELD_BIT(MESA_SHADER_TESS_CTRL) | BITFIELD_BIT(MESA_SHADER_TESS_EVAL)) >> 1)
#define STAGE_BASE_TCS_TES_GS ((BITFIELD_BIT(MESA_SHADER_TESS_CTRL) | BITFIELD_BIT(MESA_SHADER_TESS_EVAL) | BITFIELD_BIT(MESA_SHADER_GEOMETRY)) >> 1)

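/* STAGE_MASK indexes the program_cache / pipeline_libs tables: the optional-stage
 * bits are shifted down by one so bit 0 = TCS, bit 1 = TES, bit 2 = GS (VS and FS
 * are always present). That yields indices 0 (VS+FS), 1 (+TCS, unused), 2 (+TES),
 * 3 (+TCS+TES), 4 (+GS), 5 (+TCS+GS, unused), 6 (+TES+GS), 7 (+TCS+TES+GS),
 * matching the eight hash_gfx_program/equals_gfx_program instantiations below.
 */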
template <unsigned STAGE_MASK>
static uint32_t
hash_gfx_program(const void *key)
{
   const struct zink_shader **shaders = (const struct zink_shader**)key;
   uint32_t base_hash = shaders[MESA_SHADER_VERTEX]->hash ^ shaders[MESA_SHADER_FRAGMENT]->hash;
   if (STAGE_MASK == STAGE_BASE) //VS+FS
      return base_hash;
   if (STAGE_MASK == STAGE_BASE_GS) //VS+GS+FS
      return base_hash ^ shaders[MESA_SHADER_GEOMETRY]->hash;
   /* VS+TCS+FS isn't a thing */
   /* VS+TCS+GS+FS isn't a thing */
   if (STAGE_MASK == STAGE_BASE_TES) //VS+TES+FS
      return base_hash ^ shaders[MESA_SHADER_TESS_EVAL]->hash;
   if (STAGE_MASK == STAGE_BASE_TES_GS) //VS+TES+GS+FS
      return base_hash ^ shaders[MESA_SHADER_GEOMETRY]->hash ^ shaders[MESA_SHADER_TESS_EVAL]->hash;
   if (STAGE_MASK == STAGE_BASE_TCS_TES) //VS+TCS+TES+FS
      return base_hash ^ shaders[MESA_SHADER_TESS_CTRL]->hash ^ shaders[MESA_SHADER_TESS_EVAL]->hash;

   /* all stages */
   return base_hash ^ shaders[MESA_SHADER_GEOMETRY]->hash ^ shaders[MESA_SHADER_TESS_CTRL]->hash ^ shaders[MESA_SHADER_TESS_EVAL]->hash;
}

template <unsigned STAGE_MASK>
static bool
equals_gfx_program(const void *a, const void *b)
{
   const void **sa = (const void**)a;
   const void **sb = (const void**)b;
   /* the memcmp spans below rely on this stage ordering */
   STATIC_ASSERT(MESA_SHADER_VERTEX == 0);
   STATIC_ASSERT(MESA_SHADER_TESS_CTRL == 1);
   STATIC_ASSERT(MESA_SHADER_TESS_EVAL == 2);
   STATIC_ASSERT(MESA_SHADER_GEOMETRY == 3);
   STATIC_ASSERT(MESA_SHADER_FRAGMENT == 4);
   if (STAGE_MASK == STAGE_BASE) //VS+FS
      return sa[MESA_SHADER_VERTEX] == sb[MESA_SHADER_VERTEX] &&
             sa[MESA_SHADER_FRAGMENT] == sb[MESA_SHADER_FRAGMENT];
   if (STAGE_MASK == STAGE_BASE_GS) //VS+GS+FS
      return sa[MESA_SHADER_VERTEX] == sb[MESA_SHADER_VERTEX] &&
             !memcmp(&sa[MESA_SHADER_GEOMETRY], &sb[MESA_SHADER_GEOMETRY], sizeof(void*) * 2);
   /* VS+TCS+FS isn't a thing */
   /* VS+TCS+GS+FS isn't a thing */
   if (STAGE_MASK == STAGE_BASE_TES) //VS+TES+FS
      return sa[MESA_SHADER_VERTEX] == sb[MESA_SHADER_VERTEX] &&
             sa[MESA_SHADER_TESS_EVAL] == sb[MESA_SHADER_TESS_EVAL] &&
             sa[MESA_SHADER_FRAGMENT] == sb[MESA_SHADER_FRAGMENT];
   if (STAGE_MASK == STAGE_BASE_TES_GS) //VS+TES+GS+FS
      return sa[MESA_SHADER_VERTEX] == sb[MESA_SHADER_VERTEX] &&
             !memcmp(&sa[MESA_SHADER_TESS_EVAL], &sb[MESA_SHADER_TESS_EVAL], sizeof(void*) * 3);
   if (STAGE_MASK == STAGE_BASE_TCS_TES) //VS+TCS+TES+FS
      return !memcmp(sa, sb, sizeof(void*) * 3) &&
             sa[MESA_SHADER_FRAGMENT] == sb[MESA_SHADER_FRAGMENT];

   /* all stages */
   return !memcmp(a, b, sizeof(void*) * ZINK_GFX_SHADER_COUNT);
}

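/* Builds every zink_draw_vbo / zink_draw_vertex_state instantiation, then copies
 * the row matching this device's capabilities (EXT_multi_draw, the dynamic-state
 * tier ladder below, and host popcnt support) into the context's dispatch tables.
 */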
extern "C"
void
zink_init_draw_functions(struct zink_context *ctx, struct zink_screen *screen)
{
   pipe_draw_func draw_vbo_array[2][6] //multidraw, zink_dynamic_state
                                [2];   //batch changed
   pipe_draw_vertex_state_func draw_state_array[2][6] //multidraw, zink_dynamic_state
                                               [2][2];   //has_popcnt, batch changed
   zink_dynamic_state dynamic;
   if (screen->info.have_EXT_extended_dynamic_state) {
      if (screen->info.have_EXT_extended_dynamic_state2) {
         if (screen->info.have_EXT_extended_dynamic_state3) {
            if (screen->info.have_EXT_vertex_input_dynamic_state)
               dynamic = ZINK_DYNAMIC_VERTEX_INPUT;
            else
               dynamic = ZINK_DYNAMIC_STATE3;
         } else {
            if (screen->info.have_EXT_vertex_input_dynamic_state)
               dynamic = ZINK_DYNAMIC_VERTEX_INPUT2;
            else
               dynamic = ZINK_DYNAMIC_STATE2;
         }
      } else {
         dynamic = ZINK_DYNAMIC_STATE;
      }
   } else {
      dynamic = ZINK_NO_DYNAMIC_STATE;
   }
   init_all_draw_functions(ctx, draw_vbo_array, draw_state_array);
   memcpy(ctx->draw_vbo, &draw_vbo_array[screen->info.have_EXT_multi_draw]
                                        [dynamic],
                                        sizeof(ctx->draw_vbo));
   memcpy(ctx->draw_state, &draw_state_array[screen->info.have_EXT_multi_draw]
                                           [dynamic][util_get_cpu_caps()->has_popcnt],
                                           sizeof(ctx->draw_state));

   /* Bind a fake draw_vbo, so that draw_vbo isn't NULL, which would skip
    * initialization of callbacks in upper layers (such as u_threaded_context).
    */
   ctx->base.draw_vbo = zink_invalid_draw_vbo;
   ctx->base.draw_vertex_state = zink_invalid_draw_vertex_state;

   _mesa_hash_table_init(&ctx->program_cache[0], ctx, hash_gfx_program<0>, equals_gfx_program<0>);
   _mesa_hash_table_init(&ctx->program_cache[1], ctx, hash_gfx_program<1>, equals_gfx_program<1>);
   _mesa_hash_table_init(&ctx->program_cache[2], ctx, hash_gfx_program<2>, equals_gfx_program<2>);
   _mesa_hash_table_init(&ctx->program_cache[3], ctx, hash_gfx_program<3>, equals_gfx_program<3>);
   _mesa_hash_table_init(&ctx->program_cache[4], ctx, hash_gfx_program<4>, equals_gfx_program<4>);
   _mesa_hash_table_init(&ctx->program_cache[5], ctx, hash_gfx_program<5>, equals_gfx_program<5>);
   _mesa_hash_table_init(&ctx->program_cache[6], ctx, hash_gfx_program<6>, equals_gfx_program<6>);
   _mesa_hash_table_init(&ctx->program_cache[7], ctx, hash_gfx_program<7>, equals_gfx_program<7>);
   for (unsigned i = 0; i < ARRAY_SIZE(ctx->program_lock); i++)
      simple_mtx_init(&ctx->program_lock[i], mtx_plain);
}

void
zink_init_grid_functions(struct zink_context *ctx)
{
   init_all_grid_functions(ctx);
   /* Bind a fake launch_grid, so that launch_grid isn't NULL, which would skip
    * initialization of callbacks in upper layers (such as u_threaded_context).
    */
   ctx->base.launch_grid = zink_invalid_launch_grid;
}

void
zink_init_screen_pipeline_libs(struct zink_screen *screen)
{
   _mesa_set_init(&screen->pipeline_libs[0], screen, hash_gfx_program<0>, equals_gfx_program<0>);
   _mesa_set_init(&screen->pipeline_libs[1], screen, hash_gfx_program<1>, equals_gfx_program<1>);
   _mesa_set_init(&screen->pipeline_libs[2], screen, hash_gfx_program<2>, equals_gfx_program<2>);
   _mesa_set_init(&screen->pipeline_libs[3], screen, hash_gfx_program<3>, equals_gfx_program<3>);
   _mesa_set_init(&screen->pipeline_libs[4], screen, hash_gfx_program<4>, equals_gfx_program<4>);
   _mesa_set_init(&screen->pipeline_libs[5], screen, hash_gfx_program<5>, equals_gfx_program<5>);
   _mesa_set_init(&screen->pipeline_libs[6], screen, hash_gfx_program<6>, equals_gfx_program<6>);
   _mesa_set_init(&screen->pipeline_libs[7], screen, hash_gfx_program<7>, equals_gfx_program<7>);
   for (unsigned i = 0; i < ARRAY_SIZE(screen->pipeline_libs_lock); i++)
      simple_mtx_init(&screen->pipeline_libs_lock[i], mtx_plain);
}