#include "zink_batch.h"
#include "zink_compiler.h"
#include "zink_context.h"
#include "zink_descriptors.h"
#include "zink_program.h"
#include "zink_program_state.hpp"
#include "zink_query.h"
#include "zink_resource.h"
#include "zink_screen.h"
#include "zink_state.h"
#include "zink_surface.h"
#include "zink_inlines.h"

#include "util/hash_table.h"
#include "util/u_cpu_detect.h"
#include "util/u_debug.h"
#include "util/u_helpers.h"
#include "util/u_inlines.h"
#include "util/u_prim.h"
#include "util/u_prim_restart.h"

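/* emit barriers for the transform feedback counter buffers: counters are written
 * at the XFB stage, and (per the VK_EXT_transform_feedback spec quoted below) an
 * already-valid counter will also be read back at the draw-indirect stage
 */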
static void
zink_emit_xfb_counter_barrier(struct zink_context *ctx)
{
   for (unsigned i = 0; i < ctx->num_so_targets; i++) {
      struct zink_so_target *t = zink_so_target(ctx->so_targets[i]);
      if (!t)
         continue;
      struct zink_resource *res = zink_resource(t->counter_buffer);
      VkAccessFlags access = VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT;
      VkPipelineStageFlags stage = VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT;
      if (t->counter_buffer_valid) {
         /* Between the pause and resume there needs to be a memory barrier for the counter buffers
          * with a source access of VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT
          * at pipeline stage VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT
          * to a destination access of VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT
          * at pipeline stage VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT.
          *
          * - from VK_EXT_transform_feedback spec
          */
         access |= VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT;
         stage |= VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT;
      }
      zink_screen(ctx->base.screen)->buffer_barrier(ctx, res, access, stage);
      if (!ctx->unordered_blitting)
         res->obj->unordered_read = false;
   }
}

static void
zink_emit_stream_output_targets(struct pipe_context *pctx)
{
   struct zink_context *ctx = zink_context(pctx);
   struct zink_batch *batch = &ctx->batch;
   VkBuffer buffers[PIPE_MAX_SO_BUFFERS] = {0};
   VkDeviceSize buffer_offsets[PIPE_MAX_SO_BUFFERS] = {0};
   VkDeviceSize buffer_sizes[PIPE_MAX_SO_BUFFERS] = {0};

   for (unsigned i = 0; i < ctx->num_so_targets; i++) {
      struct zink_so_target *t = (struct zink_so_target *)ctx->so_targets[i];
      if (!t) {
         /* no need to reference this or anything */
         buffers[i] = zink_resource(ctx->dummy_xfb_buffer)->obj->buffer;
         buffer_offsets[i] = 0;
         buffer_sizes[i] = sizeof(uint8_t);
         continue;
      }
      struct zink_resource *res = zink_resource(t->base.buffer);
      if (!res->so_valid)
         /* resource has been rebound */
         t->counter_buffer_valid = false;
      buffers[i] = res->obj->buffer;
      zink_batch_reference_resource_rw(batch, res, true);
      buffer_offsets[i] = t->base.buffer_offset;
      buffer_sizes[i] = t->base.buffer_size;
      res->so_valid = true;
      if (!ctx->unordered_blitting) {
         res->obj->unordered_read = res->obj->unordered_write = false;
         res->obj->access = VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT;
         res->obj->access_stage = VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT;
      }
      util_range_add(t->base.buffer, &res->valid_buffer_range, t->base.buffer_offset,
                     t->base.buffer_offset + t->base.buffer_size);
   }

   VKCTX(CmdBindTransformFeedbackBuffersEXT)(batch->state->cmdbuf, 0, ctx->num_so_targets,
                                             buffers, buffer_offsets,
                                             buffer_sizes);
   ctx->dirty_so_targets = false;
}

ALWAYS_INLINE static void
check_buffer_barrier(struct zink_context *ctx, struct pipe_resource *pres, VkAccessFlags flags, VkPipelineStageFlags pipeline)
{
   struct zink_resource *res = zink_resource(pres);
   zink_screen(ctx->base.screen)->buffer_barrier(ctx, res, flags, pipeline);
   if (!ctx->unordered_blitting)
      res->obj->unordered_read = false;
}

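/* emit the pre-draw barriers for the index buffer and any indirect draw parameter buffers */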
ALWAYS_INLINE static void
barrier_draw_buffers(struct zink_context *ctx, const struct pipe_draw_info *dinfo,
                     const struct pipe_draw_indirect_info *dindirect, struct pipe_resource *index_buffer)
{
   if (index_buffer)
      check_buffer_barrier(ctx, index_buffer, VK_ACCESS_INDEX_READ_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT);
   if (dindirect && dindirect->buffer) {
      check_buffer_barrier(ctx, dindirect->buffer,
                           VK_ACCESS_INDIRECT_COMMAND_READ_BIT, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT);
      if (dindirect->indirect_draw_count)
         check_buffer_barrier(ctx, dindirect->indirect_draw_count,
                              VK_ACCESS_INDIRECT_COMMAND_READ_BIT, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT);
   }
}

static void
bind_vertex_buffers_dgc(struct zink_context *ctx)
{
   struct zink_vertex_elements_state *elems = ctx->element_state;

   ctx->vertex_buffers_dirty = false;
   if (!elems->hw_state.num_bindings)
      return;
   for (unsigned i = 0; i < elems->hw_state.num_bindings; i++) {
      struct pipe_vertex_buffer *vb = ctx->vertex_buffers + ctx->element_state->hw_state.binding_map[i];
      assert(vb);
      VkBindVertexBufferIndirectCommandNV *ptr;
      VkIndirectCommandsLayoutTokenNV *token = zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_VERTEX_BUFFER_NV, (void**)&ptr);
      token->vertexBindingUnit = ctx->element_state->hw_state.binding_map[i];
      if (vb->buffer.resource) {
         struct zink_resource *res = zink_resource(vb->buffer.resource);
         assert(res->obj->bda);
         ptr->bufferAddress = res->obj->bda + vb->buffer_offset;
         ptr->size = res->base.b.width0;
         ptr->stride = ctx->element_state->hw_state.b.strides[i];
      } else {
         ptr->bufferAddress = 0;
         ptr->size = 0;
         ptr->stride = 0;
      }
   }
}

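/* gather the VkBuffer handles for every vertex binding (substituting the dummy
 * buffer for unset slots) and bind them, along with dynamic vertex input state
 * when the DYNAMIC_STATE level provides it
 */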
template <zink_dynamic_state DYNAMIC_STATE>
static void
zink_bind_vertex_buffers(struct zink_batch *batch, struct zink_context *ctx)
{
   VkBuffer buffers[PIPE_MAX_ATTRIBS];
   VkDeviceSize buffer_offsets[PIPE_MAX_ATTRIBS];
   struct zink_vertex_elements_state *elems = ctx->element_state;
   struct zink_screen *screen = zink_screen(ctx->base.screen);

   for (unsigned i = 0; i < elems->hw_state.num_bindings; i++) {
      struct pipe_vertex_buffer *vb = ctx->vertex_buffers + elems->hw_state.binding_map[i];
      assert(vb);
      if (vb->buffer.resource) {
         struct zink_resource *res = zink_resource(vb->buffer.resource);
         assert(res->obj->buffer);
         buffers[i] = res->obj->buffer;
         buffer_offsets[i] = vb->buffer_offset;
      } else {
         buffers[i] = zink_resource(ctx->dummy_vertex_buffer)->obj->buffer;
         buffer_offsets[i] = 0;
      }
   }

   if (DYNAMIC_STATE != ZINK_NO_DYNAMIC_STATE &&
       DYNAMIC_STATE != ZINK_DYNAMIC_VERTEX_INPUT2 &&
       DYNAMIC_STATE != ZINK_DYNAMIC_VERTEX_INPUT) {
      if (elems->hw_state.num_bindings)
         VKCTX(CmdBindVertexBuffers2)(batch->state->cmdbuf, 0,
                                      elems->hw_state.num_bindings,
                                      buffers, buffer_offsets, NULL, elems->hw_state.b.strides);
   } else if (elems->hw_state.num_bindings)
      VKSCR(CmdBindVertexBuffers)(batch->state->cmdbuf, 0,
                                  elems->hw_state.num_bindings,
                                  buffers, buffer_offsets);

   if (DYNAMIC_STATE == ZINK_DYNAMIC_VERTEX_INPUT2 || DYNAMIC_STATE == ZINK_DYNAMIC_VERTEX_INPUT)
      VKCTX(CmdSetVertexInputEXT)(batch->state->cmdbuf,
                                  elems->hw_state.num_bindings, elems->hw_state.dynbindings,
                                  elems->hw_state.num_attribs, elems->hw_state.dynattribs);

   ctx->vertex_buffers_dirty = false;
}

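/* Vulkan has no gl_DrawID equivalent when draws are unrolled on the CPU, so the
 * current draw index is fed to the shaders through the push constant block
 */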
ALWAYS_INLINE static void
update_drawid(struct zink_context *ctx, unsigned draw_id)
{
   VKCTX(CmdPushConstants)(ctx->batch.state->cmdbuf, ctx->curr_program->base.layout, VK_SHADER_STAGE_ALL_GRAPHICS,
                           offsetof(struct zink_gfx_push_constant, draw_id), sizeof(unsigned),
                           &draw_id);
}

static void
update_drawid_dgc(struct zink_context *ctx, unsigned draw_id)
{
   uint32_t *ptr;
   VkIndirectCommandsLayoutTokenNV *token = zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_NV, (void**)&ptr);
   token->pushconstantOffset = offsetof(struct zink_gfx_push_constant, draw_id);
   token->pushconstantSize = sizeof(unsigned);
   *ptr = draw_id;
}

ALWAYS_INLINE static void
draw_indexed_dgc_need_index_buffer_unref(struct zink_context *ctx,
                                         const struct pipe_draw_info *dinfo,
                                         const struct pipe_draw_start_count_bias *draws,
                                         unsigned num_draws,
                                         unsigned draw_id,
                                         bool needs_drawid)
{
   if (dinfo->increment_draw_id && needs_drawid) {
      for (unsigned i = 0; i < num_draws; i++) {
         update_drawid_dgc(ctx, draw_id);
         VkDrawIndexedIndirectCommand *ptr, cmd = {
            draws[i].count, dinfo->instance_count, 0, draws[i].index_bias, dinfo->start_instance
         };
         zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_NV, (void**)&ptr);
         *ptr = cmd;
         draw_id++;
      }
   } else {
      if (needs_drawid)
         update_drawid_dgc(ctx, draw_id);
      for (unsigned i = 0; i < num_draws; i++) {
         VkDrawIndexedIndirectCommand *ptr, cmd = {
            draws[i].count, dinfo->instance_count, 0, draws[i].index_bias, dinfo->start_instance
         };
         zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_NV, (void**)&ptr);
         *ptr = cmd;
      }
   }
}

ALWAYS_INLINE static void
draw_indexed_need_index_buffer_unref(struct zink_context *ctx,
                                     const struct pipe_draw_info *dinfo,
                                     const struct pipe_draw_start_count_bias *draws,
                                     unsigned num_draws,
                                     unsigned draw_id,
                                     bool needs_drawid)
{
   VkCommandBuffer cmdbuf = ctx->batch.state->cmdbuf;
   if (dinfo->increment_draw_id && needs_drawid) {
      for (unsigned i = 0; i < num_draws; i++) {
         update_drawid(ctx, draw_id);
         VKCTX(CmdDrawIndexed)(cmdbuf,
                               draws[i].count, dinfo->instance_count,
                               0, draws[i].index_bias, dinfo->start_instance);
         draw_id++;
      }
   } else {
      if (needs_drawid)
         update_drawid(ctx, draw_id);
      for (unsigned i = 0; i < num_draws; i++)
         VKCTX(CmdDrawIndexed)(cmdbuf,
                               draws[i].count, dinfo->instance_count,
                               0, draws[i].index_bias, dinfo->start_instance);
   }
}

ALWAYS_INLINE static void
draw_indexed_dgc(struct zink_context *ctx,
                 const struct pipe_draw_info *dinfo,
                 const struct pipe_draw_start_count_bias *draws,
                 unsigned num_draws,
                 unsigned draw_id,
                 bool needs_drawid)
{
   if (dinfo->increment_draw_id && needs_drawid) {
      for (unsigned i = 0; i < num_draws; i++) {
         update_drawid_dgc(ctx, draw_id);
         VkDrawIndexedIndirectCommand *ptr, cmd = {
            draws[i].count, dinfo->instance_count, draws[i].start, draws[i].index_bias, dinfo->start_instance
         };
         zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_NV, (void**)&ptr);
         *ptr = cmd;
         draw_id++;
      }
   } else {
      if (needs_drawid)
         update_drawid_dgc(ctx, draw_id);
      for (unsigned i = 0; i < num_draws; i++) {
         VkDrawIndexedIndirectCommand *ptr, cmd = {
            draws[i].count, dinfo->instance_count, draws[i].start, draws[i].index_bias, dinfo->start_instance
         };
         zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_NV, (void**)&ptr);
         *ptr = cmd;
      }
   }
}

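/* indexed draw path: unrolls draws one CmdDrawIndexed at a time when gl_DrawID
 * must be updated between draws, and otherwise submits them all in a single
 * CmdDrawMultiIndexedEXT when EXT_multi_draw is available
 */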
template <zink_multidraw HAS_MULTIDRAW>
ALWAYS_INLINE static void
draw_indexed(struct zink_context *ctx,
             const struct pipe_draw_info *dinfo,
             const struct pipe_draw_start_count_bias *draws,
             unsigned num_draws,
             unsigned draw_id,
             bool needs_drawid)
{
   VkCommandBuffer cmdbuf = ctx->batch.state->cmdbuf;
   if (dinfo->increment_draw_id && needs_drawid) {
      for (unsigned i = 0; i < num_draws; i++) {
         update_drawid(ctx, draw_id);
         VKCTX(CmdDrawIndexed)(cmdbuf,
                               draws[i].count, dinfo->instance_count,
                               draws[i].start, draws[i].index_bias, dinfo->start_instance);
         draw_id++;
      }
   } else {
      if (needs_drawid)
         update_drawid(ctx, draw_id);
      if (HAS_MULTIDRAW) {
         VKCTX(CmdDrawMultiIndexedEXT)(cmdbuf, num_draws, (const VkMultiDrawIndexedInfoEXT*)draws,
                                       dinfo->instance_count,
                                       dinfo->start_instance, sizeof(struct pipe_draw_start_count_bias),
                                       dinfo->index_bias_varies ? NULL : &draws[0].index_bias);
      } else {
         for (unsigned i = 0; i < num_draws; i++)
            VKCTX(CmdDrawIndexed)(cmdbuf,
                                  draws[i].count, dinfo->instance_count,
                                  draws[i].start, draws[i].index_bias, dinfo->start_instance);
      }
   }
}

ALWAYS_INLINE static void
draw_dgc(struct zink_context *ctx,
         const struct pipe_draw_info *dinfo,
         const struct pipe_draw_start_count_bias *draws,
         unsigned num_draws,
         unsigned draw_id,
         bool needs_drawid)
{
   if (dinfo->increment_draw_id && needs_drawid) {
      for (unsigned i = 0; i < num_draws; i++) {
         update_drawid_dgc(ctx, draw_id);
         VkDrawIndirectCommand *ptr, cmd = {
            draws[i].count, dinfo->instance_count, draws[i].start, dinfo->start_instance
         };
         zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_NV, (void**)&ptr);
         *ptr = cmd;
         draw_id++;
      }
   } else {
      if (needs_drawid)
         update_drawid_dgc(ctx, draw_id);
      for (unsigned i = 0; i < num_draws; i++) {
         VkDrawIndirectCommand *ptr, cmd = {
            draws[i].count, dinfo->instance_count, draws[i].start, dinfo->start_instance
         };
         zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_NV, (void**)&ptr);
         *ptr = cmd;
      }
   }
}

template <zink_multidraw HAS_MULTIDRAW>
ALWAYS_INLINE static void
draw(struct zink_context *ctx,
     const struct pipe_draw_info *dinfo,
     const struct pipe_draw_start_count_bias *draws,
     unsigned num_draws,
     unsigned draw_id,
     bool needs_drawid)
{
   VkCommandBuffer cmdbuf = ctx->batch.state->cmdbuf;
   if (dinfo->increment_draw_id && needs_drawid) {
      for (unsigned i = 0; i < num_draws; i++) {
         update_drawid(ctx, draw_id);
         VKCTX(CmdDraw)(cmdbuf, draws[i].count, dinfo->instance_count, draws[i].start, dinfo->start_instance);
         draw_id++;
      }
   } else {
      if (needs_drawid)
         update_drawid(ctx, draw_id);
      if (HAS_MULTIDRAW)
         VKCTX(CmdDrawMultiEXT)(cmdbuf, num_draws, (const VkMultiDrawInfoEXT*)draws,
                                dinfo->instance_count, dinfo->start_instance,
                                sizeof(struct pipe_draw_start_count_bias));
      else {
         for (unsigned i = 0; i < num_draws; i++)
            VKCTX(CmdDraw)(cmdbuf, draws[i].count, dinfo->instance_count, draws[i].start, dinfo->start_instance);
      }
   }
}

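/* update shader state and bind the resulting gfx pipeline (or, for
 * EXT_shader_object programs, the separate shader objects); returns whether a
 * different pipeline was bound
 */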
template <zink_dynamic_state DYNAMIC_STATE, bool BATCH_CHANGED>
static bool
update_gfx_pipeline(struct zink_context *ctx, struct zink_batch_state *bs, enum mesa_prim mode, bool can_dgc)
{
   VkPipeline prev_pipeline = ctx->gfx_pipeline_state.pipeline;
   const struct zink_screen *screen = zink_screen(ctx->base.screen);
   bool shaders_changed = ctx->gfx_dirty || ctx->dirty_gfx_stages;
   if (screen->optimal_keys && !ctx->is_generated_gs_bound)
      zink_gfx_program_update_optimal(ctx);
   else
      zink_gfx_program_update(ctx);
   bool pipeline_changed = false;
   VkPipeline pipeline = VK_NULL_HANDLE;
   if (!ctx->curr_program->base.uses_shobj) {
      if (screen->info.have_EXT_graphics_pipeline_library)
         pipeline = zink_get_gfx_pipeline<DYNAMIC_STATE, true>(ctx, ctx->curr_program, &ctx->gfx_pipeline_state, mode);
      else
         pipeline = zink_get_gfx_pipeline<DYNAMIC_STATE, false>(ctx, ctx->curr_program, &ctx->gfx_pipeline_state, mode);
   }
   if (pipeline) {
      pipeline_changed = prev_pipeline != pipeline;
      if (BATCH_CHANGED || pipeline_changed || ctx->shobj_draw) {
         ctx->dgc.last_prog = ctx->curr_program;
         if (unlikely(can_dgc && screen->info.nv_dgc_props.maxGraphicsShaderGroupCount == 1)) {
            VkBindShaderGroupIndirectCommandNV *ptr;
            zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_SHADER_GROUP_NV, (void**)&ptr);
            util_dynarray_append(&ctx->dgc.pipelines, VkPipeline, pipeline);
            /* zero-indexed -> base + group + num_pipelines-1 = base + num_pipelines */
            ptr->groupIndex = util_dynarray_num_elements(&ctx->dgc.pipelines, VkPipeline) + 1;
         } else {
            VKCTX(CmdBindPipeline)(bs->cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
         }
      }
      ctx->shobj_draw = false;
   } else {
      if (BATCH_CHANGED || shaders_changed || !ctx->shobj_draw) {
         VkShaderStageFlagBits stages[] = {
            VK_SHADER_STAGE_VERTEX_BIT,
            VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
            VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
            VK_SHADER_STAGE_GEOMETRY_BIT,
            VK_SHADER_STAGE_FRAGMENT_BIT,
         };
         /* always rebind all stages */
         VKCTX(CmdBindShadersEXT)(bs->cmdbuf, ZINK_GFX_SHADER_COUNT, stages, ctx->curr_program->objects);
         VKCTX(CmdSetDepthBiasEnable)(bs->cmdbuf, VK_TRUE);
         VKCTX(CmdSetTessellationDomainOriginEXT)(bs->cmdbuf, VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT);
         VKCTX(CmdSetSampleLocationsEnableEXT)(bs->cmdbuf, ctx->gfx_pipeline_state.sample_locations_enabled);
      }
      ctx->shobj_draw = true;
   }
   return pipeline_changed;
}

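/* the primitive class that will actually be rasterized: the last shader stage
 * may override the API draw mode (e.g. a GS emitting points)
 */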
static enum mesa_prim
zink_prim_type(const struct zink_context *ctx,
               const struct pipe_draw_info *dinfo)
{
   if (ctx->gfx_pipeline_state.shader_rast_prim != MESA_PRIM_COUNT)
      return ctx->gfx_pipeline_state.shader_rast_prim;

   return u_reduced_prim((enum mesa_prim)dinfo->mode);
}

static enum mesa_prim
zink_rast_prim(const struct zink_context *ctx,
               const struct pipe_draw_info *dinfo)
{
   enum mesa_prim prim_type = zink_prim_type(ctx, dinfo);
   assert(prim_type != MESA_PRIM_COUNT);

   if (prim_type == MESA_PRIM_TRIANGLES &&
       ctx->rast_state->base.fill_front != PIPE_POLYGON_MODE_FILL) {
      switch (ctx->rast_state->base.fill_front) {
      case PIPE_POLYGON_MODE_POINT:
         return MESA_PRIM_POINTS;
      case PIPE_POLYGON_MODE_LINE:
         return MESA_PRIM_LINES;
      default:
         unreachable("unexpected polygon mode");
      }
   }

   return prim_type;
}

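/* the monolithic draw entrypoint, specialized at compile time on multidraw
 * support, dynamic-state level, batch change, and vertex-state draws; flushes
 * all dirty state to the cmdbuf before recording the draw itself
 */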
template <zink_multidraw HAS_MULTIDRAW, zink_dynamic_state DYNAMIC_STATE, bool BATCH_CHANGED, bool DRAW_STATE>
void
zink_draw(struct pipe_context *pctx,
          const struct pipe_draw_info *dinfo,
          unsigned drawid_offset,
          const struct pipe_draw_indirect_info *dindirect,
          const struct pipe_draw_start_count_bias *draws,
          unsigned num_draws,
          struct pipe_vertex_state *vstate,
          uint32_t partial_velem_mask)
{
   if (!dindirect && (!draws[0].count || !dinfo->instance_count))
      return;

   struct zink_context *ctx = zink_context(pctx);
   struct zink_screen *screen = zink_screen(pctx->screen);
   struct zink_rasterizer_state *rast_state = ctx->rast_state;
   struct zink_depth_stencil_alpha_state *dsa_state = ctx->dsa_state;
   struct zink_batch *batch = &ctx->batch;
   struct zink_so_target *so_target =
      dindirect && dindirect->count_from_stream_output ?
      zink_so_target(dindirect->count_from_stream_output) : NULL;
   VkBuffer counter_buffers[PIPE_MAX_SO_BUFFERS];
   VkDeviceSize counter_buffer_offsets[PIPE_MAX_SO_BUFFERS];
   bool need_index_buffer_unref = false;
   bool mode_changed = ctx->gfx_pipeline_state.gfx_prim_mode != dinfo->mode;
   bool reads_drawid = ctx->shader_reads_drawid;
   bool reads_basevertex = ctx->shader_reads_basevertex;
   unsigned work_count = ctx->batch.work_count;
   enum mesa_prim mode = (enum mesa_prim)dinfo->mode;

   if (ctx->memory_barrier && !ctx->blitting)
      zink_flush_memory_barrier(ctx, false);

   if (unlikely(ctx->buffer_rebind_counter < screen->buffer_rebind_counter && !ctx->blitting)) {
      ctx->buffer_rebind_counter = screen->buffer_rebind_counter;
      zink_rebind_all_buffers(ctx);
   }

   if (unlikely(ctx->image_rebind_counter < screen->image_rebind_counter && !ctx->blitting)) {
      ctx->image_rebind_counter = screen->image_rebind_counter;
      zink_rebind_all_images(ctx);
   }

   if (mode_changed)
      zink_flush_dgc_if_enabled(ctx);

   unsigned index_offset = 0;
   unsigned index_size = dinfo->index_size;
   struct pipe_resource *index_buffer = NULL;
   if (index_size > 0) {
      if (dinfo->has_user_indices) {
         if (!util_upload_index_buffer(pctx, dinfo, &draws[0], &index_buffer, &index_offset, 4)) {
            debug_printf("util_upload_index_buffer() failed\n");
            return;
         }
         /* this will have extra refs from tc */
         if (screen->threaded)
            zink_batch_reference_resource_move(batch, zink_resource(index_buffer));
         else
            zink_batch_reference_resource(batch, zink_resource(index_buffer));
      } else {
         index_buffer = dinfo->index.resource;
         zink_batch_reference_resource_rw(batch, zink_resource(index_buffer), false);
      }
      assert(index_size <= 4 && index_size != 3);
      assert(index_size != 1 || screen->info.have_EXT_index_type_uint8);
   }

   ctx->was_line_loop = dinfo->was_line_loop;

   bool have_streamout = !!ctx->num_so_targets;
   if (have_streamout) {
      zink_emit_xfb_counter_barrier(ctx);
      if (ctx->dirty_so_targets) {
         /* have to loop here and below because barriers must be emitted out of renderpass,
          * but xfb buffers can't be bound before the renderpass is active to avoid
          * breaking from recursion
          */
         for (unsigned i = 0; i < ctx->num_so_targets; i++) {
            struct zink_so_target *t = (struct zink_so_target *)ctx->so_targets[i];
            if (t) {
               struct zink_resource *res = zink_resource(t->base.buffer);
               zink_screen(ctx->base.screen)->buffer_barrier(ctx, res,
                                                             VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT, VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT);
               if (!ctx->unordered_blitting)
                  res->obj->unordered_read = res->obj->unordered_write = false;
            }
         }
      }
   }

   barrier_draw_buffers(ctx, dinfo, dindirect, index_buffer);
   /* this may re-emit draw buffer barriers, but such synchronization is harmless */
   if (!ctx->blitting)
      zink_update_barriers(ctx, false, index_buffer, dindirect ? dindirect->buffer : NULL, dindirect ? dindirect->indirect_draw_count : NULL);

   bool can_dgc = false;
   if (unlikely(zink_debug & ZINK_DEBUG_DGC))
      can_dgc = !so_target && !ctx->num_so_targets && (!dindirect || !dindirect->buffer);

   /* ensure synchronization between doing streamout with counter buffer
    * and using counter buffer for indirect draw
    */
   if (so_target && so_target->counter_buffer_valid) {
      struct zink_resource *res = zink_resource(so_target->counter_buffer);
      zink_screen(ctx->base.screen)->buffer_barrier(ctx, res,
                                                    VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT,
                                                    VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT);
      if (!ctx->unordered_blitting)
         res->obj->unordered_read = false;
   }

   zink_query_update_gs_states(ctx);

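   /* with ZINK_DEBUG=sync, split the renderpass and emit a full memory barrier
    * between every command to help bisect synchronization bugs
    */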
   if (unlikely(zink_debug & ZINK_DEBUG_SYNC)) {
      zink_batch_no_rp(ctx);
      VkMemoryBarrier mb;
      mb.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
      mb.pNext = NULL;
      mb.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT;
      mb.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT;
      VKSCR(CmdPipelineBarrier)(ctx->batch.state->cmdbuf,
                                VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                                VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                                0, 1, &mb, 0, NULL, 0, NULL);
   }

   zink_batch_rp(ctx);
   /* check dead swapchain */
   if (unlikely(!ctx->batch.in_rp))
      return;

   if (BATCH_CHANGED)
      zink_update_descriptor_refs(ctx, false);

   /* these must be after renderpass start to avoid issues with recursion */
   bool drawid_broken = false;
   if (reads_drawid && (!dindirect || !dindirect->buffer))
      drawid_broken = (drawid_offset != 0 ||
                       (!HAS_MULTIDRAW && num_draws > 1) ||
                       (HAS_MULTIDRAW && num_draws > 1 && !dinfo->increment_draw_id));
   if (drawid_broken != zink_get_last_vertex_key(ctx)->push_drawid)
      zink_set_last_vertex_key(ctx)->push_drawid = drawid_broken;

   bool rast_prim_changed = false;
   bool prim_changed = false;
   bool rast_state_changed = ctx->rast_state_changed;
   if (mode_changed || ctx->gfx_pipeline_state.modules_changed ||
       rast_state_changed) {
      enum mesa_prim rast_prim = zink_rast_prim(ctx, dinfo);
      if (rast_prim != ctx->gfx_pipeline_state.rast_prim) {
         bool points_changed =
            (ctx->gfx_pipeline_state.rast_prim == MESA_PRIM_POINTS) !=
            (rast_prim == MESA_PRIM_POINTS);

         prim_changed = ctx->gfx_pipeline_state.rast_prim != rast_prim;

         static bool rect_warned = false;
         if (DYNAMIC_STATE >= ZINK_DYNAMIC_STATE3 && rast_prim == MESA_PRIM_LINES && !rect_warned &&
             (VkLineRasterizationModeEXT)rast_state->hw_state.line_mode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT) {
            if (screen->info.line_rast_feats.rectangularLines)
               rect_warned = true;
            else
               warn_missing_feature(rect_warned, "rectangularLines");
         }

         ctx->gfx_pipeline_state.rast_prim = rast_prim;
         rast_prim_changed = true;

         if (points_changed && ctx->rast_state->base.point_quad_rasterization)
            zink_set_fs_point_coord_key(ctx);
      }
   }
   ctx->gfx_pipeline_state.gfx_prim_mode = mode;

   if ((mode_changed || prim_changed || rast_state_changed || ctx->gfx_pipeline_state.modules_changed)) {
      zink_set_primitive_emulation_keys(ctx);
   }

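   /* index_size can only be 1, 2, or 4 here (asserted above), so index_size >> 1
    * selects the matching UINT8/UINT16/UINT32 entry
    */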
   if (index_size) {
      const VkIndexType index_type[3] = {
         VK_INDEX_TYPE_UINT8_EXT,
         VK_INDEX_TYPE_UINT16,
         VK_INDEX_TYPE_UINT32,
      };
      struct zink_resource *res = zink_resource(index_buffer);
      if (unlikely(can_dgc)) {
         VkBindIndexBufferIndirectCommandNV *ptr;
         zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_INDEX_BUFFER_NV, (void**)&ptr);
         ptr->bufferAddress = res->obj->bda + index_offset;
         ptr->size = res->base.b.width0;
         ptr->indexType = index_type[index_size >> 1];
      } else {
         VKCTX(CmdBindIndexBuffer)(batch->state->cmdbuf, res->obj->buffer, index_offset, index_type[index_size >> 1]);
      }
   }
   if (DYNAMIC_STATE < ZINK_DYNAMIC_STATE2) {
      if (ctx->gfx_pipeline_state.dyn_state2.primitive_restart != dinfo->primitive_restart)
         ctx->gfx_pipeline_state.dirty = true;
      ctx->gfx_pipeline_state.dyn_state2.primitive_restart = dinfo->primitive_restart;
   }

   if (have_streamout && ctx->dirty_so_targets)
      zink_emit_stream_output_targets(pctx);

   bool pipeline_changed = update_gfx_pipeline<DYNAMIC_STATE, BATCH_CHANGED>(ctx, batch->state, mode, can_dgc);

   if (BATCH_CHANGED || ctx->vp_state_changed || (DYNAMIC_STATE == ZINK_NO_DYNAMIC_STATE && pipeline_changed)) {
      VkViewport viewports[PIPE_MAX_VIEWPORTS];
      for (unsigned i = 0; i < ctx->vp_state.num_viewports; i++) {
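         /* gallium expresses viewports as center/half-extent (translate/scale);
          * VkViewport wants corner + extent. width is kept >= 1 (Vulkan forbids
          * zero-width viewports), and a negative scale[1] yields the negative
          * height Vulkan uses for flipped viewports.
          */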
         VkViewport viewport = {
            ctx->vp_state.viewport_states[i].translate[0] - ctx->vp_state.viewport_states[i].scale[0],
            ctx->vp_state.viewport_states[i].translate[1] - ctx->vp_state.viewport_states[i].scale[1],
            MAX2(ctx->vp_state.viewport_states[i].scale[0] * 2, 1),
            ctx->vp_state.viewport_states[i].scale[1] * 2,
            CLAMP(ctx->rast_state->base.clip_halfz ?
                  ctx->vp_state.viewport_states[i].translate[2] :
                  ctx->vp_state.viewport_states[i].translate[2] - ctx->vp_state.viewport_states[i].scale[2],
                  0, 1),
            CLAMP(ctx->vp_state.viewport_states[i].translate[2] + ctx->vp_state.viewport_states[i].scale[2],
                  0, 1)
         };
         if (!ctx->rast_state->base.half_pixel_center) {
            /* magic constant value from dxvk */
            float cf = 0.5f - (1.0f / 128.0f);
            viewport.x += cf;
            if (viewport.height < 0)
               viewport.y += cf;
            else
               viewport.y -= cf;
         }
         viewports[i] = viewport;
      }
      if (DYNAMIC_STATE != ZINK_NO_DYNAMIC_STATE)
         VKCTX(CmdSetViewportWithCount)(batch->state->cmdbuf, ctx->vp_state.num_viewports, viewports);
      else
         VKCTX(CmdSetViewport)(batch->state->cmdbuf, 0, ctx->vp_state.num_viewports, viewports);
   }
   if (BATCH_CHANGED || ctx->scissor_changed || ctx->vp_state_changed || (DYNAMIC_STATE == ZINK_NO_DYNAMIC_STATE && pipeline_changed)) {
      VkRect2D scissors[PIPE_MAX_VIEWPORTS];
      if (ctx->rast_state->base.scissor) {
         for (unsigned i = 0; i < ctx->vp_state.num_viewports; i++) {
            scissors[i].offset.x = ctx->vp_state.scissor_states[i].minx;
            scissors[i].offset.y = ctx->vp_state.scissor_states[i].miny;
            scissors[i].extent.width = ctx->vp_state.scissor_states[i].maxx - ctx->vp_state.scissor_states[i].minx;
            scissors[i].extent.height = ctx->vp_state.scissor_states[i].maxy - ctx->vp_state.scissor_states[i].miny;
         }
      } else {
         for (unsigned i = 0; i < ctx->vp_state.num_viewports; i++) {
            scissors[i].offset.x = 0;
            scissors[i].offset.y = 0;
            scissors[i].extent.width = ctx->fb_state.width;
            scissors[i].extent.height = ctx->fb_state.height;
         }
      }
      if (DYNAMIC_STATE != ZINK_NO_DYNAMIC_STATE)
         VKCTX(CmdSetScissorWithCount)(batch->state->cmdbuf, ctx->vp_state.num_viewports, scissors);
      else
         VKCTX(CmdSetScissor)(batch->state->cmdbuf, 0, ctx->vp_state.num_viewports, scissors);
   }
   ctx->vp_state_changed = false;
   ctx->scissor_changed = false;

   if (BATCH_CHANGED || ctx->stencil_ref_changed) {
      VKCTX(CmdSetStencilReference)(batch->state->cmdbuf, VK_STENCIL_FACE_FRONT_BIT,
                                    ctx->stencil_ref.ref_value[0]);
      VKCTX(CmdSetStencilReference)(batch->state->cmdbuf, VK_STENCIL_FACE_BACK_BIT,
                                    ctx->stencil_ref.ref_value[1]);
      ctx->stencil_ref_changed = false;
   }

   if (DYNAMIC_STATE != ZINK_NO_DYNAMIC_STATE && (BATCH_CHANGED || ctx->dsa_state_changed)) {
      VKCTX(CmdSetDepthBoundsTestEnable)(batch->state->cmdbuf, dsa_state->hw_state.depth_bounds_test);
      if (dsa_state->hw_state.depth_bounds_test)
         VKCTX(CmdSetDepthBounds)(batch->state->cmdbuf,
                                  dsa_state->hw_state.min_depth_bounds,
                                  dsa_state->hw_state.max_depth_bounds);
      VKCTX(CmdSetDepthTestEnable)(batch->state->cmdbuf, dsa_state->hw_state.depth_test);
      VKCTX(CmdSetDepthCompareOp)(batch->state->cmdbuf, dsa_state->hw_state.depth_compare_op);
      VKCTX(CmdSetDepthWriteEnable)(batch->state->cmdbuf, dsa_state->hw_state.depth_write);
      VKCTX(CmdSetStencilTestEnable)(batch->state->cmdbuf, dsa_state->hw_state.stencil_test);
      if (dsa_state->hw_state.stencil_test) {
         VKCTX(CmdSetStencilOp)(batch->state->cmdbuf, VK_STENCIL_FACE_FRONT_BIT,
                                dsa_state->hw_state.stencil_front.failOp,
                                dsa_state->hw_state.stencil_front.passOp,
                                dsa_state->hw_state.stencil_front.depthFailOp,
                                dsa_state->hw_state.stencil_front.compareOp);
         VKCTX(CmdSetStencilOp)(batch->state->cmdbuf, VK_STENCIL_FACE_BACK_BIT,
                                dsa_state->hw_state.stencil_back.failOp,
                                dsa_state->hw_state.stencil_back.passOp,
                                dsa_state->hw_state.stencil_back.depthFailOp,
                                dsa_state->hw_state.stencil_back.compareOp);
         if (dsa_state->base.stencil[1].enabled) {
            VKCTX(CmdSetStencilWriteMask)(batch->state->cmdbuf, VK_STENCIL_FACE_FRONT_BIT, dsa_state->hw_state.stencil_front.writeMask);
            VKCTX(CmdSetStencilWriteMask)(batch->state->cmdbuf, VK_STENCIL_FACE_BACK_BIT, dsa_state->hw_state.stencil_back.writeMask);
            VKCTX(CmdSetStencilCompareMask)(batch->state->cmdbuf, VK_STENCIL_FACE_FRONT_BIT, dsa_state->hw_state.stencil_front.compareMask);
            VKCTX(CmdSetStencilCompareMask)(batch->state->cmdbuf, VK_STENCIL_FACE_BACK_BIT, dsa_state->hw_state.stencil_back.compareMask);
         } else {
            VKCTX(CmdSetStencilWriteMask)(batch->state->cmdbuf, VK_STENCIL_FACE_FRONT_AND_BACK, dsa_state->hw_state.stencil_front.writeMask);
            VKCTX(CmdSetStencilCompareMask)(batch->state->cmdbuf, VK_STENCIL_FACE_FRONT_AND_BACK, dsa_state->hw_state.stencil_front.compareMask);
         }
      } else {
         VKCTX(CmdSetStencilWriteMask)(batch->state->cmdbuf, VK_STENCIL_FACE_FRONT_AND_BACK, dsa_state->hw_state.stencil_front.writeMask);
         VKCTX(CmdSetStencilCompareMask)(batch->state->cmdbuf, VK_STENCIL_FACE_FRONT_AND_BACK, dsa_state->hw_state.stencil_front.compareMask);
         VKCTX(CmdSetStencilOp)(batch->state->cmdbuf, VK_STENCIL_FACE_FRONT_AND_BACK, VK_STENCIL_OP_KEEP, VK_STENCIL_OP_KEEP, VK_STENCIL_OP_KEEP, VK_COMPARE_OP_ALWAYS);
      }
   }
   ctx->dsa_state_changed = false;

   if (BATCH_CHANGED || rast_state_changed) {
      if (DYNAMIC_STATE != ZINK_NO_DYNAMIC_STATE) {
         VKCTX(CmdSetFrontFace)(batch->state->cmdbuf, (VkFrontFace)ctx->gfx_pipeline_state.dyn_state1.front_face);
         VKCTX(CmdSetCullMode)(batch->state->cmdbuf, ctx->gfx_pipeline_state.dyn_state1.cull_mode);
      }

      if (DYNAMIC_STATE >= ZINK_DYNAMIC_STATE3) {
         if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_RAST_STIPPLE))
            VKCTX(CmdSetLineStippleEXT)(batch->state->cmdbuf, rast_state->base.line_stipple_factor, rast_state->base.line_stipple_pattern);
         if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_RAST_CLIP))
            VKCTX(CmdSetDepthClipEnableEXT)(batch->state->cmdbuf, rast_state->hw_state.depth_clip);
         if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_RAST_CLAMP))
            VKCTX(CmdSetDepthClampEnableEXT)(batch->state->cmdbuf, rast_state->hw_state.depth_clamp);
         if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_RAST_POLYGON))
            VKCTX(CmdSetPolygonModeEXT)(batch->state->cmdbuf, (VkPolygonMode)rast_state->hw_state.polygon_mode);
         if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_RAST_HALFZ))
            VKCTX(CmdSetDepthClipNegativeOneToOneEXT)(batch->state->cmdbuf, !rast_state->hw_state.clip_halfz);
         if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_RAST_PV))
            VKCTX(CmdSetProvokingVertexModeEXT)(batch->state->cmdbuf,
                                                rast_state->hw_state.pv_last ?
                                                VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT :
                                                VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT);
         if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_RAST_LINE))
            VKCTX(CmdSetLineRasterizationModeEXT)(batch->state->cmdbuf, rast_state->dynamic_line_mode);
         if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_RAST_STIPPLE_ON))
            VKCTX(CmdSetLineStippleEnableEXT)(batch->state->cmdbuf, rast_state->hw_state.line_stipple_enable);
      }
   }
   if ((BATCH_CHANGED || ctx->sample_mask_changed) && screen->have_full_ds3) {
      VKCTX(CmdSetRasterizationSamplesEXT)(batch->state->cmdbuf, (VkSampleCountFlagBits)(ctx->gfx_pipeline_state.rast_samples + 1));
      VKCTX(CmdSetSampleMaskEXT)(batch->state->cmdbuf, (VkSampleCountFlagBits)(ctx->gfx_pipeline_state.rast_samples + 1), &ctx->gfx_pipeline_state.sample_mask);
      ctx->sample_mask_changed = false;
   }
   if ((BATCH_CHANGED || ctx->blend_state_changed)) {
      if (ctx->gfx_pipeline_state.blend_state) {
         if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_BLEND_A2C))
            VKCTX(CmdSetAlphaToCoverageEnableEXT)(batch->state->cmdbuf, ctx->gfx_pipeline_state.blend_state->alpha_to_coverage &&
                                                  ctx->gfx_stages[MESA_SHADER_FRAGMENT]->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_DATA0));
         if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_BLEND_A21))
            VKCTX(CmdSetAlphaToOneEnableEXT)(batch->state->cmdbuf, ctx->gfx_pipeline_state.blend_state->alpha_to_one);
         if (ctx->fb_state.nr_cbufs) {
            if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_BLEND_ON))
               VKCTX(CmdSetColorBlendEnableEXT)(batch->state->cmdbuf, 0, ctx->fb_state.nr_cbufs, ctx->gfx_pipeline_state.blend_state->ds3.enables);
            if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_BLEND_WRITE))
               VKCTX(CmdSetColorWriteMaskEXT)(batch->state->cmdbuf, 0, ctx->fb_state.nr_cbufs, ctx->gfx_pipeline_state.blend_state->ds3.wrmask);
            if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_BLEND_EQ))
               VKCTX(CmdSetColorBlendEquationEXT)(batch->state->cmdbuf, 0, ctx->fb_state.nr_cbufs, ctx->gfx_pipeline_state.blend_state->ds3.eq);
         }
         if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_BLEND_LOGIC_ON))
            VKCTX(CmdSetLogicOpEnableEXT)(batch->state->cmdbuf, ctx->gfx_pipeline_state.blend_state->logicop_enable);
         if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_BLEND_LOGIC))
            VKCTX(CmdSetLogicOpEXT)(batch->state->cmdbuf, ctx->gfx_pipeline_state.blend_state->logicop_func);
      }
   }
   ctx->ds3_states = 0;

   if (BATCH_CHANGED ||
       /* only re-emit on non-batch change when actually drawing lines */
       ((ctx->line_width_changed || rast_prim_changed) && ctx->gfx_pipeline_state.rast_prim == MESA_PRIM_LINES)) {
      VKCTX(CmdSetLineWidth)(batch->state->cmdbuf, rast_state->line_width);
      ctx->line_width_changed = false;
   }

   if (BATCH_CHANGED || mode_changed ||
       ctx->gfx_pipeline_state.modules_changed ||
       rast_state_changed) {
      bool depth_bias =
         zink_prim_type(ctx, dinfo) == MESA_PRIM_TRIANGLES &&
         rast_state->offset_fill;

      if (depth_bias) {
         if (rast_state->base.offset_units_unscaled) {
            VKCTX(CmdSetDepthBias)(batch->state->cmdbuf, rast_state->offset_units * ctx->depth_bias_scale_factor, rast_state->offset_clamp, rast_state->offset_scale);
         } else {
            VKCTX(CmdSetDepthBias)(batch->state->cmdbuf, rast_state->offset_units, rast_state->offset_clamp, rast_state->offset_scale);
         }
      } else {
         VKCTX(CmdSetDepthBias)(batch->state->cmdbuf, 0.0f, 0.0f, 0.0f);
      }
   }
   ctx->rast_state_changed = false;

   if (DYNAMIC_STATE != ZINK_NO_DYNAMIC_STATE) {
      if (ctx->sample_locations_changed) {
         VkSampleLocationsInfoEXT loc;
         zink_init_vk_sample_locations(ctx, &loc);
         VKCTX(CmdSetSampleLocationsEXT)(batch->state->cmdbuf, &loc);
      }
      ctx->sample_locations_changed = false;
   }

   if (BATCH_CHANGED || ctx->blend_color_changed) {
      VKCTX(CmdSetBlendConstants)(batch->state->cmdbuf, ctx->blend_constants);
   }
   ctx->blend_state_changed = false;
   ctx->blend_color_changed = false;

   if (!DRAW_STATE) {
      if (BATCH_CHANGED || ctx->vertex_buffers_dirty) {
         if (unlikely(can_dgc))
            bind_vertex_buffers_dgc(ctx);
         else if (DYNAMIC_STATE == ZINK_DYNAMIC_VERTEX_INPUT || ctx->gfx_pipeline_state.uses_dynamic_stride)
            zink_bind_vertex_buffers<DYNAMIC_STATE>(batch, ctx);
         else
            zink_bind_vertex_buffers<ZINK_NO_DYNAMIC_STATE>(batch, ctx);
      }
   }

   if (BATCH_CHANGED) {
      ctx->pipeline_changed[0] = false;
      zink_select_draw_vbo(ctx);
   }

   if (DYNAMIC_STATE != ZINK_NO_DYNAMIC_STATE && (BATCH_CHANGED || mode_changed))
      VKCTX(CmdSetPrimitiveTopology)(batch->state->cmdbuf, zink_primitive_topology(mode));

   if (DYNAMIC_STATE >= ZINK_DYNAMIC_STATE2 && (BATCH_CHANGED || ctx->primitive_restart != dinfo->primitive_restart)) {
      VKCTX(CmdSetPrimitiveRestartEnable)(batch->state->cmdbuf, dinfo->primitive_restart);
      ctx->primitive_restart = dinfo->primitive_restart;
   }

   if (DYNAMIC_STATE >= ZINK_DYNAMIC_STATE2 && (BATCH_CHANGED || ctx->rasterizer_discard_changed)) {
      VKCTX(CmdSetRasterizerDiscardEnable)(batch->state->cmdbuf, ctx->gfx_pipeline_state.dyn_state2.rasterizer_discard);
      ctx->rasterizer_discard_changed = false;
   }

   if (zink_program_has_descriptors(&ctx->curr_program->base))
      zink_descriptors_update(ctx, false);

   if (ctx->di.any_bindless_dirty &&
       /* some apps (d3dretrace) call MakeTextureHandleResidentARB randomly */
       zink_program_has_descriptors(&ctx->curr_program->base) &&
       ctx->curr_program->base.dd.bindless)
      zink_descriptors_update_bindless(ctx);

   if (reads_basevertex) {
      unsigned draw_mode_is_indexed = index_size > 0;
      if (unlikely(can_dgc)) {
         uint32_t *ptr;
         VkIndirectCommandsLayoutTokenNV *token = zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_NV, (void**)&ptr);
         token->pushconstantOffset = offsetof(struct zink_gfx_push_constant, draw_mode_is_indexed);
         token->pushconstantSize = sizeof(unsigned);
         *ptr = draw_mode_is_indexed;
      } else {
         VKCTX(CmdPushConstants)(batch->state->cmdbuf, ctx->curr_program->base.layout, VK_SHADER_STAGE_ALL_GRAPHICS,
                                 offsetof(struct zink_gfx_push_constant, draw_mode_is_indexed), sizeof(unsigned),
                                 &draw_mode_is_indexed);
      }
   }
   if (ctx->curr_program->shaders[MESA_SHADER_TESS_CTRL] &&
       ctx->curr_program->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated) {
      if (unlikely(can_dgc)) {
         float *ptr;
         VkIndirectCommandsLayoutTokenNV *token = zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_NV, (void**)&ptr);
         token->pushconstantOffset = offsetof(struct zink_gfx_push_constant, default_inner_level);
         token->pushconstantSize = sizeof(float) * 6;
         memcpy(ptr, &ctx->tess_levels[0], sizeof(float) * 6);
      } else {
         VKCTX(CmdPushConstants)(batch->state->cmdbuf, ctx->curr_program->base.layout, VK_SHADER_STAGE_ALL_GRAPHICS,
                                 offsetof(struct zink_gfx_push_constant, default_inner_level), sizeof(float) * 6,
                                 &ctx->tess_levels[0]);
      }
   }

   if (!screen->optimal_keys) {
      if (zink_get_fs_key(ctx)->lower_line_stipple ||
          zink_get_gs_key(ctx)->lower_gl_point ||
          zink_get_fs_key(ctx)->lower_line_smooth) {

         assert(zink_get_gs_key(ctx)->lower_line_stipple ==
                zink_get_fs_key(ctx)->lower_line_stipple);

         assert(zink_get_gs_key(ctx)->lower_line_smooth ==
                zink_get_fs_key(ctx)->lower_line_smooth);

         float viewport_scale[2] = {
            ctx->vp_state.viewport_states[0].scale[0],
            ctx->vp_state.viewport_states[0].scale[1]
         };
         VKCTX(CmdPushConstants)(batch->state->cmdbuf,
                                 ctx->curr_program->base.layout,
                                 VK_SHADER_STAGE_ALL_GRAPHICS,
                                 offsetof(struct zink_gfx_push_constant, viewport_scale),
                                 sizeof(float) * 2, &viewport_scale);

         uint32_t stipple = ctx->rast_state->base.line_stipple_pattern;
         stipple |= ctx->rast_state->base.line_stipple_factor << 16;
         VKCTX(CmdPushConstants)(batch->state->cmdbuf,
                                 ctx->curr_program->base.layout,
                                 VK_SHADER_STAGE_ALL_GRAPHICS,
                                 offsetof(struct zink_gfx_push_constant, line_stipple_pattern),
                                 sizeof(uint32_t), &stipple);

         if (ctx->gfx_pipeline_state.shader_keys.key[MESA_SHADER_FRAGMENT].key.fs.lower_line_smooth) {
            float line_width = ctx->rast_state->base.line_width;
            VKCTX(CmdPushConstants)(batch->state->cmdbuf,
                                    ctx->curr_program->base.layout,
                                    VK_SHADER_STAGE_ALL_GRAPHICS,
                                    offsetof(struct zink_gfx_push_constant, line_width),
                                    sizeof(uint32_t), &line_width);
         }
      }
   }

   if (have_streamout) {
      for (unsigned i = 0; i < ctx->num_so_targets; i++) {
         struct zink_so_target *t = zink_so_target(ctx->so_targets[i]);
         counter_buffers[i] = VK_NULL_HANDLE;
         if (t) {
            struct zink_resource *res = zink_resource(t->counter_buffer);
            t->stride = ctx->last_vertex_stage->sinfo.stride[i];
            zink_batch_reference_resource_rw(batch, res, true);
            if (!ctx->unordered_blitting)
               res->obj->unordered_read = res->obj->unordered_write = false;
            if (t->counter_buffer_valid) {
               counter_buffers[i] = res->obj->buffer;
               counter_buffer_offsets[i] = t->counter_buffer_offset;
            }
         }
      }
      VKCTX(CmdBeginTransformFeedbackEXT)(batch->state->cmdbuf, 0, ctx->num_so_targets, counter_buffers, counter_buffer_offsets);
   }

   bool marker = false;
   if (unlikely(zink_tracing)) {
      VkViewport viewport = {
         ctx->vp_state.viewport_states[0].translate[0] - ctx->vp_state.viewport_states[0].scale[0],
         ctx->vp_state.viewport_states[0].translate[1] - ctx->vp_state.viewport_states[0].scale[1],
         MAX2(ctx->vp_state.viewport_states[0].scale[0] * 2, 1),
         ctx->vp_state.viewport_states[0].scale[1] * 2,
         CLAMP(ctx->rast_state->base.clip_halfz ?
               ctx->vp_state.viewport_states[0].translate[2] :
               ctx->vp_state.viewport_states[0].translate[2] - ctx->vp_state.viewport_states[0].scale[2],
               0, 1),
         CLAMP(ctx->vp_state.viewport_states[0].translate[2] + ctx->vp_state.viewport_states[0].scale[2],
               0, 1)
      };
      if (ctx->blitting) {
         bool is_zs = util_format_is_depth_or_stencil(ctx->sampler_views[MESA_SHADER_FRAGMENT][0]->format);
         marker = zink_cmd_debug_marker_begin(ctx, VK_NULL_HANDLE, "u_blitter(%s->%s, %dx%d)",
                                              util_format_short_name(ctx->sampler_views[MESA_SHADER_FRAGMENT][0]->format),
                                              util_format_short_name((is_zs ? ctx->fb_state.zsbuf : ctx->fb_state.cbufs[0])->format),
                                              lround(viewport.width), lround(viewport.height));
      } else {
         marker = zink_cmd_debug_marker_begin(ctx, VK_NULL_HANDLE, "draw(%u cbufs|%s, %dx%d)",
                                              ctx->fb_state.nr_cbufs,
                                              ctx->fb_state.zsbuf ? "zsbuf" : "",
                                              lround(viewport.width), lround(viewport.height));
      }
   }

   bool needs_drawid = reads_drawid && zink_get_last_vertex_key(ctx)->push_drawid;
   work_count += num_draws;
   if (index_size > 0) {
      if (dindirect && dindirect->buffer) {
         assert(num_draws == 1);
         if (needs_drawid)
            update_drawid(ctx, drawid_offset);
         struct zink_resource *indirect = zink_resource(dindirect->buffer);
         zink_batch_reference_resource_rw(batch, indirect, false);
         if (dindirect->indirect_draw_count) {
            struct zink_resource *indirect_draw_count = zink_resource(dindirect->indirect_draw_count);
            zink_batch_reference_resource_rw(batch, indirect_draw_count, false);
            VKCTX(CmdDrawIndexedIndirectCount)(batch->state->cmdbuf, indirect->obj->buffer, dindirect->offset,
                                               indirect_draw_count->obj->buffer, dindirect->indirect_draw_count_offset,
                                               dindirect->draw_count, dindirect->stride);
         } else
            VKCTX(CmdDrawIndexedIndirect)(batch->state->cmdbuf, indirect->obj->buffer, dindirect->offset, dindirect->draw_count, dindirect->stride);
      } else {
         if (unlikely(can_dgc)) {
            if (need_index_buffer_unref)
               draw_indexed_dgc_need_index_buffer_unref(ctx, dinfo, draws, num_draws, drawid_offset, needs_drawid);
            else
               draw_indexed_dgc(ctx, dinfo, draws, num_draws, drawid_offset, needs_drawid);
         } else if (need_index_buffer_unref) {
            draw_indexed_need_index_buffer_unref(ctx, dinfo, draws, num_draws, drawid_offset, needs_drawid);
         } else {
            draw_indexed<HAS_MULTIDRAW>(ctx, dinfo, draws, num_draws, drawid_offset, needs_drawid);
         }
      }
   } else {
      if (so_target && screen->info.tf_props.transformFeedbackDraw) {
         /* GTF-GL46.gtf40.GL3Tests.transform_feedback2.transform_feedback2_api attempts a bogus xfb
          * draw using a streamout target that has no data;
          * to avoid hanging the gpu, reject any such draws
          */
         if (so_target->counter_buffer_valid) {
            if (needs_drawid)
               update_drawid(ctx, drawid_offset);
            zink_batch_reference_resource_rw(batch, zink_resource(so_target->base.buffer), false);
            zink_batch_reference_resource_rw(batch, zink_resource(so_target->counter_buffer), true);
            VKCTX(CmdDrawIndirectByteCountEXT)(batch->state->cmdbuf, dinfo->instance_count, dinfo->start_instance,
                                               zink_resource(so_target->counter_buffer)->obj->buffer, so_target->counter_buffer_offset, 0,
                                               MIN2(so_target->stride, screen->info.tf_props.maxTransformFeedbackBufferDataStride));
         }
      } else if (dindirect && dindirect->buffer) {
         assert(num_draws == 1);
         if (needs_drawid)
            update_drawid(ctx, drawid_offset);
         struct zink_resource *indirect = zink_resource(dindirect->buffer);
         zink_batch_reference_resource_rw(batch, indirect, false);
         if (dindirect->indirect_draw_count) {
            struct zink_resource *indirect_draw_count = zink_resource(dindirect->indirect_draw_count);
            zink_batch_reference_resource_rw(batch, indirect_draw_count, false);
            VKCTX(CmdDrawIndirectCount)(batch->state->cmdbuf, indirect->obj->buffer, dindirect->offset,
                                        indirect_draw_count->obj->buffer, dindirect->indirect_draw_count_offset,
                                        dindirect->draw_count, dindirect->stride);
         } else
            VKCTX(CmdDrawIndirect)(batch->state->cmdbuf, indirect->obj->buffer, dindirect->offset, dindirect->draw_count, dindirect->stride);
      } else {
         if (unlikely(can_dgc))
            draw_dgc(ctx, dinfo, draws, num_draws, drawid_offset, needs_drawid);
         else
            draw<HAS_MULTIDRAW>(ctx, dinfo, draws, num_draws, drawid_offset, needs_drawid);
      }
   }

   if (unlikely(zink_tracing))
      zink_cmd_debug_marker_end(ctx, batch->state->cmdbuf, marker);

   ctx->dgc.valid = can_dgc;
   if (have_streamout) {
      for (unsigned i = 0; i < ctx->num_so_targets; i++) {
         struct zink_so_target *t = zink_so_target(ctx->so_targets[i]);
         if (t) {
            counter_buffers[i] = zink_resource(t->counter_buffer)->obj->buffer;
            counter_buffer_offsets[i] = t->counter_buffer_offset;
            t->counter_buffer_valid = true;
         }
      }
      VKCTX(CmdEndTransformFeedbackEXT)(batch->state->cmdbuf, 0, ctx->num_so_targets, counter_buffers, counter_buffer_offsets);
   }

   batch->has_work = true;
   batch->last_was_compute = false;
   ctx->batch.work_count = work_count;
   /* flush if there's >30k draws */
   if (!ctx->unordered_blitting && (unlikely(work_count >= 30000) || ctx->oom_flush))
      pctx->flush(pctx, NULL, 0);
}

template <zink_multidraw HAS_MULTIDRAW, zink_dynamic_state DYNAMIC_STATE, bool BATCH_CHANGED>
static void
zink_draw_vbo(struct pipe_context *pctx,
              const struct pipe_draw_info *info,
              unsigned drawid_offset,
              const struct pipe_draw_indirect_info *indirect,
              const struct pipe_draw_start_count_bias *draws,
              unsigned num_draws)
{
   zink_draw<HAS_MULTIDRAW, DYNAMIC_STATE, BATCH_CHANGED, false>(pctx, info, drawid_offset, indirect, draws, num_draws, NULL, 0);
}

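/* program the dynamic vertex input for a pipe_vertex_state draw; with a partial
 * element mask, the attribute array is compacted and relocated so locations stay
 * contiguous (util_bitcount_fast is specialized on popcount support)
 */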
template <util_popcnt HAS_POPCNT>
static void
zink_vertex_state_mask(struct zink_context *ctx, struct pipe_vertex_state *vstate, uint32_t partial_velem_mask)
{
   struct zink_vertex_state *zstate = (struct zink_vertex_state *)vstate;
   VkCommandBuffer cmdbuf = ctx->batch.state->cmdbuf;

   if (partial_velem_mask == vstate->input.full_velem_mask) {
      VKCTX(CmdSetVertexInputEXT)(cmdbuf,
                                  zstate->velems.hw_state.num_bindings, zstate->velems.hw_state.dynbindings,
                                  zstate->velems.hw_state.num_attribs, zstate->velems.hw_state.dynattribs);
      return;
   }

   VkVertexInputAttributeDescription2EXT dynattribs[PIPE_MAX_ATTRIBS];
   unsigned num_attribs = 0;
   u_foreach_bit(elem, vstate->input.full_velem_mask & partial_velem_mask) {
      unsigned idx = util_bitcount_fast<HAS_POPCNT>(vstate->input.full_velem_mask & BITFIELD_MASK(elem));
      dynattribs[num_attribs] = zstate->velems.hw_state.dynattribs[idx];
      dynattribs[num_attribs].location = num_attribs;
      num_attribs++;
   }

   VKCTX(CmdSetVertexInputEXT)(cmdbuf,
                               zstate->velems.hw_state.num_bindings, zstate->velems.hw_state.dynbindings,
                               num_attribs, dynattribs);
}

template <util_popcnt HAS_POPCNT>
static void
zink_bind_vertex_state(struct zink_context *ctx, struct pipe_vertex_state *vstate, uint32_t partial_velem_mask)
{
   struct zink_vertex_state *zstate = (struct zink_vertex_state *)vstate;
   VkCommandBuffer cmdbuf = ctx->batch.state->cmdbuf;
   if (!vstate->input.vbuffer.buffer.resource)
      return;

   zink_vertex_state_mask<HAS_POPCNT>(ctx, vstate, partial_velem_mask);

   struct zink_resource *res = zink_resource(vstate->input.vbuffer.buffer.resource);
   zink_batch_resource_usage_set(&ctx->batch, res, false, true);
   VkDeviceSize offset = vstate->input.vbuffer.buffer_offset;
   if (unlikely(zink_debug & ZINK_DEBUG_DGC)) {
      VkBindVertexBufferIndirectCommandNV *ptr;
      VkIndirectCommandsLayoutTokenNV *token = zink_dgc_add_token(ctx, VK_INDIRECT_COMMANDS_TOKEN_TYPE_VERTEX_BUFFER_NV, (void**)&ptr);
      token->vertexBindingUnit = 0;
      token->vertexDynamicStride = VK_FALSE;
      ptr->bufferAddress = res->obj->bda + offset;
      ptr->size = res->base.b.width0;
      ptr->stride = 0;
   } else {
      VKCTX(CmdBindVertexBuffers)(cmdbuf, 0,
                                  zstate->velems.hw_state.num_bindings,
                                  &res->obj->buffer, &offset);
   }
}

template <zink_multidraw HAS_MULTIDRAW, zink_dynamic_state DYNAMIC_STATE, util_popcnt HAS_POPCNT, bool BATCH_CHANGED>
static void
zink_draw_vertex_state(struct pipe_context *pctx,
                       struct pipe_vertex_state *vstate,
                       uint32_t partial_velem_mask,
                       struct pipe_draw_vertex_state_info info,
                       const struct pipe_draw_start_count_bias *draws,
                       unsigned num_draws)
{
   struct pipe_draw_info dinfo = {};

   dinfo.mode = info.mode;
   dinfo.index_size = 4;
   dinfo.instance_count = 1;
   dinfo.index.resource = vstate->input.indexbuf;
   struct zink_context *ctx = zink_context(pctx);
   struct zink_resource *res = zink_resource(vstate->input.vbuffer.buffer.resource);
   zink_screen(ctx->base.screen)->buffer_barrier(ctx, res, VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
                                                 VK_PIPELINE_STAGE_VERTEX_INPUT_BIT);
   if (!ctx->unordered_blitting)
      res->obj->unordered_read = false;
   zink_bind_vertex_state<HAS_POPCNT>(ctx, vstate, partial_velem_mask);

   zink_draw<HAS_MULTIDRAW, DYNAMIC_STATE, BATCH_CHANGED, true>(pctx, &dinfo, 0, NULL, draws, num_draws, vstate, partial_velem_mask);
   /* ensure ctx->vertex_buffers gets rebound on next non-vstate draw */
   ctx->vertex_buffers_dirty = true;

   if (info.take_vertex_state_ownership)
      pipe_vertex_state_reference(&vstate, NULL);
}

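/* the compute dispatch entrypoint; mirrors zink_draw's barrier, pipeline, and
 * descriptor handling for the compute bind point
 */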
template <bool BATCH_CHANGED>
static void
zink_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info)
{
   struct zink_context *ctx = zink_context(pctx);
   struct zink_screen *screen = zink_screen(pctx->screen);
   struct zink_batch *batch = &ctx->batch;

   if (ctx->render_condition_active)
      zink_start_conditional_render(ctx);

   if (info->indirect) {
      /*
         VK_ACCESS_INDIRECT_COMMAND_READ_BIT specifies read access to indirect command data read as
         part of an indirect build, trace, drawing or dispatching command. Such access occurs in the
         VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT pipeline stage.

         - Chapter 7. Synchronization and Cache Control
       */
      check_buffer_barrier(ctx, info->indirect, VK_ACCESS_INDIRECT_COMMAND_READ_BIT, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT);
   }

   zink_update_barriers(ctx, true, NULL, info->indirect, NULL);
   if (ctx->memory_barrier)
      zink_flush_memory_barrier(ctx, true);

   if (unlikely(zink_debug & ZINK_DEBUG_SYNC)) {
      zink_batch_no_rp(ctx);
      VkMemoryBarrier mb;
      mb.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
      mb.pNext = NULL;
      mb.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT;
      mb.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT;
      VKSCR(CmdPipelineBarrier)(ctx->batch.state->cmdbuf,
                                VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                                VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                                0, 1, &mb, 0, NULL, 0, NULL);
   }

   zink_program_update_compute_pipeline_state(ctx, ctx->curr_compute, info);
   VkPipeline prev_pipeline = ctx->compute_pipeline_state.pipeline;

   if (BATCH_CHANGED) {
      zink_update_descriptor_refs(ctx, true);
   }
   if (ctx->compute_dirty) {
      /* update inlinable constants */
      zink_update_compute_program(ctx);
      ctx->compute_dirty = false;
   }

   VkPipeline pipeline = zink_get_compute_pipeline(screen, ctx->curr_compute,
                                                   &ctx->compute_pipeline_state);

   if (prev_pipeline != pipeline || BATCH_CHANGED)
      VKCTX(CmdBindPipeline)(batch->state->cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
   if (BATCH_CHANGED) {
      ctx->pipeline_changed[1] = false;
      zink_select_launch_grid(ctx);
   }

   if (zink_program_has_descriptors(&ctx->curr_compute->base))
      zink_descriptors_update(ctx, true);
   if (ctx->di.any_bindless_dirty && ctx->curr_compute->base.dd.bindless)
      zink_descriptors_update_bindless(ctx);

   batch->work_count++;
   zink_batch_no_rp(ctx);
   if (!ctx->queries_disabled)
      zink_resume_cs_query(ctx);
   if (info->indirect) {
      VKCTX(CmdDispatchIndirect)(batch->state->cmdbuf, zink_resource(info->indirect)->obj->buffer, info->indirect_offset);
      zink_batch_reference_resource_rw(batch, zink_resource(info->indirect), false);
   } else
      VKCTX(CmdDispatch)(batch->state->cmdbuf, info->grid[0], info->grid[1], info->grid[2]);
   batch->has_work = true;
   batch->last_was_compute = true;
1306 /* flush if there's >100k computes */
1307 if (!ctx->unordered_blitting && (unlikely(ctx->batch.work_count >= 30000) || ctx->oom_flush))
1308 pctx->flush(pctx, NULL, 0);
1309 }
1310
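/* The draw/dispatch entrypoints are instantiated once per combination of
 * template parameters; the init_* helpers below enumerate every combination
 * into function-pointer tables indexed as
 * [has EXT_multi_draw][zink_dynamic_state level][batch changed]
 * (plus a has_popcnt axis for vertex-state draws), so selecting an
 * entrypoint is a table lookup rather than per-draw runtime branching.
 */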
template <zink_multidraw HAS_MULTIDRAW, zink_dynamic_state DYNAMIC_STATE, bool BATCH_CHANGED>
static void
init_batch_changed_functions(struct zink_context *ctx, pipe_draw_func draw_vbo_array[2][6][2], pipe_draw_vertex_state_func draw_state_array[2][6][2][2])
{
   draw_vbo_array[HAS_MULTIDRAW][DYNAMIC_STATE][BATCH_CHANGED] = zink_draw_vbo<HAS_MULTIDRAW, DYNAMIC_STATE, BATCH_CHANGED>;
   draw_state_array[HAS_MULTIDRAW][DYNAMIC_STATE][0][BATCH_CHANGED] = zink_draw_vertex_state<HAS_MULTIDRAW, DYNAMIC_STATE, POPCNT_NO, BATCH_CHANGED>;
   draw_state_array[HAS_MULTIDRAW][DYNAMIC_STATE][1][BATCH_CHANGED] = zink_draw_vertex_state<HAS_MULTIDRAW, DYNAMIC_STATE, POPCNT_YES, BATCH_CHANGED>;
}

template <zink_multidraw HAS_MULTIDRAW, zink_dynamic_state DYNAMIC_STATE>
static void
init_dynamic_state_functions(struct zink_context *ctx, pipe_draw_func draw_vbo_array[2][6][2], pipe_draw_vertex_state_func draw_state_array[2][6][2][2])
{
   init_batch_changed_functions<HAS_MULTIDRAW, DYNAMIC_STATE, false>(ctx, draw_vbo_array, draw_state_array);
   init_batch_changed_functions<HAS_MULTIDRAW, DYNAMIC_STATE, true>(ctx, draw_vbo_array, draw_state_array);
}

template <zink_multidraw HAS_MULTIDRAW>
static void
init_multidraw_functions(struct zink_context *ctx, pipe_draw_func draw_vbo_array[2][6][2], pipe_draw_vertex_state_func draw_state_array[2][6][2][2])
{
   init_dynamic_state_functions<HAS_MULTIDRAW, ZINK_NO_DYNAMIC_STATE>(ctx, draw_vbo_array, draw_state_array);
   init_dynamic_state_functions<HAS_MULTIDRAW, ZINK_DYNAMIC_STATE>(ctx, draw_vbo_array, draw_state_array);
   init_dynamic_state_functions<HAS_MULTIDRAW, ZINK_DYNAMIC_STATE2>(ctx, draw_vbo_array, draw_state_array);
   init_dynamic_state_functions<HAS_MULTIDRAW, ZINK_DYNAMIC_VERTEX_INPUT2>(ctx, draw_vbo_array, draw_state_array);
   init_dynamic_state_functions<HAS_MULTIDRAW, ZINK_DYNAMIC_STATE3>(ctx, draw_vbo_array, draw_state_array);
   init_dynamic_state_functions<HAS_MULTIDRAW, ZINK_DYNAMIC_VERTEX_INPUT>(ctx, draw_vbo_array, draw_state_array);
}

static void
init_all_draw_functions(struct zink_context *ctx, pipe_draw_func draw_vbo_array[2][6][2], pipe_draw_vertex_state_func draw_state_array[2][6][2][2])
{
   init_multidraw_functions<ZINK_NO_MULTIDRAW>(ctx, draw_vbo_array, draw_state_array);
   init_multidraw_functions<ZINK_MULTIDRAW>(ctx, draw_vbo_array, draw_state_array);
}

template <bool BATCH_CHANGED>
static void
init_grid_batch_changed_functions(struct zink_context *ctx)
{
   ctx->launch_grid[BATCH_CHANGED] = zink_launch_grid<BATCH_CHANGED>;
}

static void
init_all_grid_functions(struct zink_context *ctx)
{
   init_grid_batch_changed_functions<false>(ctx);
   init_grid_batch_changed_functions<true>(ctx);
}

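/* Stubs installed while no vertex/compute shader is bound: reaching any of
 * them indicates a state-tracker bug, so they simply assert.
 */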
static void
zink_invalid_draw_vbo(struct pipe_context *pipe,
                      const struct pipe_draw_info *dinfo,
                      unsigned drawid_offset,
                      const struct pipe_draw_indirect_info *dindirect,
                      const struct pipe_draw_start_count_bias *draws,
                      unsigned num_draws)
{
   unreachable("vertex shader not bound");
}

static void
zink_invalid_draw_vertex_state(struct pipe_context *pipe,
                               struct pipe_vertex_state *vstate,
                               uint32_t partial_velem_mask,
                               struct pipe_draw_vertex_state_info info,
                               const struct pipe_draw_start_count_bias *draws,
                               unsigned num_draws)
{
   unreachable("vertex shader not bound");
}

static void
zink_invalid_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info)
{
   unreachable("compute shader not bound");
}

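/* Gfx programs are cached per combination of optional stages.  VS and FS are
 * always present, so only TCS/TES/GS participate in the key: shifting the
 * MESA_SHADER_* bitfield right by one drops the always-set vertex bit and
 * yields a 3-bit index (TCS = bit 0, TES = bit 1, GS = bit 2) selecting one
 * of 8 cache/hash variants, e.g. VS+GS+FS -> 0b100 == 4 == STAGE_BASE_GS.
 */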
#define STAGE_BASE 0
#define STAGE_BASE_GS (BITFIELD_BIT(MESA_SHADER_GEOMETRY) >> 1)
#define STAGE_BASE_TES (BITFIELD_BIT(MESA_SHADER_TESS_EVAL) >> 1)
#define STAGE_BASE_TES_GS ((BITFIELD_BIT(MESA_SHADER_TESS_EVAL) | BITFIELD_BIT(MESA_SHADER_GEOMETRY)) >> 1)
#define STAGE_BASE_TCS_TES ((BITFIELD_BIT(MESA_SHADER_TESS_CTRL) | BITFIELD_BIT(MESA_SHADER_TESS_EVAL)) >> 1)
#define STAGE_BASE_TCS_TES_GS ((BITFIELD_BIT(MESA_SHADER_TESS_CTRL) | BITFIELD_BIT(MESA_SHADER_TESS_EVAL) | BITFIELD_BIT(MESA_SHADER_GEOMETRY)) >> 1)

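/* Specializing the hash on STAGE_MASK lets each cache variant XOR together
 * only the hashes of the stages that can be present for that key, with all
 * of the branches below resolved at compile time.
 */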
template <unsigned STAGE_MASK>
static uint32_t
hash_gfx_program(const void *key)
{
   const struct zink_shader **shaders = (const struct zink_shader**)key;
   uint32_t base_hash = shaders[MESA_SHADER_VERTEX]->hash ^ shaders[MESA_SHADER_FRAGMENT]->hash;
   if (STAGE_MASK == STAGE_BASE) //VS+FS
      return base_hash;
   if (STAGE_MASK == STAGE_BASE_GS) //VS+GS+FS
      return base_hash ^ shaders[MESA_SHADER_GEOMETRY]->hash;
   /* VS+TCS+FS isn't a thing */
   /* VS+TCS+GS+FS isn't a thing */
   if (STAGE_MASK == STAGE_BASE_TES) //VS+TES+FS
      return base_hash ^ shaders[MESA_SHADER_TESS_EVAL]->hash;
   if (STAGE_MASK == STAGE_BASE_TES_GS) //VS+TES+GS+FS
      return base_hash ^ shaders[MESA_SHADER_GEOMETRY]->hash ^ shaders[MESA_SHADER_TESS_EVAL]->hash;
   if (STAGE_MASK == STAGE_BASE_TCS_TES) //VS+TCS+TES+FS
      return base_hash ^ shaders[MESA_SHADER_TESS_CTRL]->hash ^ shaders[MESA_SHADER_TESS_EVAL]->hash;

   /* all stages */
   return base_hash ^ shaders[MESA_SHADER_GEOMETRY]->hash ^ shaders[MESA_SHADER_TESS_CTRL]->hash ^ shaders[MESA_SHADER_TESS_EVAL]->hash;
}

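/* Equality relies on the MESA_SHADER_* enum ordering asserted below: shader
 * pointers for adjacent stages are contiguous in the key array, so runs of
 * stages can be compared with a single memcmp.
 */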
template <unsigned STAGE_MASK>
static bool
equals_gfx_program(const void *a, const void *b)
{
   const void **sa = (const void**)a;
   const void **sb = (const void**)b;
   STATIC_ASSERT(MESA_SHADER_VERTEX == 0);
   STATIC_ASSERT(MESA_SHADER_TESS_CTRL == 1);
   STATIC_ASSERT(MESA_SHADER_TESS_EVAL == 2);
   STATIC_ASSERT(MESA_SHADER_GEOMETRY == 3);
   STATIC_ASSERT(MESA_SHADER_FRAGMENT == 4);
   if (STAGE_MASK == STAGE_BASE) //VS+FS
      return sa[MESA_SHADER_VERTEX] == sb[MESA_SHADER_VERTEX] &&
             sa[MESA_SHADER_FRAGMENT] == sb[MESA_SHADER_FRAGMENT];
   if (STAGE_MASK == STAGE_BASE_GS) //VS+GS+FS
      return sa[MESA_SHADER_VERTEX] == sb[MESA_SHADER_VERTEX] &&
             !memcmp(&sa[MESA_SHADER_GEOMETRY], &sb[MESA_SHADER_GEOMETRY], sizeof(void*) * 2);
   /* VS+TCS+FS isn't a thing */
   /* VS+TCS+GS+FS isn't a thing */
   if (STAGE_MASK == STAGE_BASE_TES) //VS+TES+FS
      return sa[MESA_SHADER_VERTEX] == sb[MESA_SHADER_VERTEX] &&
             sa[MESA_SHADER_TESS_EVAL] == sb[MESA_SHADER_TESS_EVAL] &&
             sa[MESA_SHADER_FRAGMENT] == sb[MESA_SHADER_FRAGMENT];
   if (STAGE_MASK == STAGE_BASE_TES_GS) //VS+TES+GS+FS
      return sa[MESA_SHADER_VERTEX] == sb[MESA_SHADER_VERTEX] &&
             !memcmp(&sa[MESA_SHADER_TESS_EVAL], &sb[MESA_SHADER_TESS_EVAL], sizeof(void*) * 3);
   if (STAGE_MASK == STAGE_BASE_TCS_TES) //VS+TCS+TES+FS
      return !memcmp(sa, sb, sizeof(void*) * 3) &&
             sa[MESA_SHADER_FRAGMENT] == sb[MESA_SHADER_FRAGMENT];

   /* all stages */
   return !memcmp(a, b, sizeof(void*) * ZINK_GFX_SHADER_COUNT);
}

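/* Select the draw entrypoints for this context: pick the most capable
 * zink_dynamic_state level the device's extensions allow, then copy the
 * matching row of the instantiation tables into the context.
 */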
extern "C"
void
zink_init_draw_functions(struct zink_context *ctx, struct zink_screen *screen)
{
   pipe_draw_func draw_vbo_array[2][6]  //multidraw, zink_dynamic_state
                                [2];    //batch changed
   pipe_draw_vertex_state_func draw_state_array[2][6]   //multidraw, zink_dynamic_state
                                                [2][2];  //has_popcnt, batch changed
   zink_dynamic_state dynamic;
   if (screen->info.have_EXT_extended_dynamic_state) {
      if (screen->info.have_EXT_extended_dynamic_state2) {
         if (screen->info.have_EXT_extended_dynamic_state3) {
            if (screen->info.have_EXT_vertex_input_dynamic_state)
               dynamic = ZINK_DYNAMIC_VERTEX_INPUT;
            else
               dynamic = ZINK_DYNAMIC_STATE3;
         } else {
            if (screen->info.have_EXT_vertex_input_dynamic_state)
               dynamic = ZINK_DYNAMIC_VERTEX_INPUT2;
            else
               dynamic = ZINK_DYNAMIC_STATE2;
         }
      } else {
         dynamic = ZINK_DYNAMIC_STATE;
      }
   } else {
      dynamic = ZINK_NO_DYNAMIC_STATE;
   }
   init_all_draw_functions(ctx, draw_vbo_array, draw_state_array);
   memcpy(ctx->draw_vbo, &draw_vbo_array[screen->info.have_EXT_multi_draw]
                                        [dynamic],
          sizeof(ctx->draw_vbo));
   memcpy(ctx->draw_state, &draw_state_array[screen->info.have_EXT_multi_draw]
                                            [dynamic][util_get_cpu_caps()->has_popcnt],
          sizeof(ctx->draw_state));

   /* Bind a fake draw_vbo, so that draw_vbo isn't NULL, which would skip
    * initialization of callbacks in upper layers (such as u_threaded_context).
    */
   ctx->base.draw_vbo = zink_invalid_draw_vbo;
   ctx->base.draw_vertex_state = zink_invalid_draw_vertex_state;

   _mesa_hash_table_init(&ctx->program_cache[0], ctx, hash_gfx_program<0>, equals_gfx_program<0>);
   _mesa_hash_table_init(&ctx->program_cache[1], ctx, hash_gfx_program<1>, equals_gfx_program<1>);
   _mesa_hash_table_init(&ctx->program_cache[2], ctx, hash_gfx_program<2>, equals_gfx_program<2>);
   _mesa_hash_table_init(&ctx->program_cache[3], ctx, hash_gfx_program<3>, equals_gfx_program<3>);
   _mesa_hash_table_init(&ctx->program_cache[4], ctx, hash_gfx_program<4>, equals_gfx_program<4>);
   _mesa_hash_table_init(&ctx->program_cache[5], ctx, hash_gfx_program<5>, equals_gfx_program<5>);
   _mesa_hash_table_init(&ctx->program_cache[6], ctx, hash_gfx_program<6>, equals_gfx_program<6>);
   _mesa_hash_table_init(&ctx->program_cache[7], ctx, hash_gfx_program<7>, equals_gfx_program<7>);
   for (unsigned i = 0; i < ARRAY_SIZE(ctx->program_lock); i++)
      simple_mtx_init(&ctx->program_lock[i], mtx_plain);
}

void
zink_init_grid_functions(struct zink_context *ctx)
{
   init_all_grid_functions(ctx);
   /* Bind a fake launch_grid, so that launch_grid isn't NULL, which would skip
    * initialization of callbacks in upper layers (such as u_threaded_context).
    */
   ctx->base.launch_grid = zink_invalid_launch_grid;
}

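/* Screen-level pipeline-library sets mirror the per-context program caches:
 * the same 8 stage-mask variants with the same hash/equality functions,
 * guarded by their own locks since the screen is shared across contexts.
 */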
void
zink_init_screen_pipeline_libs(struct zink_screen *screen)
{
   _mesa_set_init(&screen->pipeline_libs[0], screen, hash_gfx_program<0>, equals_gfx_program<0>);
   _mesa_set_init(&screen->pipeline_libs[1], screen, hash_gfx_program<1>, equals_gfx_program<1>);
   _mesa_set_init(&screen->pipeline_libs[2], screen, hash_gfx_program<2>, equals_gfx_program<2>);
   _mesa_set_init(&screen->pipeline_libs[3], screen, hash_gfx_program<3>, equals_gfx_program<3>);
   _mesa_set_init(&screen->pipeline_libs[4], screen, hash_gfx_program<4>, equals_gfx_program<4>);
   _mesa_set_init(&screen->pipeline_libs[5], screen, hash_gfx_program<5>, equals_gfx_program<5>);
   _mesa_set_init(&screen->pipeline_libs[6], screen, hash_gfx_program<6>, equals_gfx_program<6>);
   _mesa_set_init(&screen->pipeline_libs[7], screen, hash_gfx_program<7>, equals_gfx_program<7>);
   for (unsigned i = 0; i < ARRAY_SIZE(screen->pipeline_libs_lock); i++)
      simple_mtx_init(&screen->pipeline_libs_lock[i], mtx_plain);
}