/*
 * Copyright © 2024 Collabora Ltd.
 *
 * Derived from tu_cmd_buffer.c which is:
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 * Copyright © 2015 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 */

#include "genxml/gen_macros.h"

#include "panvk_buffer.h"
#include "panvk_cmd_alloc.h"
#include "panvk_cmd_buffer.h"
#include "panvk_cmd_desc_state.h"
#include "panvk_cmd_meta.h"
#include "panvk_device.h"
#include "panvk_entrypoints.h"
#include "panvk_image.h"
#include "panvk_image_view.h"
#include "panvk_instance.h"
#include "panvk_priv_bo.h"
#include "panvk_shader.h"

#include "pan_desc.h"
#include "pan_earlyzs.h"
#include "pan_encoder.h"
#include "pan_format.h"
#include "pan_jc.h"
#include "pan_props.h"
#include "pan_shader.h"

#include "vk_format.h"
#include "vk_meta.h"
#include "vk_pipeline_layout.h"

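/* Transient per-draw state: the CPU-side draw parameters plus the GPU
 * addresses of all descriptors emitted for this draw, filled in by the
 * panvk_draw_prepare_*() helpers before the jobs are queued.
 */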
struct panvk_draw_data {
   struct panvk_draw_info info;
   unsigned vertex_range;
   unsigned padded_vertex_count;
   struct mali_invocation_packed invocation;
   struct {
      uint64_t varyings;
      uint64_t attributes;
      uint64_t attribute_bufs;
   } vs;
   struct {
      uint64_t rsd;
      uint64_t varyings;
   } fs;
   uint64_t varying_bufs;
   uint64_t position;
   uint64_t indices;
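   /* Point-size array pointer when the VS writes gl_PointSize, otherwise a
    * constant line width; consumed by panvk_emit_tiler_primitive_size().
    */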
   union {
      uint64_t psiz;
      float line_width;
   };
   uint64_t tls;
   uint64_t fb;
   const struct pan_tiler_context *tiler_ctx;
   uint64_t viewport;
   struct {
      struct panfrost_ptr vertex_copy_desc;
      struct panfrost_ptr frag_copy_desc;
      union {
         struct {
            struct panfrost_ptr vertex;
            struct panfrost_ptr tiler;
         };
         struct panfrost_ptr idvs;
      };
   } jobs;
};

static bool
has_depth_att(struct panvk_cmd_buffer *cmdbuf)
{
   return (cmdbuf->state.gfx.render.bound_attachments &
           MESA_VK_RP_ATTACHMENT_DEPTH_BIT) != 0;
}

static bool
has_stencil_att(struct panvk_cmd_buffer *cmdbuf)
{
   return (cmdbuf->state.gfx.render.bound_attachments &
           MESA_VK_RP_ATTACHMENT_STENCIL_BIT) != 0;
}

static bool
writes_depth(struct panvk_cmd_buffer *cmdbuf)
{
   const struct vk_depth_stencil_state *ds =
      &cmdbuf->vk.dynamic_graphics_state.ds;

   return has_depth_att(cmdbuf) && ds->depth.test_enable &&
          ds->depth.write_enable && ds->depth.compare_op != VK_COMPARE_OP_NEVER;
}

static bool
writes_stencil(struct panvk_cmd_buffer *cmdbuf)
{
   const struct vk_depth_stencil_state *ds =
      &cmdbuf->vk.dynamic_graphics_state.ds;

   return has_stencil_att(cmdbuf) && ds->stencil.test_enable &&
          ((ds->stencil.front.write_mask &&
            (ds->stencil.front.op.fail != VK_STENCIL_OP_KEEP ||
             ds->stencil.front.op.pass != VK_STENCIL_OP_KEEP ||
             ds->stencil.front.op.depth_fail != VK_STENCIL_OP_KEEP)) ||
           (ds->stencil.back.write_mask &&
            (ds->stencil.back.op.fail != VK_STENCIL_OP_KEEP ||
             ds->stencil.back.op.pass != VK_STENCIL_OP_KEEP ||
             ds->stencil.back.op.depth_fail != VK_STENCIL_OP_KEEP)));
}

static bool
ds_test_always_passes(struct panvk_cmd_buffer *cmdbuf)
{
   const struct vk_depth_stencil_state *ds =
      &cmdbuf->vk.dynamic_graphics_state.ds;

   if (!has_depth_att(cmdbuf))
      return true;

   if (ds->depth.test_enable && ds->depth.compare_op != VK_COMPARE_OP_ALWAYS)
      return false;

   if (ds->stencil.test_enable &&
       (ds->stencil.front.op.compare != VK_COMPARE_OP_ALWAYS ||
        ds->stencil.back.op.compare != VK_COMPARE_OP_ALWAYS))
      return false;

   return true;
}

static inline enum mali_func
translate_compare_func(VkCompareOp comp)
{
   STATIC_ASSERT(VK_COMPARE_OP_NEVER == (VkCompareOp)MALI_FUNC_NEVER);
   STATIC_ASSERT(VK_COMPARE_OP_LESS == (VkCompareOp)MALI_FUNC_LESS);
   STATIC_ASSERT(VK_COMPARE_OP_EQUAL == (VkCompareOp)MALI_FUNC_EQUAL);
   STATIC_ASSERT(VK_COMPARE_OP_LESS_OR_EQUAL == (VkCompareOp)MALI_FUNC_LEQUAL);
   STATIC_ASSERT(VK_COMPARE_OP_GREATER == (VkCompareOp)MALI_FUNC_GREATER);
   STATIC_ASSERT(VK_COMPARE_OP_NOT_EQUAL == (VkCompareOp)MALI_FUNC_NOT_EQUAL);
   STATIC_ASSERT(VK_COMPARE_OP_GREATER_OR_EQUAL ==
                 (VkCompareOp)MALI_FUNC_GEQUAL);
   STATIC_ASSERT(VK_COMPARE_OP_ALWAYS == (VkCompareOp)MALI_FUNC_ALWAYS);

   return (enum mali_func)comp;
}

static enum mali_stencil_op
translate_stencil_op(VkStencilOp in)
{
   switch (in) {
   case VK_STENCIL_OP_KEEP:
      return MALI_STENCIL_OP_KEEP;
   case VK_STENCIL_OP_ZERO:
      return MALI_STENCIL_OP_ZERO;
   case VK_STENCIL_OP_REPLACE:
      return MALI_STENCIL_OP_REPLACE;
   case VK_STENCIL_OP_INCREMENT_AND_CLAMP:
      return MALI_STENCIL_OP_INCR_SAT;
   case VK_STENCIL_OP_DECREMENT_AND_CLAMP:
      return MALI_STENCIL_OP_DECR_SAT;
   case VK_STENCIL_OP_INCREMENT_AND_WRAP:
      return MALI_STENCIL_OP_INCR_WRAP;
   case VK_STENCIL_OP_DECREMENT_AND_WRAP:
      return MALI_STENCIL_OP_DECR_WRAP;
   case VK_STENCIL_OP_INVERT:
      return MALI_STENCIL_OP_INVERT;
   default:
      unreachable("Invalid stencil op");
   }
}

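/* The fragment RENDERER_STATE descriptor bundles the FS code pointer with
 * depth/stencil, multisample and blend state. It's cached in
 * cmdbuf->state.gfx.fs.rsd and only re-emitted when one of the states baked
 * into it is dirty.
 */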
static VkResult
panvk_draw_prepare_fs_rsd(struct panvk_cmd_buffer *cmdbuf,
                          struct panvk_draw_data *draw)
{
   bool dirty = dyn_gfx_state_dirty(cmdbuf, RS_RASTERIZER_DISCARD_ENABLE) ||
                dyn_gfx_state_dirty(cmdbuf, RS_DEPTH_CLAMP_ENABLE) ||
                dyn_gfx_state_dirty(cmdbuf, RS_DEPTH_CLIP_ENABLE) ||
                dyn_gfx_state_dirty(cmdbuf, RS_DEPTH_BIAS_ENABLE) ||
                dyn_gfx_state_dirty(cmdbuf, RS_DEPTH_BIAS_FACTORS) ||
                dyn_gfx_state_dirty(cmdbuf, CB_LOGIC_OP_ENABLE) ||
                dyn_gfx_state_dirty(cmdbuf, CB_LOGIC_OP) ||
                dyn_gfx_state_dirty(cmdbuf, CB_ATTACHMENT_COUNT) ||
                dyn_gfx_state_dirty(cmdbuf, CB_COLOR_WRITE_ENABLES) ||
                dyn_gfx_state_dirty(cmdbuf, CB_BLEND_ENABLES) ||
                dyn_gfx_state_dirty(cmdbuf, CB_BLEND_EQUATIONS) ||
                dyn_gfx_state_dirty(cmdbuf, CB_WRITE_MASKS) ||
                dyn_gfx_state_dirty(cmdbuf, CB_BLEND_CONSTANTS) ||
                dyn_gfx_state_dirty(cmdbuf, DS_DEPTH_TEST_ENABLE) ||
                dyn_gfx_state_dirty(cmdbuf, DS_DEPTH_WRITE_ENABLE) ||
                dyn_gfx_state_dirty(cmdbuf, DS_DEPTH_COMPARE_OP) ||
                dyn_gfx_state_dirty(cmdbuf, DS_STENCIL_TEST_ENABLE) ||
                dyn_gfx_state_dirty(cmdbuf, DS_STENCIL_OP) ||
                dyn_gfx_state_dirty(cmdbuf, DS_STENCIL_COMPARE_MASK) ||
                dyn_gfx_state_dirty(cmdbuf, DS_STENCIL_WRITE_MASK) ||
                dyn_gfx_state_dirty(cmdbuf, DS_STENCIL_REFERENCE) ||
                dyn_gfx_state_dirty(cmdbuf, MS_RASTERIZATION_SAMPLES) ||
                dyn_gfx_state_dirty(cmdbuf, MS_SAMPLE_MASK) ||
                dyn_gfx_state_dirty(cmdbuf, MS_ALPHA_TO_COVERAGE_ENABLE) ||
                dyn_gfx_state_dirty(cmdbuf, MS_ALPHA_TO_ONE_ENABLE) ||
                gfx_state_dirty(cmdbuf, FS) || gfx_state_dirty(cmdbuf, OQ) ||
                gfx_state_dirty(cmdbuf, RENDER_STATE);

   if (!dirty) {
      draw->fs.rsd = cmdbuf->state.gfx.fs.rsd;
      return VK_SUCCESS;
   }

   const struct vk_dynamic_graphics_state *dyns =
      &cmdbuf->vk.dynamic_graphics_state;
   const struct vk_rasterization_state *rs = &dyns->rs;
   const struct vk_color_blend_state *cb = &dyns->cb;
   const struct vk_depth_stencil_state *ds = &dyns->ds;
   const struct panvk_shader *fs = get_fs(cmdbuf);
   const struct pan_shader_info *fs_info = fs ? &fs->info : NULL;
   unsigned bd_count = MAX2(cb->attachment_count, 1);
   bool test_s = has_stencil_att(cmdbuf) && ds->stencil.test_enable;
   bool test_z = has_depth_att(cmdbuf) && ds->depth.test_enable;
   bool writes_z = writes_depth(cmdbuf);
   bool writes_s = writes_stencil(cmdbuf);

   struct panfrost_ptr ptr = panvk_cmd_alloc_desc_aggregate(
      cmdbuf, PAN_DESC(RENDERER_STATE), PAN_DESC_ARRAY(bd_count, BLEND));
   if (!ptr.gpu)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   struct mali_renderer_state_packed *rsd = ptr.cpu;
   struct mali_blend_packed *bds = ptr.cpu + pan_size(RENDERER_STATE);
   struct panvk_blend_info *binfo = &cmdbuf->state.gfx.cb.info;

   uint64_t fs_code = panvk_shader_get_dev_addr(fs);

   if (fs_info != NULL) {
      panvk_per_arch(blend_emit_descs)(cmdbuf, bds);
   } else {
      for (unsigned i = 0; i < bd_count; i++) {
         pan_pack(&bds[i], BLEND, cfg) {
            cfg.enable = false;
            cfg.internal.mode = MALI_BLEND_MODE_OFF;
         }
      }
   }

   pan_pack(rsd, RENDERER_STATE, cfg) {
      bool alpha_to_coverage = dyns->ms.alpha_to_coverage_enable;
      bool msaa = dyns->ms.rasterization_samples > 1;

      if (fs) {
         pan_shader_prepare_rsd(fs_info, fs_code, &cfg);

         if (binfo->shader_loads_blend_const) {
            /* Preload the blend constant if the blend shader depends on it. */
            cfg.preload.uniform_count =
               MAX2(cfg.preload.uniform_count,
                    DIV_ROUND_UP(SYSVALS_PUSH_CONST_BASE +
                                    sizeof(struct panvk_graphics_sysvals),
                                 8));
         }

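         /* Forward Pixel Kill lets later opaque fragments kill earlier,
          * still-pending ones. Only allow it when the FS covers every bound
          * color target, nothing reads the destination, and alpha-to-coverage
          * is off.
          */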
         uint8_t rt_written = fs_info->outputs_written >> FRAG_RESULT_DATA0;
         uint8_t rt_mask = cmdbuf->state.gfx.render.bound_attachments &
                           MESA_VK_RP_ATTACHMENT_ANY_COLOR_BITS;
         cfg.properties.allow_forward_pixel_to_kill =
            fs_info->fs.can_fpk && !(rt_mask & ~rt_written) &&
            !alpha_to_coverage && !binfo->any_dest_read;

         bool writes_zs = writes_z || writes_s;
         bool zs_always_passes = ds_test_always_passes(cmdbuf);
         bool oq = cmdbuf->state.gfx.occlusion_query.mode !=
                   MALI_OCCLUSION_MODE_DISABLED;

         struct pan_earlyzs_state earlyzs =
            pan_earlyzs_get(pan_earlyzs_analyze(fs_info), writes_zs || oq,
                            alpha_to_coverage, zs_always_passes);

         cfg.properties.pixel_kill_operation = earlyzs.kill;
         cfg.properties.zs_update_operation = earlyzs.update;
         cfg.multisample_misc.evaluate_per_sample =
            (fs->info.fs.sample_shading && msaa);
      } else {
         cfg.properties.depth_source = MALI_DEPTH_SOURCE_FIXED_FUNCTION;
         cfg.properties.allow_forward_pixel_to_kill = true;
         cfg.properties.allow_forward_pixel_to_be_killed = true;
         cfg.properties.zs_update_operation = MALI_PIXEL_KILL_FORCE_EARLY;
      }

      cfg.multisample_misc.multisample_enable = msaa;
      cfg.multisample_misc.sample_mask =
         msaa ? dyns->ms.sample_mask : UINT16_MAX;

      cfg.multisample_misc.depth_function =
         test_z ? translate_compare_func(ds->depth.compare_op)
                : MALI_FUNC_ALWAYS;

      cfg.multisample_misc.depth_write_mask = writes_z;
      cfg.multisample_misc.fixed_function_near_discard =
         cfg.multisample_misc.fixed_function_far_discard =
            vk_rasterization_state_depth_clip_enable(rs);
      cfg.multisample_misc.fixed_function_depth_range_fixed =
         !rs->depth_clamp_enable;
      cfg.multisample_misc.shader_depth_range_fixed = true;

      cfg.stencil_mask_misc.stencil_enable = test_s;
      cfg.stencil_mask_misc.alpha_to_coverage = alpha_to_coverage;
      cfg.stencil_mask_misc.alpha_test_compare_function = MALI_FUNC_ALWAYS;
      cfg.stencil_mask_misc.front_facing_depth_bias = rs->depth_bias.enable;
      cfg.stencil_mask_misc.back_facing_depth_bias = rs->depth_bias.enable;

      cfg.depth_units = rs->depth_bias.constant_factor;
      cfg.depth_factor = rs->depth_bias.slope_factor;
      cfg.depth_bias_clamp = rs->depth_bias.clamp;

      cfg.stencil_front.mask = ds->stencil.front.compare_mask;
      cfg.stencil_back.mask = ds->stencil.back.compare_mask;

      cfg.stencil_mask_misc.stencil_mask_front = ds->stencil.front.write_mask;
      cfg.stencil_mask_misc.stencil_mask_back = ds->stencil.back.write_mask;

      cfg.stencil_front.reference_value = ds->stencil.front.reference;
      cfg.stencil_back.reference_value = ds->stencil.back.reference;

      if (test_s) {
         cfg.stencil_front.compare_function =
            translate_compare_func(ds->stencil.front.op.compare);
         cfg.stencil_front.stencil_fail =
            translate_stencil_op(ds->stencil.front.op.fail);
         cfg.stencil_front.depth_fail =
            translate_stencil_op(ds->stencil.front.op.depth_fail);
         cfg.stencil_front.depth_pass =
            translate_stencil_op(ds->stencil.front.op.pass);
         cfg.stencil_back.compare_function =
            translate_compare_func(ds->stencil.back.op.compare);
         cfg.stencil_back.stencil_fail =
            translate_stencil_op(ds->stencil.back.op.fail);
         cfg.stencil_back.depth_fail =
            translate_stencil_op(ds->stencil.back.op.depth_fail);
         cfg.stencil_back.depth_pass =
            translate_stencil_op(ds->stencil.back.op.pass);
      }
   }

   cmdbuf->state.gfx.fs.rsd = ptr.gpu;
   draw->fs.rsd = cmdbuf->state.gfx.fs.rsd;
   return VK_SUCCESS;
}

static VkResult
panvk_draw_prepare_tiler_context(struct panvk_cmd_buffer *cmdbuf,
                                 struct panvk_draw_data *draw)
{
   struct panvk_batch *batch = cmdbuf->cur_batch;
   VkResult result =
      panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf, draw->info.layer_id);
   if (result != VK_SUCCESS)
      return result;

   draw->tiler_ctx = &batch->tiler.ctx;
   return VK_SUCCESS;
}

static mali_pixel_format
panvk_varying_hw_format(gl_shader_stage stage, gl_varying_slot loc,
                        enum pipe_format pfmt)
{
   switch (loc) {
   case VARYING_SLOT_PNTC:
   case VARYING_SLOT_PSIZ:
#if PAN_ARCH <= 6
      return (MALI_R16F << 12) | panfrost_get_default_swizzle(1);
#else
      return (MALI_R16F << 12) | MALI_RGB_COMPONENT_ORDER_R000;
#endif
   case VARYING_SLOT_POS:
#if PAN_ARCH <= 6
      return (MALI_SNAP_4 << 12) | panfrost_get_default_swizzle(4);
#else
      return (MALI_SNAP_4 << 12) | MALI_RGB_COMPONENT_ORDER_RGBA;
#endif
   default:
      if (pfmt != PIPE_FORMAT_NONE)
         return GENX(panfrost_format_from_pipe_format)(pfmt)->hw;

#if PAN_ARCH >= 7
      return (MALI_CONSTANT << 12) | MALI_RGB_COMPONENT_ORDER_0000;
#else
      return (MALI_CONSTANT << 12) | PAN_V6_SWIZZLE(0, 0, 0, 0);
#endif
   }
}

static VkResult
panvk_draw_prepare_varyings(struct panvk_cmd_buffer *cmdbuf,
                            struct panvk_draw_data *draw)
{
   const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader;
   const struct panvk_shader_link *link = &cmdbuf->state.gfx.link;
   struct panfrost_ptr bufs = panvk_cmd_alloc_desc_array(
      cmdbuf, PANVK_VARY_BUF_MAX + 1, ATTRIBUTE_BUFFER);
   if (!bufs.gpu)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   struct mali_attribute_buffer_packed *buf_descs = bufs.cpu;
   const struct vk_input_assembly_state *ia =
      &cmdbuf->vk.dynamic_graphics_state.ia;
   bool writes_point_size =
      vs->info.vs.writes_point_size &&
      ia->primitive_topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
   unsigned vertex_count =
      draw->padded_vertex_count * draw->info.instance.count;
   uint64_t psiz_buf = 0;

   for (unsigned i = 0; i < PANVK_VARY_BUF_MAX; i++) {
      unsigned buf_size = vertex_count * link->buf_strides[i];
      uint64_t buf_addr =
         buf_size ? panvk_cmd_alloc_dev_mem(cmdbuf, varying, buf_size, 64).gpu
                  : 0;
      if (buf_size && !buf_addr)
         return VK_ERROR_OUT_OF_DEVICE_MEMORY;

      pan_pack(&buf_descs[i], ATTRIBUTE_BUFFER, cfg) {
         cfg.stride = link->buf_strides[i];
         cfg.size = buf_size;
         cfg.pointer = buf_addr;
      }

      if (i == PANVK_VARY_BUF_POSITION)
         draw->position = buf_addr;

      if (i == PANVK_VARY_BUF_PSIZ)
         psiz_buf = buf_addr;
   }

   /* We need an empty entry to stop prefetching on Bifrost */
   memset(bufs.cpu + (pan_size(ATTRIBUTE_BUFFER) * PANVK_VARY_BUF_MAX), 0,
          pan_size(ATTRIBUTE_BUFFER));

   if (writes_point_size)
      draw->psiz = psiz_buf;
   else if (ia->primitive_topology == VK_PRIMITIVE_TOPOLOGY_LINE_LIST ||
            ia->primitive_topology == VK_PRIMITIVE_TOPOLOGY_LINE_STRIP)
      draw->line_width = cmdbuf->vk.dynamic_graphics_state.rs.line.width;
   else
      draw->line_width = 1.0f;

   draw->varying_bufs = bufs.gpu;
   draw->vs.varyings = panvk_priv_mem_dev_addr(link->vs.attribs);
   draw->fs.varyings = panvk_priv_mem_dev_addr(link->fs.attribs);
   return VK_SUCCESS;
}

static void
panvk_draw_emit_attrib_buf(const struct panvk_draw_data *draw,
                           const struct vk_vertex_binding_state *buf_info,
                           const struct panvk_attrib_buf *buf,
                           struct mali_attribute_buffer_packed *desc)
{
   uint64_t addr = buf->address & ~63ULL;
   unsigned size = buf->size + (buf->address & 63);
   unsigned divisor = draw->padded_vertex_count * buf_info->divisor;
   bool per_instance = buf_info->input_rate == VK_VERTEX_INPUT_RATE_INSTANCE;
   struct mali_attribute_buffer_packed *buf_ext = &desc[1];

   /* TODO: support instanced arrays */
   if (draw->info.instance.count <= 1) {
      pan_pack(desc, ATTRIBUTE_BUFFER, cfg) {
         cfg.type = MALI_ATTRIBUTE_TYPE_1D;
         cfg.stride = per_instance ? 0 : buf_info->stride;
         cfg.pointer = addr;
         cfg.size = size;
      }
   } else if (!per_instance) {
      pan_pack(desc, ATTRIBUTE_BUFFER, cfg) {
         cfg.type = MALI_ATTRIBUTE_TYPE_1D_MODULUS;
         cfg.divisor = draw->padded_vertex_count;
         cfg.stride = buf_info->stride;
         cfg.pointer = addr;
         cfg.size = size;
      }
   } else if (!divisor) {
      /* instance_divisor == 0 means all instances share the same value.
       * Make it a 1D array with a zero stride.
       */
      pan_pack(desc, ATTRIBUTE_BUFFER, cfg) {
         cfg.type = MALI_ATTRIBUTE_TYPE_1D;
         cfg.stride = 0;
         cfg.pointer = addr;
         cfg.size = size;
      }
   } else if (util_is_power_of_two_or_zero(divisor)) {
      pan_pack(desc, ATTRIBUTE_BUFFER, cfg) {
         cfg.type = MALI_ATTRIBUTE_TYPE_1D_POT_DIVISOR;
         cfg.stride = buf_info->stride;
         cfg.pointer = addr;
         cfg.size = size;
         cfg.divisor_r = __builtin_ctz(divisor);
      }
   } else {
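      /* Non-power-of-two divisors are handled with a multiply/shift
       * reciprocal: panfrost_compute_magic_divisor() picks a fixed-point
       * numerator plus the divisor_r/divisor_e terms so the hardware can
       * evaluate index / divisor without an actual division.
       */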
      unsigned divisor_r = 0, divisor_e = 0;
      unsigned divisor_num =
         panfrost_compute_magic_divisor(divisor, &divisor_r, &divisor_e);
      pan_pack(desc, ATTRIBUTE_BUFFER, cfg) {
         cfg.type = MALI_ATTRIBUTE_TYPE_1D_NPOT_DIVISOR;
         cfg.stride = buf_info->stride;
         cfg.pointer = addr;
         cfg.size = size;
         cfg.divisor_r = divisor_r;
         cfg.divisor_e = divisor_e;
      }

      pan_cast_and_pack(buf_ext, ATTRIBUTE_BUFFER_CONTINUATION_NPOT, cfg) {
         cfg.divisor_numerator = divisor_num;
         cfg.divisor = buf_info->divisor;
      }

      buf_ext = NULL;
   }

   /* If the buffer extension wasn't used, memset(0) */
   if (buf_ext)
      memset(buf_ext, 0, pan_size(ATTRIBUTE_BUFFER));
}

static void
panvk_draw_emit_attrib(const struct panvk_draw_data *draw,
                       const struct vk_vertex_attribute_state *attrib_info,
                       const struct vk_vertex_binding_state *buf_info,
                       const struct panvk_attrib_buf *buf,
                       struct mali_attribute_packed *desc)
{
   bool per_instance = buf_info->input_rate == VK_VERTEX_INPUT_RATE_INSTANCE;
   enum pipe_format f = vk_format_to_pipe_format(attrib_info->format);
   unsigned buf_idx = attrib_info->binding;

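   /* Each vertex buffer occupies two ATTRIBUTE_BUFFER slots so there's
    * always room for the NPOT divisor continuation word, hence the index
    * doubling.
    */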
   pan_pack(desc, ATTRIBUTE, cfg) {
      cfg.buffer_index = buf_idx * 2;
      cfg.offset = attrib_info->offset + (buf->address & 63);
      cfg.offset_enable = true;

      if (per_instance)
         cfg.offset += draw->info.instance.base * buf_info->stride;

      cfg.format = GENX(panfrost_format_from_pipe_format)(f)->hw;
   }
}

static VkResult
panvk_draw_prepare_vs_attribs(struct panvk_cmd_buffer *cmdbuf,
                              struct panvk_draw_data *draw)
{
   const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader;
   const struct vk_vertex_input_state *vi =
      cmdbuf->vk.dynamic_graphics_state.vi;
   unsigned num_imgs = vs->desc_info.others.count[PANVK_BIFROST_DESC_TABLE_IMG];
   unsigned num_vs_attribs = util_last_bit(vi->attributes_valid);
   unsigned num_vbs = util_last_bit(vi->bindings_valid);
   unsigned attrib_count =
      num_imgs ? MAX_VS_ATTRIBS + num_imgs : num_vs_attribs;
   bool dirty =
      dyn_gfx_state_dirty(cmdbuf, VI) ||
      dyn_gfx_state_dirty(cmdbuf, VI_BINDINGS_VALID) ||
      dyn_gfx_state_dirty(cmdbuf, VI_BINDING_STRIDES) ||
      gfx_state_dirty(cmdbuf, VB) ||
      gfx_state_dirty(cmdbuf, DESC_STATE);

   if (!dirty)
      return VK_SUCCESS;

   unsigned attrib_buf_count = (num_vbs + num_imgs) * 2;
   struct panfrost_ptr bufs = panvk_cmd_alloc_desc_array(
      cmdbuf, attrib_buf_count + 1, ATTRIBUTE_BUFFER);
   struct mali_attribute_buffer_packed *attrib_buf_descs = bufs.cpu;
   struct panfrost_ptr attribs =
      panvk_cmd_alloc_desc_array(cmdbuf, attrib_count, ATTRIBUTE);
   struct mali_attribute_packed *attrib_descs = attribs.cpu;

   if (!bufs.gpu || (attrib_count && !attribs.gpu))
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   for (unsigned i = 0; i < num_vbs; i++) {
      if (vi->bindings_valid & BITFIELD_BIT(i)) {
         panvk_draw_emit_attrib_buf(draw, &vi->bindings[i],
                                    &cmdbuf->state.gfx.vb.bufs[i],
                                    &attrib_buf_descs[i * 2]);
      } else {
         memset(&attrib_buf_descs[i * 2], 0, sizeof(*attrib_buf_descs) * 2);
      }
   }

   for (unsigned i = 0; i < num_vs_attribs; i++) {
      if (vi->attributes_valid & BITFIELD_BIT(i)) {
         unsigned buf_idx = vi->attributes[i].binding;
         panvk_draw_emit_attrib(
            draw, &vi->attributes[i], &vi->bindings[buf_idx],
            &cmdbuf->state.gfx.vb.bufs[buf_idx], &attrib_descs[i]);
      } else {
         memset(&attrib_descs[i], 0, sizeof(attrib_descs[0]));
      }
   }

   /* A NULL entry is needed to stop prefetching on Bifrost */
   memset(bufs.cpu + (pan_size(ATTRIBUTE_BUFFER) * attrib_buf_count), 0,
          pan_size(ATTRIBUTE_BUFFER));

   cmdbuf->state.gfx.vs.attrib_bufs = bufs.gpu;
   cmdbuf->state.gfx.vs.attribs = attribs.gpu;

   if (num_imgs) {
      cmdbuf->state.gfx.vs.desc.img_attrib_table =
         attribs.gpu + (MAX_VS_ATTRIBS * pan_size(ATTRIBUTE));
      cmdbuf->state.gfx.vs.desc.tables[PANVK_BIFROST_DESC_TABLE_IMG] =
         bufs.gpu + (num_vbs * pan_size(ATTRIBUTE_BUFFER) * 2);
   }

   return VK_SUCCESS;
}

static VkResult
panvk_draw_prepare_attributes(struct panvk_cmd_buffer *cmdbuf,
                              struct panvk_draw_data *draw)
{
   VkResult result = panvk_draw_prepare_vs_attribs(cmdbuf, draw);
   if (result != VK_SUCCESS)
      return result;

   draw->vs.attributes = cmdbuf->state.gfx.vs.attribs;
   draw->vs.attribute_bufs = cmdbuf->state.gfx.vs.attrib_bufs;
   return VK_SUCCESS;
}

static void
panvk_emit_viewport(struct panvk_cmd_buffer *cmdbuf,
                    struct mali_viewport_packed *vpd)
{
   const struct vk_viewport_state *vp = &cmdbuf->vk.dynamic_graphics_state.vp;

   if (vp->viewport_count < 1)
      return;

   struct panvk_graphics_sysvals *sysvals = &cmdbuf->state.gfx.sysvals;
   const VkViewport *viewport = &vp->viewports[0];
   const VkRect2D *scissor = &vp->scissors[0];
   float minz = sysvals->viewport.offset.z;
   float maxz = minz + sysvals->viewport.scale.z;

   /* The spec says "width must be greater than 0.0" */
   assert(viewport->width >= 0);
   int minx = (int)viewport->x;
   int maxx = (int)(viewport->x + viewport->width);

   /* Viewport height can be negative */
   int miny = MIN2((int)viewport->y, (int)(viewport->y + viewport->height));
   int maxy = MAX2((int)viewport->y, (int)(viewport->y + viewport->height));

   assert(scissor->offset.x >= 0 && scissor->offset.y >= 0);
   minx = MAX2(scissor->offset.x, minx);
   miny = MAX2(scissor->offset.y, miny);
   maxx = MIN2(scissor->offset.x + scissor->extent.width, maxx);
   maxy = MIN2(scissor->offset.y + scissor->extent.height, maxy);

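   /* The -1 converts the exclusive bounds computed above to the inclusive
    * bounds the hardware scissor expects.
    */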
   /* Make sure we don't end up with a max < min when width/height is 0 */
   maxx = maxx > minx ? maxx - 1 : maxx;
   maxy = maxy > miny ? maxy - 1 : maxy;

   /* Clamp viewport scissor to valid range */
   minx = CLAMP(minx, 0, UINT16_MAX);
   maxx = CLAMP(maxx, 0, UINT16_MAX);
   miny = CLAMP(miny, 0, UINT16_MAX);
   maxy = CLAMP(maxy, 0, UINT16_MAX);

   pan_pack(vpd, VIEWPORT, cfg) {
      cfg.scissor_minimum_x = minx;
      cfg.scissor_minimum_y = miny;
      cfg.scissor_maximum_x = maxx;
      cfg.scissor_maximum_y = maxy;
      cfg.minimum_z = MIN2(minz, maxz);
      cfg.maximum_z = MAX2(minz, maxz);
   }
}

static VkResult
panvk_draw_prepare_viewport(struct panvk_cmd_buffer *cmdbuf,
                            struct panvk_draw_data *draw)
{
   /* When rasterizerDiscardEnable is active, viewports and scissors are
    * allowed to be disabled, in which case we end up emitting an empty
    * viewport descriptor.
    */
   if (!cmdbuf->state.gfx.vpd || dyn_gfx_state_dirty(cmdbuf, VP_VIEWPORTS) ||
       dyn_gfx_state_dirty(cmdbuf, VP_SCISSORS) ||
       dyn_gfx_state_dirty(cmdbuf, RS_DEPTH_CLIP_ENABLE) ||
       dyn_gfx_state_dirty(cmdbuf, RS_DEPTH_CLAMP_ENABLE)) {
      struct panfrost_ptr vp = panvk_cmd_alloc_desc(cmdbuf, VIEWPORT);
      if (!vp.gpu)
         return VK_ERROR_OUT_OF_DEVICE_MEMORY;

      panvk_emit_viewport(cmdbuf, vp.cpu);
      cmdbuf->state.gfx.vpd = vp.gpu;
   }

   draw->viewport = cmdbuf->state.gfx.vpd;
   return VK_SUCCESS;
}

static void
panvk_emit_vertex_dcd(struct panvk_cmd_buffer *cmdbuf,
                      const struct panvk_draw_data *draw,
                      struct mali_draw_packed *dcd)
{
   const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader;
   const struct panvk_shader_desc_state *vs_desc_state =
      &cmdbuf->state.gfx.vs.desc;

   pan_pack(dcd, DRAW, cfg) {
      cfg.state = panvk_priv_mem_dev_addr(vs->rsd);
      cfg.attributes = draw->vs.attributes;
      cfg.attribute_buffers = draw->vs.attribute_bufs;
      cfg.varyings = draw->vs.varyings;
      cfg.varying_buffers = draw->varying_bufs;
      cfg.thread_storage = draw->tls;
      cfg.offset_start = draw->info.vertex.raw_offset;
      cfg.instance_size =
         draw->info.instance.count > 1 ? draw->padded_vertex_count : 1;
      cfg.uniform_buffers = vs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_UBO];
      cfg.push_uniforms = cmdbuf->state.gfx.vs.push_uniforms;
      cfg.textures = vs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_TEXTURE];
      cfg.samplers = vs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_SAMPLER];
   }
}

static VkResult
panvk_draw_prepare_vertex_job(struct panvk_cmd_buffer *cmdbuf,
                              struct panvk_draw_data *draw)
{
   struct panvk_batch *batch = cmdbuf->cur_batch;
   struct panfrost_ptr ptr = panvk_cmd_alloc_desc(cmdbuf, COMPUTE_JOB);
   if (!ptr.gpu)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   util_dynarray_append(&batch->jobs, void *, ptr.cpu);
   draw->jobs.vertex = ptr;

   memcpy(pan_section_ptr(ptr.cpu, COMPUTE_JOB, INVOCATION), &draw->invocation,
          pan_size(INVOCATION));

   pan_section_pack(ptr.cpu, COMPUTE_JOB, PARAMETERS, cfg) {
      cfg.job_task_split = 5;
   }

   panvk_emit_vertex_dcd(cmdbuf, draw,
                         pan_section_ptr(ptr.cpu, COMPUTE_JOB, DRAW));
   return VK_SUCCESS;
}

static enum mali_draw_mode
translate_prim_topology(VkPrimitiveTopology in)
{
   /* Test VK_PRIMITIVE_TOPOLOGY_META_RECT_LIST_MESA separately, as it's not
    * part of the VkPrimitiveTopology enum.
    */
   if (in == VK_PRIMITIVE_TOPOLOGY_META_RECT_LIST_MESA)
      return MALI_DRAW_MODE_TRIANGLES;

   switch (in) {
   case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
      return MALI_DRAW_MODE_POINTS;
   case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
      return MALI_DRAW_MODE_LINES;
   case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
      return MALI_DRAW_MODE_LINE_STRIP;
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
      return MALI_DRAW_MODE_TRIANGLES;
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
      return MALI_DRAW_MODE_TRIANGLE_STRIP;
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
      return MALI_DRAW_MODE_TRIANGLE_FAN;
   case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
   case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
   case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
   default:
      unreachable("Invalid primitive type");
   }
}

static void
panvk_emit_tiler_primitive(struct panvk_cmd_buffer *cmdbuf,
                           const struct panvk_draw_data *draw,
                           struct mali_primitive_packed *prim)
{
   const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader;
   const struct panvk_shader *fs = get_fs(cmdbuf);
   const struct vk_dynamic_graphics_state *dyns =
      &cmdbuf->vk.dynamic_graphics_state;
   const struct vk_input_assembly_state *ia = &dyns->ia;
   const struct vk_rasterization_state *rs = &dyns->rs;
   bool writes_point_size =
      vs->info.vs.writes_point_size &&
      ia->primitive_topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
   bool secondary_shader = vs->info.vs.secondary_enable && fs != NULL;

   pan_pack(prim, PRIMITIVE, cfg) {
      cfg.draw_mode = translate_prim_topology(ia->primitive_topology);
      if (writes_point_size)
         cfg.point_size_array_format = MALI_POINT_SIZE_ARRAY_FORMAT_FP16;

      cfg.first_provoking_vertex =
         cmdbuf->vk.dynamic_graphics_state.rs.provoking_vertex ==
         VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT;

      if (ia->primitive_restart_enable)
         cfg.primitive_restart = MALI_PRIMITIVE_RESTART_IMPLICIT;
      cfg.job_task_split = 6;

      if (draw->info.index.size) {
         cfg.index_count = draw->info.vertex.count;
         cfg.indices = draw->indices;
         cfg.base_vertex_offset =
            draw->info.vertex.base - draw->info.vertex.raw_offset;

         switch (draw->info.index.size) {
         case 4:
            cfg.index_type = MALI_INDEX_TYPE_UINT32;
            break;
         case 2:
            cfg.index_type = MALI_INDEX_TYPE_UINT16;
            break;
         case 1:
            cfg.index_type = MALI_INDEX_TYPE_UINT8;
            break;
         default:
            unreachable("Invalid index size");
         }
      } else {
         cfg.index_count = draw->info.vertex.count;
         cfg.index_type = MALI_INDEX_TYPE_NONE;
      }

      cfg.low_depth_cull = cfg.high_depth_cull =
         vk_rasterization_state_depth_clip_enable(rs);

      cfg.secondary_shader = secondary_shader;
   }
}

static void
panvk_emit_tiler_primitive_size(struct panvk_cmd_buffer *cmdbuf,
                                const struct panvk_draw_data *draw,
                                struct mali_primitive_size_packed *primsz)
{
   const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader;
   const struct vk_input_assembly_state *ia =
      &cmdbuf->vk.dynamic_graphics_state.ia;
   bool writes_point_size =
      vs->info.vs.writes_point_size &&
      ia->primitive_topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST;

   pan_pack(primsz, PRIMITIVE_SIZE, cfg) {
      if (writes_point_size) {
         cfg.size_array = draw->psiz;
      } else {
         cfg.constant = draw->line_width;
      }
   }
}

static void
panvk_emit_tiler_dcd(struct panvk_cmd_buffer *cmdbuf,
                     const struct panvk_draw_data *draw,
                     struct mali_draw_packed *dcd)
{
   struct panvk_shader_desc_state *fs_desc_state = &cmdbuf->state.gfx.fs.desc;
   const struct vk_rasterization_state *rs =
      &cmdbuf->vk.dynamic_graphics_state.rs;
   const struct vk_input_assembly_state *ia =
      &cmdbuf->vk.dynamic_graphics_state.ia;

   pan_pack(dcd, DRAW, cfg) {
      cfg.front_face_ccw = rs->front_face == VK_FRONT_FACE_COUNTER_CLOCKWISE;
      cfg.cull_front_face = (rs->cull_mode & VK_CULL_MODE_FRONT_BIT) != 0;
      cfg.cull_back_face = (rs->cull_mode & VK_CULL_MODE_BACK_BIT) != 0;
      cfg.position = draw->position;
      cfg.state = draw->fs.rsd;
      cfg.attributes = fs_desc_state->img_attrib_table;
      cfg.attribute_buffers =
         fs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_IMG];
      cfg.viewport = draw->viewport;
      cfg.varyings = draw->fs.varyings;
      cfg.varying_buffers = cfg.varyings ? draw->varying_bufs : 0;
      cfg.thread_storage = draw->tls;

      /* For all primitives but lines DRAW.flat_shading_vertex must
       * be set to 0 and the provoking vertex is selected with the
       * PRIMITIVE.first_provoking_vertex field.
       */
      if (ia->primitive_topology == VK_PRIMITIVE_TOPOLOGY_LINE_LIST ||
          ia->primitive_topology == VK_PRIMITIVE_TOPOLOGY_LINE_STRIP)
         cfg.flat_shading_vertex = true;

      cfg.offset_start = draw->info.vertex.raw_offset;
      cfg.instance_size =
         draw->info.instance.count > 1 ? draw->padded_vertex_count : 1;
      cfg.uniform_buffers = fs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_UBO];
      cfg.push_uniforms = cmdbuf->state.gfx.fs.push_uniforms;
      cfg.textures = fs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_TEXTURE];
      cfg.samplers = fs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_SAMPLER];

      cfg.occlusion_query = cmdbuf->state.gfx.occlusion_query.mode;
      cfg.occlusion = cmdbuf->state.gfx.occlusion_query.ptr;
   }
}

static void
set_provoking_vertex_mode(struct panvk_cmd_buffer *cmdbuf)
{
   struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info;
   bool first_provoking_vertex =
      cmdbuf->vk.dynamic_graphics_state.rs.provoking_vertex ==
      VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT;

   /* If this is not the first draw, first_provoking_vertex should match
    * the one from the previous draws. Unfortunately, we can't check it
    * when the render pass is inherited. */
   assert(!cmdbuf->cur_batch->fb.desc.gpu ||
          fbinfo->first_provoking_vertex == first_provoking_vertex);

   fbinfo->first_provoking_vertex = first_provoking_vertex;
}

static VkResult
panvk_draw_prepare_tiler_job(struct panvk_cmd_buffer *cmdbuf,
                             struct panvk_draw_data *draw)
{
   struct panvk_batch *batch = cmdbuf->cur_batch;
   const struct panvk_shader *fs = cmdbuf->state.gfx.fs.shader;
   struct panvk_shader_desc_state *fs_desc_state = &cmdbuf->state.gfx.fs.desc;
   struct panfrost_ptr ptr;
   VkResult result = panvk_per_arch(meta_get_copy_desc_job)(
      cmdbuf, fs, &cmdbuf->state.gfx.desc_state, fs_desc_state, 0, &ptr);

   if (result != VK_SUCCESS)
      return result;

   if (ptr.cpu)
      util_dynarray_append(&batch->jobs, void *, ptr.cpu);

   draw->jobs.frag_copy_desc = ptr;

   ptr = panvk_cmd_alloc_desc(cmdbuf, TILER_JOB);
   if (!ptr.gpu)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   util_dynarray_append(&batch->jobs, void *, ptr.cpu);
   draw->jobs.tiler = ptr;

   memcpy(pan_section_ptr(ptr.cpu, TILER_JOB, INVOCATION), &draw->invocation,
          pan_size(INVOCATION));

   panvk_emit_tiler_primitive(cmdbuf, draw,
                              pan_section_ptr(ptr.cpu, TILER_JOB, PRIMITIVE));

   panvk_emit_tiler_primitive_size(
      cmdbuf, draw, pan_section_ptr(ptr.cpu, TILER_JOB, PRIMITIVE_SIZE));

   panvk_emit_tiler_dcd(cmdbuf, draw,
                        pan_section_ptr(ptr.cpu, TILER_JOB, DRAW));

   pan_section_pack(ptr.cpu, TILER_JOB, TILER, cfg) {
      cfg.address = PAN_ARCH >= 9 ? draw->tiler_ctx->valhall.desc
                                  : draw->tiler_ctx->bifrost.desc;
   }

   pan_section_pack(ptr.cpu, TILER_JOB, PADDING, padding)
      ;

   return VK_SUCCESS;
}

static VkResult
panvk_draw_prepare_idvs_job(struct panvk_cmd_buffer *cmdbuf,
                            struct panvk_draw_data *draw)
{
   struct panvk_batch *batch = cmdbuf->cur_batch;
   struct panfrost_ptr ptr = panvk_cmd_alloc_desc(cmdbuf, INDEXED_VERTEX_JOB);
   if (!ptr.gpu)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   util_dynarray_append(&batch->jobs, void *, ptr.cpu);
   draw->jobs.idvs = ptr;

   memcpy(pan_section_ptr(ptr.cpu, INDEXED_VERTEX_JOB, INVOCATION),
          &draw->invocation, pan_size(INVOCATION));

   panvk_emit_tiler_primitive(
      cmdbuf, draw, pan_section_ptr(ptr.cpu, INDEXED_VERTEX_JOB, PRIMITIVE));

   panvk_emit_tiler_primitive_size(
      cmdbuf, draw,
      pan_section_ptr(ptr.cpu, INDEXED_VERTEX_JOB, PRIMITIVE_SIZE));

   pan_section_pack(ptr.cpu, INDEXED_VERTEX_JOB, TILER, cfg) {
      cfg.address = PAN_ARCH >= 9 ? draw->tiler_ctx->valhall.desc
                                  : draw->tiler_ctx->bifrost.desc;
   }

   pan_section_pack(ptr.cpu, INDEXED_VERTEX_JOB, PADDING, _) {
   }

   panvk_emit_tiler_dcd(
      cmdbuf, draw,
      pan_section_ptr(ptr.cpu, INDEXED_VERTEX_JOB, FRAGMENT_DRAW));

   panvk_emit_vertex_dcd(
      cmdbuf, draw, pan_section_ptr(ptr.cpu, INDEXED_VERTEX_JOB, VERTEX_DRAW));
   return VK_SUCCESS;
}

static VkResult
panvk_draw_prepare_vs_copy_desc_job(struct panvk_cmd_buffer *cmdbuf,
                                    struct panvk_draw_data *draw)
{
   struct panvk_batch *batch = cmdbuf->cur_batch;
   const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader;
   const struct panvk_shader_desc_state *vs_desc_state =
      &cmdbuf->state.gfx.vs.desc;
   const struct vk_vertex_input_state *vi =
      cmdbuf->vk.dynamic_graphics_state.vi;
   unsigned num_vbs = util_last_bit(vi->bindings_valid);
   struct panfrost_ptr ptr;
   VkResult result = panvk_per_arch(meta_get_copy_desc_job)(
      cmdbuf, vs, &cmdbuf->state.gfx.desc_state, vs_desc_state,
      num_vbs * pan_size(ATTRIBUTE_BUFFER) * 2, &ptr);
   if (result != VK_SUCCESS)
      return result;

   if (ptr.cpu)
      util_dynarray_append(&batch->jobs, void *, ptr.cpu);

   draw->jobs.vertex_copy_desc = ptr;
   return VK_SUCCESS;
}

static VkResult
panvk_draw_prepare_fs_copy_desc_job(struct panvk_cmd_buffer *cmdbuf,
                                    struct panvk_draw_data *draw)
{
   const struct panvk_shader *fs = cmdbuf->state.gfx.fs.shader;
   struct panvk_shader_desc_state *fs_desc_state = &cmdbuf->state.gfx.fs.desc;
   struct panvk_batch *batch = cmdbuf->cur_batch;
   struct panfrost_ptr ptr;
   VkResult result = panvk_per_arch(meta_get_copy_desc_job)(
      cmdbuf, fs, &cmdbuf->state.gfx.desc_state, fs_desc_state, 0, &ptr);

   if (result != VK_SUCCESS)
      return result;

   if (ptr.cpu)
      util_dynarray_append(&batch->jobs, void *, ptr.cpu);

   draw->jobs.frag_copy_desc = ptr;
   return VK_SUCCESS;
}

void
panvk_per_arch(cmd_preload_fb_after_batch_split)(struct panvk_cmd_buffer *cmdbuf)
{
   struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info;

   for (unsigned i = 0; i < fbinfo->rt_count; i++) {
      if (fbinfo->rts[i].view) {
         fbinfo->rts[i].clear = false;
         fbinfo->rts[i].preload = true;
      }
   }

   if (fbinfo->zs.view.zs) {
      fbinfo->zs.clear.z = false;
      fbinfo->zs.preload.z = true;
   }

   if (fbinfo->zs.view.s ||
       (fbinfo->zs.view.zs &&
        util_format_is_depth_and_stencil(fbinfo->zs.view.zs->format))) {
      fbinfo->zs.clear.s = false;
      fbinfo->zs.preload.s = true;
   }
}

static VkResult
panvk_cmd_prepare_draw_link_shaders(struct panvk_cmd_buffer *cmd)
{
   struct panvk_cmd_graphics_state *gfx = &cmd->state.gfx;

   if (!gfx_state_dirty(cmd, VS) && !gfx_state_dirty(cmd, FS))
      return VK_SUCCESS;

   VkResult result = panvk_per_arch(link_shaders)(
      &cmd->desc_pool, gfx->vs.shader, get_fs(cmd), &gfx->link);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd->vk, result);
      return result;
   }

   return VK_SUCCESS;
}

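/* Shared draw path for CmdDraw/CmdDrawIndexed: allocates the FB/TLS
 * descriptors if needed, prepares the descriptor tables and copy-desc jobs,
 * then queues one vertex+tiler (or IDVS) job pair per layer.
 */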
static void
panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_data *draw)
{
   struct panvk_batch *batch = cmdbuf->cur_batch;
   const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader;
   struct panvk_shader_desc_state *vs_desc_state = &cmdbuf->state.gfx.vs.desc;
   struct panvk_shader_desc_state *fs_desc_state = &cmdbuf->state.gfx.fs.desc;
   struct panvk_descriptor_state *desc_state = &cmdbuf->state.gfx.desc_state;
   uint32_t layer_count = cmdbuf->state.gfx.render.layer_count;
   const struct vk_rasterization_state *rs =
      &cmdbuf->vk.dynamic_graphics_state.rs;
   bool idvs = vs->info.vs.idvs;
   VkResult result;

   /* If there's no vertex shader, we can skip the draw. */
   if (!panvk_priv_mem_dev_addr(vs->rsd))
      return;

   /* Needs to be done before get_fs() is called because it depends on
    * fs.required being initialized. */
   cmdbuf->state.gfx.fs.required =
      fs_required(&cmdbuf->state.gfx, &cmdbuf->vk.dynamic_graphics_state);

   const struct panvk_shader *fs = get_fs(cmdbuf);

   /* There are only 16 bits in the descriptor for the job ID. Each job has a
    * pilot shader dealing with descriptor copies, and we need one
    * <vertex,tiler> pair per draw.
    */
   if (batch->vtc_jc.job_index + (4 * layer_count) >= UINT16_MAX) {
      panvk_per_arch(cmd_close_batch)(cmdbuf);
      panvk_per_arch(cmd_preload_fb_after_batch_split)(cmdbuf);
      batch = panvk_per_arch(cmd_open_batch)(cmdbuf);
   }

   if (fs_user_dirty(cmdbuf)) {
      result = panvk_cmd_prepare_draw_link_shaders(cmdbuf);
      if (result != VK_SUCCESS)
         return;
   }

   bool active_occlusion =
      cmdbuf->state.gfx.occlusion_query.mode != MALI_OCCLUSION_MODE_DISABLED;
   bool needs_tiling = !rs->rasterizer_discard_enable || active_occlusion;

   set_provoking_vertex_mode(cmdbuf);

   if (!rs->rasterizer_discard_enable) {
      struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info;
      uint32_t rasterization_samples =
         cmdbuf->vk.dynamic_graphics_state.ms.rasterization_samples;

      /* If there's no attachment, and the FB descriptor hasn't been allocated
       * yet, we patch nr_samples to match rasterization_samples, otherwise, we
       * make sure those two numbers match. */
      if (!batch->fb.desc.gpu && !cmdbuf->state.gfx.render.bound_attachments) {
         assert(rasterization_samples > 0);
         fbinfo->nr_samples = rasterization_samples;
      } else {
         assert(rasterization_samples == fbinfo->nr_samples);
      }

      result = panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf);
      if (result != VK_SUCCESS)
         return;
   }

   result = panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, true);
   if (result != VK_SUCCESS)
      return;

   result = panvk_draw_prepare_attributes(cmdbuf, draw);
   if (result != VK_SUCCESS)
      return;

   uint32_t used_set_mask =
      vs->desc_info.used_set_mask | (fs ? fs->desc_info.used_set_mask : 0);

   if (gfx_state_dirty(cmdbuf, DESC_STATE) || gfx_state_dirty(cmdbuf, VS) ||
       gfx_state_dirty(cmdbuf, FS)) {
      result = panvk_per_arch(cmd_prepare_push_descs)(cmdbuf, desc_state,
                                                      used_set_mask);
      if (result != VK_SUCCESS)
         return;
   }

   if (gfx_state_dirty(cmdbuf, DESC_STATE) || gfx_state_dirty(cmdbuf, VS)) {
      result = panvk_per_arch(cmd_prepare_shader_desc_tables)(
         cmdbuf, &cmdbuf->state.gfx.desc_state, vs, vs_desc_state);
      if (result != VK_SUCCESS)
         return;

      result = panvk_draw_prepare_vs_copy_desc_job(cmdbuf, draw);
      if (result != VK_SUCCESS)
         return;
   }

   unsigned copy_desc_job_id =
      draw->jobs.vertex_copy_desc.gpu
         ? pan_jc_add_job(&batch->vtc_jc, MALI_JOB_TYPE_COMPUTE, false, false,
                          0, 0, &draw->jobs.vertex_copy_desc, false)
         : 0;

   /* No need to setup the FS desc tables if the FS is not executed. */
   if (fs &&
       (gfx_state_dirty(cmdbuf, DESC_STATE) || gfx_state_dirty(cmdbuf, FS))) {
      result = panvk_per_arch(cmd_prepare_shader_desc_tables)(
         cmdbuf, &cmdbuf->state.gfx.desc_state, fs, fs_desc_state);
      if (result != VK_SUCCESS)
         return;

      result = panvk_draw_prepare_fs_copy_desc_job(cmdbuf, draw);
      if (result != VK_SUCCESS)
         return;

      if (draw->jobs.frag_copy_desc.gpu) {
         /* We don't need to add frag_copy_desc as a dependency because the
          * tiler job doesn't execute the fragment shader, the fragment job
          * will, and the tiler/fragment synchronization happens at the batch
          * level. */
         pan_jc_add_job(&batch->vtc_jc, MALI_JOB_TYPE_COMPUTE, false, false, 0,
                        0, &draw->jobs.frag_copy_desc, false);
      }
   }

   /* TODO: indexed draws */
   draw->tls = batch->tls.gpu;
   draw->fb = batch->fb.desc.gpu;

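   /* Vertex shading is dispatched as a compute-style job: the invocation
    * packs the vertex range and instance count as the grid dimensions.
    */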
   panfrost_pack_work_groups_compute(&draw->invocation, 1, draw->vertex_range,
                                     draw->info.instance.count, 1, 1, 1, true,
                                     false);

   result = panvk_draw_prepare_fs_rsd(cmdbuf, draw);
   if (result != VK_SUCCESS)
      return;

   batch->tlsinfo.tls.size = MAX3(vs->info.tls_size, fs ? fs->info.tls_size : 0,
                                  batch->tlsinfo.tls.size);

   if (gfx_state_dirty(cmdbuf, DESC_STATE) || gfx_state_dirty(cmdbuf, VS)) {
      VkResult result = panvk_per_arch(cmd_prepare_dyn_ssbos)(
         cmdbuf, desc_state, vs, vs_desc_state);
      if (result != VK_SUCCESS)
         return;
   }

   if (gfx_state_dirty(cmdbuf, DESC_STATE) || gfx_state_dirty(cmdbuf, FS)) {
      VkResult result = panvk_per_arch(cmd_prepare_dyn_ssbos)(
         cmdbuf, desc_state, fs, fs_desc_state);
      if (result != VK_SUCCESS)
         return;
   }

   for (uint32_t i = 0; i < layer_count; i++) {
      draw->info.layer_id = i;
      result = panvk_draw_prepare_varyings(cmdbuf, draw);
      if (result != VK_SUCCESS)
         return;

      panvk_per_arch(cmd_prepare_draw_sysvals)(cmdbuf, &draw->info);

      /* Viewport emission requires up-to-date {scale,offset}.z for min/max Z,
       * so we need to call it after calling cmd_prepare_draw_sysvals(), but
       * viewports are the same for all layers, so we only emit when layer_id=0.
       */
      if (i == 0) {
         result = panvk_draw_prepare_viewport(cmdbuf, draw);
         if (result != VK_SUCCESS)
            return;
      }

      result = panvk_per_arch(cmd_prepare_push_uniforms)(
         cmdbuf, cmdbuf->state.gfx.vs.shader);
      if (result != VK_SUCCESS)
         return;

      if (fs) {
         result = panvk_per_arch(cmd_prepare_push_uniforms)(
            cmdbuf, cmdbuf->state.gfx.fs.shader);
         if (result != VK_SUCCESS)
            return;
      }

      result = panvk_draw_prepare_tiler_context(cmdbuf, draw);
      if (result != VK_SUCCESS)
         return;

      if (idvs) {
         result = panvk_draw_prepare_idvs_job(cmdbuf, draw);
         if (result != VK_SUCCESS)
            return;

         pan_jc_add_job(&batch->vtc_jc, MALI_JOB_TYPE_INDEXED_VERTEX, false,
                        false, 0, copy_desc_job_id, &draw->jobs.idvs, false);
      } else {
         result = panvk_draw_prepare_vertex_job(cmdbuf, draw);
         if (result != VK_SUCCESS)
            return;

         unsigned vjob_id =
            pan_jc_add_job(&batch->vtc_jc, MALI_JOB_TYPE_VERTEX, false, false,
                           0, copy_desc_job_id, &draw->jobs.vertex, false);

         if (needs_tiling) {
            result = panvk_draw_prepare_tiler_job(cmdbuf, draw);
            if (result != VK_SUCCESS)
               return;

            pan_jc_add_job(&batch->vtc_jc, MALI_JOB_TYPE_TILER, false, false,
                           vjob_id, 0, &draw->jobs.tiler, false);
         }
      }
   }

   clear_dirty_after_draw(cmdbuf);
}

static unsigned
padded_vertex_count(struct panvk_cmd_buffer *cmdbuf, uint32_t vertex_count,
                    uint32_t instance_count)
{
   if (instance_count == 1)
      return vertex_count;

   bool idvs = cmdbuf->state.gfx.vs.shader->info.vs.idvs;

   /* Index-Driven Vertex Shading requires different instances to
    * have different cache lines for position results. Each vertex
    * position is 16 bytes and the Mali cache line is 64 bytes, so
    * the vertex count must be aligned to 4.
    */
   if (idvs)
      vertex_count = ALIGN_POT(vertex_count, 4);

   return panfrost_padded_vertex_count(vertex_count);
}

VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdDraw)(VkCommandBuffer commandBuffer, uint32_t vertexCount,
                        uint32_t instanceCount, uint32_t firstVertex,
                        uint32_t firstInstance)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);

   if (instanceCount == 0 || vertexCount == 0)
      return;

   /* gl_BaseVertexARB is a signed integer, and it should expose the value of
    * firstVertex in a non-indexed draw. */
   assert(firstVertex < INT32_MAX);

   /* gl_BaseInstance is a signed integer, and it should expose the value of
    * firstInstance. */
   assert(firstInstance < INT32_MAX);

   struct panvk_draw_data draw = {
      .info = {
         .vertex.base = firstVertex,
         .vertex.raw_offset = firstVertex,
         .vertex.count = vertexCount,
         .instance.base = firstInstance,
         .instance.count = instanceCount,
      },
      .vertex_range = vertexCount,
      .padded_vertex_count =
         padded_vertex_count(cmdbuf, vertexCount, instanceCount),
   };

   panvk_cmd_draw(cmdbuf, &draw);
}

static void
panvk_index_minmax_search(struct panvk_cmd_buffer *cmdbuf, uint32_t start,
                          uint32_t count, bool restart, uint32_t *min,
                          uint32_t *max)
{
   struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
   struct panvk_instance *instance =
      to_panvk_instance(dev->vk.physical->instance);

   assert(PAN_ARCH < 9);
   assert(cmdbuf->state.gfx.ib.buffer);
   assert(cmdbuf->state.gfx.ib.buffer->bo);
   assert(cmdbuf->state.gfx.ib.buffer->host_ptr);

   void *ptr =
      cmdbuf->state.gfx.ib.buffer->host_ptr + cmdbuf->state.gfx.ib.offset;

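   /* Reading the index buffer from the CPU relies on it having a valid host
    * mapping (asserted above) and being idle at recording time, neither of
    * which Vulkan guarantees, hence the warning below.
    */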
   if (!(instance->debug_flags & PANVK_DEBUG_NO_KNOWN_WARN)) {
      mesa_logw("Crawling index buffers from the CPU isn't valid in Vulkan\n");
   }

   *max = 0;

   /* TODO: Use panfrost_minmax_cache */
   /* TODO: Read full cacheline of data to mitigate the uncached
    * mapping slowness.
    */
   switch (cmdbuf->state.gfx.ib.index_size * 8) {
#define MINMAX_SEARCH_CASE(sz)                                                 \
   case sz: {                                                                  \
      uint##sz##_t *indices = ptr;                                             \
      *min = UINT##sz##_MAX;                                                   \
      for (uint32_t i = 0; i < count; i++) {                                   \
         if (restart && indices[i + start] == UINT##sz##_MAX)                  \
            continue;                                                          \
         *min = MIN2(indices[i + start], *min);                                \
         *max = MAX2(indices[i + start], *max);                                \
      }                                                                        \
      break;                                                                   \
   }
      MINMAX_SEARCH_CASE(32)
      MINMAX_SEARCH_CASE(16)
      MINMAX_SEARCH_CASE(8)
#undef MINMAX_SEARCH_CASE
   default:
      unreachable("Invalid index size");
   }
}

VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdDrawIndexed)(VkCommandBuffer commandBuffer,
                               uint32_t indexCount, uint32_t instanceCount,
                               uint32_t firstIndex, int32_t vertexOffset,
                               uint32_t firstInstance)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   uint32_t min_vertex, max_vertex;

   if (instanceCount == 0 || indexCount == 0)
      return;

   /* gl_BaseInstance is a signed integer, and it should expose the value of
    * firstInstance. */
   assert(firstInstance < INT32_MAX);

   const struct vk_input_assembly_state *ia =
      &cmdbuf->vk.dynamic_graphics_state.ia;
   bool primitive_restart = ia->primitive_restart_enable;

   panvk_index_minmax_search(cmdbuf, firstIndex, indexCount, primitive_restart,
                             &min_vertex, &max_vertex);

   unsigned vertex_range = max_vertex - min_vertex + 1;
   struct panvk_draw_data draw = {
      .info = {
         .index.size = cmdbuf->state.gfx.ib.index_size,
         .index.offset = firstIndex,
         .vertex.base = vertexOffset,
         .vertex.raw_offset = min_vertex + vertexOffset,
         .vertex.count = indexCount,
         .instance.base = firstInstance,
         .instance.count = instanceCount,
      },
      .vertex_range = vertex_range,
      .padded_vertex_count =
         padded_vertex_count(cmdbuf, vertex_range, instanceCount),
      .indices = panvk_buffer_gpu_ptr(cmdbuf->state.gfx.ib.buffer,
                                      cmdbuf->state.gfx.ib.offset) +
                 (firstIndex * cmdbuf->state.gfx.ib.index_size),
   };

   panvk_cmd_draw(cmdbuf, &draw);
}

VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdDrawIndirect)(VkCommandBuffer commandBuffer, VkBuffer _buffer,
                                VkDeviceSize offset, uint32_t drawCount,
                                uint32_t stride)
{
   panvk_stub();
}

VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdDrawIndexedIndirect)(VkCommandBuffer commandBuffer,
                                       VkBuffer _buffer, VkDeviceSize offset,
                                       uint32_t drawCount, uint32_t stride)
{
   panvk_stub();
}

VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdBeginRendering)(VkCommandBuffer commandBuffer,
                                  const VkRenderingInfo *pRenderingInfo)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   struct panvk_cmd_graphics_state *state = &cmdbuf->state.gfx;
   bool resuming = cmdbuf->state.gfx.render.flags & VK_RENDERING_RESUMING_BIT;

   /* When resuming from a suspended pass, the state should be unchanged. */
   if (resuming)
      state->render.flags = pRenderingInfo->flags;
   else
      panvk_per_arch(cmd_init_render_state)(cmdbuf, pRenderingInfo);

   /* If we're not resuming, cur_batch should be NULL.
    * However, this currently isn't true because of how events are implemented.
    * XXX: Rewrite events to not close and open batch and add an assert here.
    */
   if (cmdbuf->cur_batch && !resuming)
      panvk_per_arch(cmd_close_batch)(cmdbuf);

   /* The opened batch might have been disrupted by a compute job.
    * We need to preload in that case. */
   if (resuming && !cmdbuf->cur_batch)
      panvk_per_arch(cmd_preload_fb_after_batch_split)(cmdbuf);

   if (!cmdbuf->cur_batch)
      panvk_per_arch(cmd_open_batch)(cmdbuf);

   if (!resuming)
      panvk_per_arch(cmd_preload_render_area_border)(cmdbuf, pRenderingInfo);
}

VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdEndRendering)(VkCommandBuffer commandBuffer)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);

   if (!(cmdbuf->state.gfx.render.flags & VK_RENDERING_SUSPENDING_BIT)) {
      struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info;
      bool clear = fbinfo->zs.clear.z | fbinfo->zs.clear.s;
      for (unsigned i = 0; i < fbinfo->rt_count; i++)
         clear |= fbinfo->rts[i].clear;

      if (clear)
         panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf);

      panvk_per_arch(cmd_close_batch)(cmdbuf);
      cmdbuf->cur_batch = NULL;
      panvk_per_arch(cmd_resolve_attachments)(cmdbuf);
   }
}