• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2024 Collabora Ltd.
3  *
4  * Derived from tu_cmd_buffer.c which is:
5  * Copyright © 2016 Red Hat.
6  * Copyright © 2016 Bas Nieuwenhuizen
7  * Copyright © 2015 Intel Corporation
8  *
9  * SPDX-License-Identifier: MIT
10  */
11 
12 #include "genxml/gen_macros.h"
13 
14 #include "panvk_buffer.h"
15 #include "panvk_cmd_alloc.h"
16 #include "panvk_cmd_buffer.h"
17 #include "panvk_cmd_desc_state.h"
18 #include "panvk_cmd_meta.h"
19 #include "panvk_device.h"
20 #include "panvk_entrypoints.h"
21 #include "panvk_image.h"
22 #include "panvk_image_view.h"
23 #include "panvk_instance.h"
24 #include "panvk_priv_bo.h"
25 #include "panvk_shader.h"
26 
27 #include "pan_desc.h"
28 #include "pan_earlyzs.h"
29 #include "pan_encoder.h"
30 #include "pan_format.h"
31 #include "pan_jc.h"
32 #include "pan_props.h"
33 #include "pan_shader.h"
34 
35 #include "vk_format.h"
36 #include "vk_meta.h"
37 #include "vk_pipeline_layout.h"
38 
/*
 * Per-draw scratch state gathered while recording a draw call.
 *
 * The uint64_t members are GPU device addresses of descriptors/buffers
 * allocated from the command-buffer descriptor pools; the panfrost_ptr
 * members carry both the CPU and GPU address of a job descriptor.
 */
struct panvk_draw_data {
   struct panvk_draw_info info;
   unsigned vertex_range;
   /* Vertex count padded as required by the HW for instanced draws. */
   unsigned padded_vertex_count;
   struct mali_invocation_packed invocation;
   struct {
      uint64_t varyings;       /* Varying descriptors written by the VS. */
      uint64_t attributes;     /* Vertex attribute descriptors. */
      uint64_t attribute_bufs; /* Vertex attribute buffer descriptors. */
   } vs;
   struct {
      uint64_t rsd;            /* Fragment renderer state descriptor. */
      uint64_t varyings;       /* Varying descriptors read by the FS. */
   } fs;
   uint64_t varying_bufs;      /* Varying buffer descriptor array. */
   uint64_t position;          /* Position varying buffer. */
   uint64_t indices;           /* Index buffer address (indexed draws). */
   union {
      /* psiz is used for point topologies, line_width for line
       * topologies; they are mutually exclusive per draw. */
      uint64_t psiz;
      float line_width;
   };
   uint64_t tls;               /* Thread-local storage descriptor. */
   uint64_t fb;                /* Framebuffer descriptor. */
   const struct pan_tiler_context *tiler_ctx;
   uint64_t viewport;          /* Viewport descriptor. */
   struct {
      struct panfrost_ptr vertex_copy_desc;
      struct panfrost_ptr frag_copy_desc;
      union {
         /* Separate vertex+tiler jobs, or a single IDVS job. */
         struct {
            struct panfrost_ptr vertex;
            struct panfrost_ptr tiler;
         };
         struct panfrost_ptr idvs;
      };
   } jobs;
};
76 
77 static bool
has_depth_att(struct panvk_cmd_buffer * cmdbuf)78 has_depth_att(struct panvk_cmd_buffer *cmdbuf)
79 {
80    return (cmdbuf->state.gfx.render.bound_attachments &
81            MESA_VK_RP_ATTACHMENT_DEPTH_BIT) != 0;
82 }
83 
84 static bool
has_stencil_att(struct panvk_cmd_buffer * cmdbuf)85 has_stencil_att(struct panvk_cmd_buffer *cmdbuf)
86 {
87    return (cmdbuf->state.gfx.render.bound_attachments &
88            MESA_VK_RP_ATTACHMENT_STENCIL_BIT) != 0;
89 }
90 
91 static bool
writes_depth(struct panvk_cmd_buffer * cmdbuf)92 writes_depth(struct panvk_cmd_buffer *cmdbuf)
93 {
94    const struct vk_depth_stencil_state *ds =
95       &cmdbuf->vk.dynamic_graphics_state.ds;
96 
97    return has_depth_att(cmdbuf) && ds->depth.test_enable &&
98           ds->depth.write_enable && ds->depth.compare_op != VK_COMPARE_OP_NEVER;
99 }
100 
101 static bool
writes_stencil(struct panvk_cmd_buffer * cmdbuf)102 writes_stencil(struct panvk_cmd_buffer *cmdbuf)
103 {
104    const struct vk_depth_stencil_state *ds =
105       &cmdbuf->vk.dynamic_graphics_state.ds;
106 
107    return has_stencil_att(cmdbuf) && ds->stencil.test_enable &&
108           ((ds->stencil.front.write_mask &&
109             (ds->stencil.front.op.fail != VK_STENCIL_OP_KEEP ||
110              ds->stencil.front.op.pass != VK_STENCIL_OP_KEEP ||
111              ds->stencil.front.op.depth_fail != VK_STENCIL_OP_KEEP)) ||
112            (ds->stencil.back.write_mask &&
113             (ds->stencil.back.op.fail != VK_STENCIL_OP_KEEP ||
114              ds->stencil.back.op.pass != VK_STENCIL_OP_KEEP ||
115              ds->stencil.back.op.depth_fail != VK_STENCIL_OP_KEEP)));
116 }
117 
118 static bool
ds_test_always_passes(struct panvk_cmd_buffer * cmdbuf)119 ds_test_always_passes(struct panvk_cmd_buffer *cmdbuf)
120 {
121    const struct vk_depth_stencil_state *ds =
122       &cmdbuf->vk.dynamic_graphics_state.ds;
123 
124    if (!has_depth_att(cmdbuf))
125       return true;
126 
127    if (ds->depth.test_enable && ds->depth.compare_op != VK_COMPARE_OP_ALWAYS)
128       return false;
129 
130    if (ds->stencil.test_enable &&
131        (ds->stencil.front.op.compare != VK_COMPARE_OP_ALWAYS ||
132         ds->stencil.back.op.compare != VK_COMPARE_OP_ALWAYS))
133       return false;
134 
135    return true;
136 }
137 
/*
 * Map a VkCompareOp to the equivalent Mali compare function.
 *
 * The two enums are numerically identical, so this is a plain cast; the
 * STATIC_ASSERTs guarantee the correspondence at compile time.
 */
static inline enum mali_func
translate_compare_func(VkCompareOp comp)
{
   STATIC_ASSERT(VK_COMPARE_OP_NEVER == (VkCompareOp)MALI_FUNC_NEVER);
   STATIC_ASSERT(VK_COMPARE_OP_LESS == (VkCompareOp)MALI_FUNC_LESS);
   STATIC_ASSERT(VK_COMPARE_OP_EQUAL == (VkCompareOp)MALI_FUNC_EQUAL);
   STATIC_ASSERT(VK_COMPARE_OP_LESS_OR_EQUAL == (VkCompareOp)MALI_FUNC_LEQUAL);
   STATIC_ASSERT(VK_COMPARE_OP_GREATER == (VkCompareOp)MALI_FUNC_GREATER);
   STATIC_ASSERT(VK_COMPARE_OP_NOT_EQUAL == (VkCompareOp)MALI_FUNC_NOT_EQUAL);
   STATIC_ASSERT(VK_COMPARE_OP_GREATER_OR_EQUAL ==
                 (VkCompareOp)MALI_FUNC_GEQUAL);
   STATIC_ASSERT(VK_COMPARE_OP_ALWAYS == (VkCompareOp)MALI_FUNC_ALWAYS);

   return (enum mali_func)comp;
}
153 
154 static enum mali_stencil_op
translate_stencil_op(VkStencilOp in)155 translate_stencil_op(VkStencilOp in)
156 {
157    switch (in) {
158    case VK_STENCIL_OP_KEEP:
159       return MALI_STENCIL_OP_KEEP;
160    case VK_STENCIL_OP_ZERO:
161       return MALI_STENCIL_OP_ZERO;
162    case VK_STENCIL_OP_REPLACE:
163       return MALI_STENCIL_OP_REPLACE;
164    case VK_STENCIL_OP_INCREMENT_AND_CLAMP:
165       return MALI_STENCIL_OP_INCR_SAT;
166    case VK_STENCIL_OP_DECREMENT_AND_CLAMP:
167       return MALI_STENCIL_OP_DECR_SAT;
168    case VK_STENCIL_OP_INCREMENT_AND_WRAP:
169       return MALI_STENCIL_OP_INCR_WRAP;
170    case VK_STENCIL_OP_DECREMENT_AND_WRAP:
171       return MALI_STENCIL_OP_DECR_WRAP;
172    case VK_STENCIL_OP_INVERT:
173       return MALI_STENCIL_OP_INVERT;
174    default:
175       unreachable("Invalid stencil op");
176    }
177 }
178 
179 static VkResult
panvk_draw_prepare_fs_rsd(struct panvk_cmd_buffer * cmdbuf,struct panvk_draw_data * draw)180 panvk_draw_prepare_fs_rsd(struct panvk_cmd_buffer *cmdbuf,
181                           struct panvk_draw_data *draw)
182 {
183    bool dirty = dyn_gfx_state_dirty(cmdbuf, RS_RASTERIZER_DISCARD_ENABLE) ||
184                 dyn_gfx_state_dirty(cmdbuf, RS_DEPTH_CLAMP_ENABLE) ||
185                 dyn_gfx_state_dirty(cmdbuf, RS_DEPTH_CLIP_ENABLE) ||
186                 dyn_gfx_state_dirty(cmdbuf, RS_DEPTH_BIAS_ENABLE) ||
187                 dyn_gfx_state_dirty(cmdbuf, RS_DEPTH_BIAS_FACTORS) ||
188                 dyn_gfx_state_dirty(cmdbuf, CB_LOGIC_OP_ENABLE) ||
189                 dyn_gfx_state_dirty(cmdbuf, CB_LOGIC_OP) ||
190                 dyn_gfx_state_dirty(cmdbuf, CB_ATTACHMENT_COUNT) ||
191                 dyn_gfx_state_dirty(cmdbuf, CB_COLOR_WRITE_ENABLES) ||
192                 dyn_gfx_state_dirty(cmdbuf, CB_BLEND_ENABLES) ||
193                 dyn_gfx_state_dirty(cmdbuf, CB_BLEND_EQUATIONS) ||
194                 dyn_gfx_state_dirty(cmdbuf, CB_WRITE_MASKS) ||
195                 dyn_gfx_state_dirty(cmdbuf, CB_BLEND_CONSTANTS) ||
196                 dyn_gfx_state_dirty(cmdbuf, DS_DEPTH_TEST_ENABLE) ||
197                 dyn_gfx_state_dirty(cmdbuf, DS_DEPTH_WRITE_ENABLE) ||
198                 dyn_gfx_state_dirty(cmdbuf, DS_DEPTH_COMPARE_OP) ||
199                 dyn_gfx_state_dirty(cmdbuf, DS_DEPTH_COMPARE_OP) ||
200                 dyn_gfx_state_dirty(cmdbuf, DS_STENCIL_TEST_ENABLE) ||
201                 dyn_gfx_state_dirty(cmdbuf, DS_STENCIL_OP) ||
202                 dyn_gfx_state_dirty(cmdbuf, DS_STENCIL_COMPARE_MASK) ||
203                 dyn_gfx_state_dirty(cmdbuf, DS_STENCIL_WRITE_MASK) ||
204                 dyn_gfx_state_dirty(cmdbuf, DS_STENCIL_REFERENCE) ||
205                 dyn_gfx_state_dirty(cmdbuf, MS_RASTERIZATION_SAMPLES) ||
206                 dyn_gfx_state_dirty(cmdbuf, MS_SAMPLE_MASK) ||
207                 dyn_gfx_state_dirty(cmdbuf, MS_ALPHA_TO_COVERAGE_ENABLE) ||
208                 dyn_gfx_state_dirty(cmdbuf, MS_ALPHA_TO_ONE_ENABLE) ||
209                 gfx_state_dirty(cmdbuf, FS) || gfx_state_dirty(cmdbuf, OQ) ||
210                 gfx_state_dirty(cmdbuf, RENDER_STATE);
211 
212    if (!dirty) {
213       draw->fs.rsd = cmdbuf->state.gfx.fs.rsd;
214       return VK_SUCCESS;
215    }
216 
217    const struct vk_dynamic_graphics_state *dyns =
218       &cmdbuf->vk.dynamic_graphics_state;
219    const struct vk_rasterization_state *rs = &dyns->rs;
220    const struct vk_color_blend_state *cb = &dyns->cb;
221    const struct vk_depth_stencil_state *ds = &dyns->ds;
222    const struct panvk_shader *fs = get_fs(cmdbuf);
223    const struct pan_shader_info *fs_info = fs ? &fs->info : NULL;
224    unsigned bd_count = MAX2(cb->attachment_count, 1);
225    bool test_s = has_stencil_att(cmdbuf) && ds->stencil.test_enable;
226    bool test_z = has_depth_att(cmdbuf) && ds->depth.test_enable;
227    bool writes_z = writes_depth(cmdbuf);
228    bool writes_s = writes_stencil(cmdbuf);
229 
230    struct panfrost_ptr ptr = panvk_cmd_alloc_desc_aggregate(
231       cmdbuf, PAN_DESC(RENDERER_STATE), PAN_DESC_ARRAY(bd_count, BLEND));
232    if (!ptr.gpu)
233       return VK_ERROR_OUT_OF_DEVICE_MEMORY;
234 
235    struct mali_renderer_state_packed *rsd = ptr.cpu;
236    struct mali_blend_packed *bds = ptr.cpu + pan_size(RENDERER_STATE);
237    struct panvk_blend_info *binfo = &cmdbuf->state.gfx.cb.info;
238 
239    uint64_t fs_code = panvk_shader_get_dev_addr(fs);
240 
241    if (fs_info != NULL) {
242       panvk_per_arch(blend_emit_descs)(cmdbuf, bds);
243    } else {
244       for (unsigned i = 0; i < bd_count; i++) {
245          pan_pack(&bds[i], BLEND, cfg) {
246             cfg.enable = false;
247             cfg.internal.mode = MALI_BLEND_MODE_OFF;
248          }
249       }
250    }
251 
252    pan_pack(rsd, RENDERER_STATE, cfg) {
253       bool alpha_to_coverage = dyns->ms.alpha_to_coverage_enable;
254 
255       if (fs) {
256          pan_shader_prepare_rsd(fs_info, fs_code, &cfg);
257 
258          if (binfo->shader_loads_blend_const) {
259             /* Preload the blend constant if the blend shader depends on it. */
260             cfg.preload.uniform_count =
261                MAX2(cfg.preload.uniform_count,
262                     DIV_ROUND_UP(SYSVALS_PUSH_CONST_BASE +
263                                     sizeof(struct panvk_graphics_sysvals),
264                                  8));
265          }
266 
267          uint8_t rt_written = fs_info->outputs_written >> FRAG_RESULT_DATA0;
268          uint8_t rt_mask = cmdbuf->state.gfx.render.bound_attachments &
269                            MESA_VK_RP_ATTACHMENT_ANY_COLOR_BITS;
270          cfg.properties.allow_forward_pixel_to_kill =
271             fs_info->fs.can_fpk && !(rt_mask & ~rt_written) &&
272             !alpha_to_coverage && !binfo->any_dest_read;
273 
274          bool writes_zs = writes_z || writes_s;
275          bool zs_always_passes = ds_test_always_passes(cmdbuf);
276          bool oq = cmdbuf->state.gfx.occlusion_query.mode !=
277                    MALI_OCCLUSION_MODE_DISABLED;
278 
279          struct pan_earlyzs_state earlyzs =
280             pan_earlyzs_get(pan_earlyzs_analyze(fs_info), writes_zs || oq,
281                             alpha_to_coverage, zs_always_passes);
282 
283          cfg.properties.pixel_kill_operation = earlyzs.kill;
284          cfg.properties.zs_update_operation = earlyzs.update;
285          cfg.multisample_misc.evaluate_per_sample = fs->info.fs.sample_shading;
286       } else {
287          cfg.properties.depth_source = MALI_DEPTH_SOURCE_FIXED_FUNCTION;
288          cfg.properties.allow_forward_pixel_to_kill = true;
289          cfg.properties.allow_forward_pixel_to_be_killed = true;
290          cfg.properties.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY;
291       }
292 
293       bool msaa = dyns->ms.rasterization_samples > 1;
294       cfg.multisample_misc.multisample_enable = msaa;
295       cfg.multisample_misc.sample_mask =
296          msaa ? dyns->ms.sample_mask : UINT16_MAX;
297 
298       cfg.multisample_misc.depth_function =
299          test_z ? translate_compare_func(ds->depth.compare_op)
300                 : MALI_FUNC_ALWAYS;
301 
302       cfg.multisample_misc.depth_write_mask = writes_z;
303       cfg.multisample_misc.fixed_function_near_discard =
304       cfg.multisample_misc.fixed_function_far_discard =
305          vk_rasterization_state_depth_clip_enable(rs);
306       cfg.multisample_misc.fixed_function_depth_range_fixed =
307          !rs->depth_clamp_enable;
308       cfg.multisample_misc.shader_depth_range_fixed = true;
309 
310       cfg.stencil_mask_misc.stencil_enable = test_s;
311       cfg.stencil_mask_misc.alpha_to_coverage = alpha_to_coverage;
312       cfg.stencil_mask_misc.alpha_test_compare_function = MALI_FUNC_ALWAYS;
313       cfg.stencil_mask_misc.front_facing_depth_bias = rs->depth_bias.enable;
314       cfg.stencil_mask_misc.back_facing_depth_bias = rs->depth_bias.enable;
315       cfg.stencil_mask_misc.single_sampled_lines =
316          dyns->ms.rasterization_samples <= 1;
317 
318       cfg.depth_units = rs->depth_bias.constant_factor;
319       cfg.depth_factor = rs->depth_bias.slope_factor;
320       cfg.depth_bias_clamp = rs->depth_bias.clamp;
321 
322       cfg.stencil_front.mask = ds->stencil.front.compare_mask;
323       cfg.stencil_back.mask = ds->stencil.back.compare_mask;
324 
325       cfg.stencil_mask_misc.stencil_mask_front = ds->stencil.front.write_mask;
326       cfg.stencil_mask_misc.stencil_mask_back = ds->stencil.back.write_mask;
327 
328       cfg.stencil_front.reference_value = ds->stencil.front.reference;
329       cfg.stencil_back.reference_value = ds->stencil.back.reference;
330 
331       if (test_s) {
332          cfg.stencil_front.compare_function =
333             translate_compare_func(ds->stencil.front.op.compare);
334          cfg.stencil_front.stencil_fail =
335             translate_stencil_op(ds->stencil.front.op.fail);
336          cfg.stencil_front.depth_fail =
337             translate_stencil_op(ds->stencil.front.op.depth_fail);
338          cfg.stencil_front.depth_pass =
339             translate_stencil_op(ds->stencil.front.op.pass);
340          cfg.stencil_back.compare_function =
341             translate_compare_func(ds->stencil.back.op.compare);
342          cfg.stencil_back.stencil_fail =
343             translate_stencil_op(ds->stencil.back.op.fail);
344          cfg.stencil_back.depth_fail =
345             translate_stencil_op(ds->stencil.back.op.depth_fail);
346          cfg.stencil_back.depth_pass =
347             translate_stencil_op(ds->stencil.back.op.pass);
348       }
349    }
350 
351    cmdbuf->state.gfx.fs.rsd = ptr.gpu;
352    draw->fs.rsd = cmdbuf->state.gfx.fs.rsd;
353    return VK_SUCCESS;
354 }
355 
356 static VkResult
panvk_draw_prepare_tiler_context(struct panvk_cmd_buffer * cmdbuf,struct panvk_draw_data * draw)357 panvk_draw_prepare_tiler_context(struct panvk_cmd_buffer *cmdbuf,
358                                  struct panvk_draw_data *draw)
359 {
360    struct panvk_batch *batch = cmdbuf->cur_batch;
361    VkResult result =
362       panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf, draw->info.layer_id);
363    if (result != VK_SUCCESS)
364       return result;
365 
366    draw->tiler_ctx = &batch->tiler.ctx;
367    return VK_SUCCESS;
368 }
369 
/*
 * Pick the HW pixel format used to store a varying slot.
 *
 * Point coord and point size are 16-bit floats, positions use the special
 * SNAP_4 format, and everything else derives from the pipe format (falling
 * back to a constant-zero format when no pipe format is assigned). The
 * swizzle/component-order encoding differs between v6 and v7+, hence the
 * PAN_ARCH conditionals.
 */
static mali_pixel_format
panvk_varying_hw_format(gl_shader_stage stage, gl_varying_slot loc,
                        enum pipe_format pfmt)
{
   switch (loc) {
   case VARYING_SLOT_PNTC:
   case VARYING_SLOT_PSIZ:
#if PAN_ARCH <= 6
      return (MALI_R16F << 12) | panfrost_get_default_swizzle(1);
#else
      return (MALI_R16F << 12) | MALI_RGB_COMPONENT_ORDER_R000;
#endif
   case VARYING_SLOT_POS:
#if PAN_ARCH <= 6
      return (MALI_SNAP_4 << 12) | panfrost_get_default_swizzle(4);
#else
      return (MALI_SNAP_4 << 12) | MALI_RGB_COMPONENT_ORDER_RGBA;
#endif
   default:
      if (pfmt != PIPE_FORMAT_NONE)
         return GENX(panfrost_format_from_pipe_format)(pfmt)->hw;

      /* No format assigned: emit a constant zero. */
#if PAN_ARCH >= 7
      return (MALI_CONSTANT << 12) | MALI_RGB_COMPONENT_ORDER_0000;
#else
      return (MALI_CONSTANT << 12) | PAN_V6_SWIZZLE(0, 0, 0, 0);
#endif
   }
}
399 
/*
 * Allocate the varying buffers for this draw, emit the attribute buffer
 * descriptors describing them, and fill in position/point-size/line-width
 * information in the draw data.
 *
 * Returns VK_ERROR_OUT_OF_DEVICE_MEMORY if any allocation fails.
 */
static VkResult
panvk_draw_prepare_varyings(struct panvk_cmd_buffer *cmdbuf,
                            struct panvk_draw_data *draw)
{
   const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader;
   const struct panvk_shader_link *link = &cmdbuf->state.gfx.link;
   /* +1 for the zeroed terminator entry emitted below. */
   struct panfrost_ptr bufs = panvk_cmd_alloc_desc_array(
      cmdbuf, PANVK_VARY_BUF_MAX + 1, ATTRIBUTE_BUFFER);
   if (!bufs.gpu)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   struct mali_attribute_buffer_packed *buf_descs = bufs.cpu;
   const struct vk_input_assembly_state *ia =
      &cmdbuf->vk.dynamic_graphics_state.ia;
   /* Point size is only meaningful for point-list topologies. */
   bool writes_point_size =
      vs->info.vs.writes_point_size &&
      ia->primitive_topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
   unsigned vertex_count =
      draw->padded_vertex_count * draw->info.instance.count;
   uint64_t psiz_buf = 0;

   for (unsigned i = 0; i < PANVK_VARY_BUF_MAX; i++) {
      /* Zero-stride buffers get no backing memory and a NULL pointer. */
      unsigned buf_size = vertex_count * link->buf_strides[i];
      uint64_t buf_addr =
         buf_size ? panvk_cmd_alloc_dev_mem(cmdbuf, varying, buf_size, 64).gpu
                  : 0;
      if (buf_size && !buf_addr)
         return VK_ERROR_OUT_OF_DEVICE_MEMORY;

      pan_pack(&buf_descs[i], ATTRIBUTE_BUFFER, cfg) {
         cfg.stride = link->buf_strides[i];
         cfg.size = buf_size;
         cfg.pointer = buf_addr;
      }

      if (i == PANVK_VARY_BUF_POSITION)
         draw->position = buf_addr;

      if (i == PANVK_VARY_BUF_PSIZ)
         psiz_buf = buf_addr;
   }

   /* We need an empty entry to stop prefetching on Bifrost */
   memset(bufs.cpu + (pan_size(ATTRIBUTE_BUFFER) * PANVK_VARY_BUF_MAX), 0,
          pan_size(ATTRIBUTE_BUFFER));

   /* psiz and line_width share a union in panvk_draw_data: pick whichever
    * applies to the current topology. */
   if (writes_point_size)
      draw->psiz = psiz_buf;
   else if (ia->primitive_topology == VK_PRIMITIVE_TOPOLOGY_LINE_LIST ||
            ia->primitive_topology == VK_PRIMITIVE_TOPOLOGY_LINE_STRIP)
      draw->line_width = cmdbuf->vk.dynamic_graphics_state.rs.line.width;
   else
      draw->line_width = 1.0f;

   draw->varying_bufs = bufs.gpu;
   draw->vs.varyings = panvk_priv_mem_dev_addr(link->vs.attribs);
   draw->fs.varyings = panvk_priv_mem_dev_addr(link->fs.attribs);
   return VK_SUCCESS;
}
459 
/*
 * Emit the attribute buffer descriptor (plus its continuation slot) for one
 * vertex buffer binding.
 *
 * Each binding consumes two descriptor slots: the second one is either an
 * NPOT-divisor continuation or zeroed. The buffer address is aligned down to
 * 64 bytes, and the remainder is folded into the attribute offsets instead
 * (see panvk_draw_emit_attrib).
 */
static void
panvk_draw_emit_attrib_buf(const struct panvk_draw_data *draw,
                           const struct vk_vertex_binding_state *buf_info,
                           const struct panvk_attrib_buf *buf,
                           struct mali_attribute_buffer_packed *desc)
{
   uint64_t addr = buf->address & ~63ULL;
   unsigned size = buf->size + (buf->address & 63);
   unsigned divisor = draw->padded_vertex_count * buf_info->divisor;
   bool per_instance = buf_info->input_rate == VK_VERTEX_INPUT_RATE_INSTANCE;
   struct mali_attribute_buffer_packed *buf_ext = &desc[1];

   /* TODO: support instanced arrays */
   if (draw->info.instance.count <= 1) {
      /* Single instance: plain 1D buffer. Per-instance data degenerates to a
       * zero stride so every vertex reads element 0. */
      pan_pack(desc, ATTRIBUTE_BUFFER, cfg) {
         cfg.type = MALI_ATTRIBUTE_TYPE_1D;
         cfg.stride = per_instance ? 0 : buf_info->stride;
         cfg.pointer = addr;
         cfg.size = size;
      }
   } else if (!per_instance) {
      /* Per-vertex data with instancing: wrap the index at the padded vertex
       * count so each instance re-reads the same vertices. */
      pan_pack(desc, ATTRIBUTE_BUFFER, cfg) {
         cfg.type = MALI_ATTRIBUTE_TYPE_1D_MODULUS;
         cfg.divisor = draw->padded_vertex_count;
         cfg.stride = buf_info->stride;
         cfg.pointer = addr;
         cfg.size = size;
      }
   } else if (!divisor) {
      /* instance_divisor == 0 means all instances share the same value.
       * Make it a 1D array with a zero stride.
       */
      pan_pack(desc, ATTRIBUTE_BUFFER, cfg) {
         cfg.type = MALI_ATTRIBUTE_TYPE_1D;
         cfg.stride = 0;
         cfg.pointer = addr;
         cfg.size = size;
      }
   } else if (util_is_power_of_two_or_zero(divisor)) {
      /* Power-of-two divisor: the HW divides with a right shift. */
      pan_pack(desc, ATTRIBUTE_BUFFER, cfg) {
         cfg.type = MALI_ATTRIBUTE_TYPE_1D_POT_DIVISOR;
         cfg.stride = buf_info->stride;
         cfg.pointer = addr;
         cfg.size = size;
         cfg.divisor_r = __builtin_ctz(divisor);
      }
   } else {
      /* Arbitrary divisor: use the magic-number division encoding, which
       * needs the continuation descriptor for the numerator. */
      unsigned divisor_r = 0, divisor_e = 0;
      unsigned divisor_num =
         panfrost_compute_magic_divisor(divisor, &divisor_r, &divisor_e);
      pan_pack(desc, ATTRIBUTE_BUFFER, cfg) {
         cfg.type = MALI_ATTRIBUTE_TYPE_1D_NPOT_DIVISOR;
         cfg.stride = buf_info->stride;
         cfg.pointer = addr;
         cfg.size = size;
         cfg.divisor_r = divisor_r;
         cfg.divisor_e = divisor_e;
      }

      pan_cast_and_pack(buf_ext, ATTRIBUTE_BUFFER_CONTINUATION_NPOT, cfg) {
         cfg.divisor_numerator = divisor_num;
         cfg.divisor = buf_info->divisor;
      }

      buf_ext = NULL;
   }

   /* If the buffer extension wasn't used, memset(0) */
   if (buf_ext)
      memset(buf_ext, 0, pan_size(ATTRIBUTE_BUFFER));
}
531 
/*
 * Emit one vertex attribute descriptor.
 *
 * The buffer index is doubled because each binding occupies two attribute
 * buffer slots (descriptor + continuation, see panvk_draw_emit_attrib_buf).
 * The low 6 bits of the buffer address that were stripped for alignment are
 * added back into the attribute offset here.
 */
static void
panvk_draw_emit_attrib(const struct panvk_draw_data *draw,
                       const struct vk_vertex_attribute_state *attrib_info,
                       const struct vk_vertex_binding_state *buf_info,
                       const struct panvk_attrib_buf *buf,
                       struct mali_attribute_packed *desc)
{
   bool per_instance = buf_info->input_rate == VK_VERTEX_INPUT_RATE_INSTANCE;
   enum pipe_format f = vk_format_to_pipe_format(attrib_info->format);
   unsigned buf_idx = attrib_info->binding;

   pan_pack(desc, ATTRIBUTE, cfg) {
      cfg.buffer_index = buf_idx * 2;
      cfg.offset = attrib_info->offset + (buf->address & 63);
      cfg.offset_enable = true;

      /* Per-instance attributes start at the base instance. */
      if (per_instance)
         cfg.offset += draw->info.instance.base * buf_info->stride;

      cfg.format = GENX(panfrost_format_from_pipe_format)(f)->hw;
   }
}
554 
/*
 * Emit the vertex shader attribute and attribute buffer descriptors,
 * including the image attribute table when the VS accesses images.
 *
 * The descriptors are cached in cmdbuf->state.gfx.vs and only re-emitted
 * when vertex input state, vertex buffers, or descriptor state is dirty.
 * Returns VK_ERROR_OUT_OF_DEVICE_MEMORY if allocation fails.
 */
static VkResult
panvk_draw_prepare_vs_attribs(struct panvk_cmd_buffer *cmdbuf,
                              struct panvk_draw_data *draw)
{
   const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader;
   const struct vk_vertex_input_state *vi =
      cmdbuf->vk.dynamic_graphics_state.vi;
   unsigned num_imgs = vs->desc_info.others.count[PANVK_BIFROST_DESC_TABLE_IMG];
   unsigned num_vs_attribs = util_last_bit(vi->attributes_valid);
   unsigned num_vbs = util_last_bit(vi->bindings_valid);
   /* Image attributes live after the fixed MAX_VS_ATTRIBS slots. */
   unsigned attrib_count =
      num_imgs ? MAX_VS_ATTRIBS + num_imgs : num_vs_attribs;
   bool dirty =
      dyn_gfx_state_dirty(cmdbuf, VI) ||
      dyn_gfx_state_dirty(cmdbuf, VI_BINDINGS_VALID) ||
      dyn_gfx_state_dirty(cmdbuf, VI_BINDING_STRIDES) ||
      gfx_state_dirty(cmdbuf, VB) ||
      gfx_state_dirty(cmdbuf, DESC_STATE);

   /* Fast path: the cached descriptors are still valid. */
   if (!dirty)
      return VK_SUCCESS;

   /* Two buffer slots per binding (descriptor + continuation), plus one
    * zeroed terminator entry. */
   unsigned attrib_buf_count = (num_vbs + num_imgs) * 2;
   struct panfrost_ptr bufs = panvk_cmd_alloc_desc_array(
      cmdbuf, attrib_buf_count + 1, ATTRIBUTE_BUFFER);
   struct mali_attribute_buffer_packed *attrib_buf_descs = bufs.cpu;
   struct panfrost_ptr attribs =
      panvk_cmd_alloc_desc_array(cmdbuf, attrib_count, ATTRIBUTE);
   struct mali_attribute_packed *attrib_descs = attribs.cpu;

   if (!bufs.gpu || (attrib_count && !attribs.gpu))
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   for (unsigned i = 0; i < num_vbs; i++) {
      if (vi->bindings_valid & BITFIELD_BIT(i)) {
         panvk_draw_emit_attrib_buf(draw, &vi->bindings[i],
                                    &cmdbuf->state.gfx.vb.bufs[i],
                                    &attrib_buf_descs[i * 2]);
      } else {
         memset(&attrib_buf_descs[i * 2], 0, sizeof(*attrib_buf_descs) * 2);
      }
   }

   for (unsigned i = 0; i < num_vs_attribs; i++) {
      if (vi->attributes_valid & BITFIELD_BIT(i)) {
         unsigned buf_idx = vi->attributes[i].binding;
         panvk_draw_emit_attrib(
            draw, &vi->attributes[i], &vi->bindings[buf_idx],
            &cmdbuf->state.gfx.vb.bufs[buf_idx], &attrib_descs[i]);
      } else {
         memset(&attrib_descs[i], 0, sizeof(attrib_descs[0]));
      }
   }

   /* A NULL entry is needed to stop prefetching on Bifrost */
   memset(bufs.cpu + (pan_size(ATTRIBUTE_BUFFER) * attrib_buf_count), 0,
          pan_size(ATTRIBUTE_BUFFER));

   cmdbuf->state.gfx.vs.attrib_bufs = bufs.gpu;
   cmdbuf->state.gfx.vs.attribs = attribs.gpu;

   if (num_imgs) {
      cmdbuf->state.gfx.vs.desc.img_attrib_table =
         attribs.gpu + (MAX_VS_ATTRIBS * pan_size(ATTRIBUTE));
      cmdbuf->state.gfx.vs.desc.tables[PANVK_BIFROST_DESC_TABLE_IMG] =
         bufs.gpu + (num_vbs * pan_size(ATTRIBUTE_BUFFER) * 2);
   }

   return VK_SUCCESS;
}
625 
/*
 * Refresh the VS attribute descriptors and mirror them into the draw data.
 *
 * NOTE(review): the VkResult of panvk_draw_prepare_vs_attribs() is dropped
 * here, so an OOM during descriptor allocation would go unnoticed by the
 * caller — presumably the command-buffer error state catches this elsewhere;
 * verify before relying on it.
 */
static void
panvk_draw_prepare_attributes(struct panvk_cmd_buffer *cmdbuf,
                              struct panvk_draw_data *draw)
{
   panvk_draw_prepare_vs_attribs(cmdbuf, draw);
   draw->vs.attributes = cmdbuf->state.gfx.vs.attribs;
   draw->vs.attribute_bufs = cmdbuf->state.gfx.vs.attrib_bufs;
}
634 
/*
 * Pack the Mali viewport descriptor from viewport 0 and scissor 0,
 * intersecting the two into a single clamped scissor box plus a depth range.
 *
 * Does nothing if no viewport is set (allowed when rasterizer discard is
 * enabled).
 */
static void
panvk_emit_viewport(struct panvk_cmd_buffer *cmdbuf,
                    struct mali_viewport_packed *vpd)
{
   const struct vk_viewport_state *vp = &cmdbuf->vk.dynamic_graphics_state.vp;

   if (vp->viewport_count < 1)
      return;

   /* Depth range comes from the sysvals, which hold the viewport z
    * offset/scale already computed for this draw. */
   struct panvk_graphics_sysvals *sysvals = &cmdbuf->state.gfx.sysvals;
   const VkViewport *viewport = &vp->viewports[0];
   const VkRect2D *scissor = &vp->scissors[0];
   float minz = sysvals->viewport.offset.z;
   float maxz = minz + sysvals->viewport.scale.z;

   /* The spec says "width must be greater than 0.0" */
   assert(viewport->width >= 0);
   int minx = (int)viewport->x;
   int maxx = (int)(viewport->x + viewport->width);

   /* Viewport height can be negative */
   int miny = MIN2((int)viewport->y, (int)(viewport->y + viewport->height));
   int maxy = MAX2((int)viewport->y, (int)(viewport->y + viewport->height));

   /* Intersect the viewport box with the scissor rectangle. */
   assert(scissor->offset.x >= 0 && scissor->offset.y >= 0);
   minx = MAX2(scissor->offset.x, minx);
   miny = MAX2(scissor->offset.y, miny);
   maxx = MIN2(scissor->offset.x + scissor->extent.width, maxx);
   maxy = MIN2(scissor->offset.y + scissor->extent.height, maxy);

   /* Make sure we don't end up with a max < min when width/height is 0 */
   maxx = maxx > minx ? maxx - 1 : maxx;
   maxy = maxy > miny ? maxy - 1 : maxy;

   /* Clamp viewport scissor to valid range */
   minx = CLAMP(minx, 0, UINT16_MAX);
   maxx = CLAMP(maxx, 0, UINT16_MAX);
   miny = CLAMP(miny, 0, UINT16_MAX);
   maxy = CLAMP(maxy, 0, UINT16_MAX);

   pan_pack(vpd, VIEWPORT, cfg) {
      cfg.scissor_minimum_x = minx;
      cfg.scissor_minimum_y = miny;
      cfg.scissor_maximum_x = maxx;
      cfg.scissor_maximum_y = maxy;
      /* minz/maxz may be inverted when the viewport z scale is negative. */
      cfg.minimum_z = MIN2(minz, maxz);
      cfg.maximum_z = MAX2(minz, maxz);
   }
}
684 
/*
 * Emit (or reuse) the viewport descriptor for this draw.
 *
 * Re-emission happens when no descriptor exists yet or when any state packed
 * into it (viewports, scissors, depth clip/clamp) is dirty. Returns
 * VK_ERROR_OUT_OF_DEVICE_MEMORY if allocation fails.
 */
static VkResult
panvk_draw_prepare_viewport(struct panvk_cmd_buffer *cmdbuf,
                            struct panvk_draw_data *draw)
{
   /* When rasterizerDiscardEnable is active, it is allowed to have viewport and
    * scissor disabled.
    * As a result, we define an empty one.
    */
   if (!cmdbuf->state.gfx.vpd || dyn_gfx_state_dirty(cmdbuf, VP_VIEWPORTS) ||
       dyn_gfx_state_dirty(cmdbuf, VP_SCISSORS) ||
       dyn_gfx_state_dirty(cmdbuf, RS_DEPTH_CLIP_ENABLE) ||
       dyn_gfx_state_dirty(cmdbuf, RS_DEPTH_CLAMP_ENABLE)) {
      struct panfrost_ptr vp = panvk_cmd_alloc_desc(cmdbuf, VIEWPORT);
      if (!vp.gpu)
         return VK_ERROR_OUT_OF_DEVICE_MEMORY;

      panvk_emit_viewport(cmdbuf, vp.cpu);
      cmdbuf->state.gfx.vpd = vp.gpu;
   }

   draw->viewport = cmdbuf->state.gfx.vpd;
   return VK_SUCCESS;
}
708 
/*
 * Pack the DRAW (DCD) section of a vertex job: shader state, attribute and
 * varying pointers, and the VS resource tables.
 */
static void
panvk_emit_vertex_dcd(struct panvk_cmd_buffer *cmdbuf,
                      const struct panvk_draw_data *draw,
                      struct mali_draw_packed *dcd)
{
   const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader;
   const struct panvk_shader_desc_state *vs_desc_state =
      &cmdbuf->state.gfx.vs.desc;

   pan_pack(dcd, DRAW, cfg) {
      cfg.state = panvk_priv_mem_dev_addr(vs->rsd);
      cfg.attributes = draw->vs.attributes;
      cfg.attribute_buffers = draw->vs.attribute_bufs;
      cfg.varyings = draw->vs.varyings;
      cfg.varying_buffers = draw->varying_bufs;
      cfg.thread_storage = draw->tls;
      cfg.offset_start = draw->info.vertex.raw_offset;
      /* instance_size is the padded vertex count when instancing, 1
       * otherwise. */
      cfg.instance_size =
         draw->info.instance.count > 1 ? draw->padded_vertex_count : 1;
      cfg.uniform_buffers = vs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_UBO];
      cfg.push_uniforms = cmdbuf->state.gfx.vs.push_uniforms;
      cfg.textures = vs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_TEXTURE];
      cfg.samplers = vs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_SAMPLER];
   }
}
734 
/*
 * Allocate and fill the COMPUTE_JOB descriptor running the vertex shader,
 * and register it with the batch. Returns VK_ERROR_OUT_OF_DEVICE_MEMORY if
 * allocation fails.
 */
static VkResult
panvk_draw_prepare_vertex_job(struct panvk_cmd_buffer *cmdbuf,
                              struct panvk_draw_data *draw)
{
   struct panvk_batch *batch = cmdbuf->cur_batch;
   struct panfrost_ptr ptr = panvk_cmd_alloc_desc(cmdbuf, COMPUTE_JOB);
   if (!ptr.gpu)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   util_dynarray_append(&batch->jobs, void *, ptr.cpu);
   draw->jobs.vertex = ptr;

   /* The invocation section was computed earlier for this draw. */
   memcpy(pan_section_ptr(ptr.cpu, COMPUTE_JOB, INVOCATION), &draw->invocation,
          pan_size(INVOCATION));

   pan_section_pack(ptr.cpu, COMPUTE_JOB, PARAMETERS, cfg) {
      cfg.job_task_split = 5;
   }

   panvk_emit_vertex_dcd(cmdbuf, draw,
                         pan_section_ptr(ptr.cpu, COMPUTE_JOB, DRAW));
   return VK_SUCCESS;
}
758 
759 static enum mali_draw_mode
translate_prim_topology(VkPrimitiveTopology in)760 translate_prim_topology(VkPrimitiveTopology in)
761 {
762    /* Test VK_PRIMITIVE_TOPOLOGY_META_RECT_LIST_MESA separately, as it's not
763     * part of the VkPrimitiveTopology enum.
764     */
765    if (in == VK_PRIMITIVE_TOPOLOGY_META_RECT_LIST_MESA)
766       return MALI_DRAW_MODE_TRIANGLES;
767 
768    switch (in) {
769    case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
770       return MALI_DRAW_MODE_POINTS;
771    case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
772       return MALI_DRAW_MODE_LINES;
773    case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
774       return MALI_DRAW_MODE_LINE_STRIP;
775    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
776       return MALI_DRAW_MODE_TRIANGLES;
777    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
778       return MALI_DRAW_MODE_TRIANGLE_STRIP;
779    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
780       return MALI_DRAW_MODE_TRIANGLE_FAN;
781    case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
782    case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
783    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
784    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
785    case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
786    default:
787       unreachable("Invalid primitive type");
788    }
789 }
790 
791 static void
panvk_emit_tiler_primitive(struct panvk_cmd_buffer * cmdbuf,const struct panvk_draw_data * draw,struct mali_primitive_packed * prim)792 panvk_emit_tiler_primitive(struct panvk_cmd_buffer *cmdbuf,
793                            const struct panvk_draw_data *draw,
794                            struct mali_primitive_packed *prim)
795 {
796    const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader;
797    const struct panvk_shader *fs = get_fs(cmdbuf);
798    const struct vk_dynamic_graphics_state *dyns =
799       &cmdbuf->vk.dynamic_graphics_state;
800    const struct vk_input_assembly_state *ia = &dyns->ia;
801    const struct vk_rasterization_state *rs = &dyns->rs;
802    bool writes_point_size =
803       vs->info.vs.writes_point_size &&
804       ia->primitive_topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
805    bool secondary_shader = vs->info.vs.secondary_enable && fs != NULL;
806 
807    pan_pack(prim, PRIMITIVE, cfg) {
808       cfg.draw_mode = translate_prim_topology(ia->primitive_topology);
809       if (writes_point_size)
810          cfg.point_size_array_format = MALI_POINT_SIZE_ARRAY_FORMAT_FP16;
811 
812       cfg.first_provoking_vertex =
813          cmdbuf->vk.dynamic_graphics_state.rs.provoking_vertex ==
814             VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT;
815 
816       if (ia->primitive_restart_enable)
817          cfg.primitive_restart = MALI_PRIMITIVE_RESTART_IMPLICIT;
818       cfg.job_task_split = 6;
819 
820       if (draw->info.index.size) {
821          cfg.index_count = draw->info.vertex.count;
822          cfg.indices = draw->indices;
823          cfg.base_vertex_offset =
824             draw->info.vertex.base - draw->info.vertex.raw_offset;
825 
826          switch (draw->info.index.size) {
827          case 4:
828             cfg.index_type = MALI_INDEX_TYPE_UINT32;
829             break;
830          case 2:
831             cfg.index_type = MALI_INDEX_TYPE_UINT16;
832             break;
833          case 1:
834             cfg.index_type = MALI_INDEX_TYPE_UINT8;
835             break;
836          default:
837             unreachable("Invalid index size");
838          }
839       } else {
840          cfg.index_count = draw->info.vertex.count;
841          cfg.index_type = MALI_INDEX_TYPE_NONE;
842       }
843 
844       cfg.low_depth_cull = cfg.high_depth_cull =
845          vk_rasterization_state_depth_clip_enable(rs);
846 
847       cfg.secondary_shader = secondary_shader;
848    }
849 }
850 
851 static void
panvk_emit_tiler_primitive_size(struct panvk_cmd_buffer * cmdbuf,const struct panvk_draw_data * draw,struct mali_primitive_size_packed * primsz)852 panvk_emit_tiler_primitive_size(struct panvk_cmd_buffer *cmdbuf,
853                                 const struct panvk_draw_data *draw,
854                                 struct mali_primitive_size_packed *primsz)
855 {
856    const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader;
857    const struct vk_input_assembly_state *ia =
858       &cmdbuf->vk.dynamic_graphics_state.ia;
859    bool writes_point_size =
860       vs->info.vs.writes_point_size &&
861       ia->primitive_topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
862 
863    pan_pack(primsz, PRIMITIVE_SIZE, cfg) {
864       if (writes_point_size) {
865          cfg.size_array = draw->psiz;
866       } else {
867          cfg.constant = draw->line_width;
868       }
869    }
870 }
871 
872 static void
panvk_emit_tiler_dcd(struct panvk_cmd_buffer * cmdbuf,const struct panvk_draw_data * draw,struct mali_draw_packed * dcd)873 panvk_emit_tiler_dcd(struct panvk_cmd_buffer *cmdbuf,
874                      const struct panvk_draw_data *draw,
875                      struct mali_draw_packed *dcd)
876 {
877    struct panvk_shader_desc_state *fs_desc_state = &cmdbuf->state.gfx.fs.desc;
878    const struct vk_rasterization_state *rs =
879       &cmdbuf->vk.dynamic_graphics_state.rs;
880    const struct vk_input_assembly_state *ia =
881       &cmdbuf->vk.dynamic_graphics_state.ia;
882 
883    pan_pack(dcd, DRAW, cfg) {
884       cfg.front_face_ccw = rs->front_face == VK_FRONT_FACE_COUNTER_CLOCKWISE;
885       cfg.cull_front_face = (rs->cull_mode & VK_CULL_MODE_FRONT_BIT) != 0;
886       cfg.cull_back_face = (rs->cull_mode & VK_CULL_MODE_BACK_BIT) != 0;
887       cfg.position = draw->position;
888       cfg.state = draw->fs.rsd;
889       cfg.attributes = fs_desc_state->img_attrib_table;
890       cfg.attribute_buffers =
891          fs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_IMG];
892       cfg.viewport = draw->viewport;
893       cfg.varyings = draw->fs.varyings;
894       cfg.varying_buffers = cfg.varyings ? draw->varying_bufs : 0;
895       cfg.thread_storage = draw->tls;
896 
897       /* For all primitives but lines DRAW.flat_shading_vertex must
898        * be set to 0 and the provoking vertex is selected with the
899        * PRIMITIVE.first_provoking_vertex field.
900        */
901       if (ia->primitive_topology == VK_PRIMITIVE_TOPOLOGY_LINE_LIST ||
902           ia->primitive_topology == VK_PRIMITIVE_TOPOLOGY_LINE_STRIP)
903          cfg.flat_shading_vertex = true;
904 
905       cfg.offset_start = draw->info.vertex.raw_offset;
906       cfg.instance_size =
907          draw->info.instance.count > 1 ? draw->padded_vertex_count : 1;
908       cfg.uniform_buffers = fs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_UBO];
909       cfg.push_uniforms = cmdbuf->state.gfx.fs.push_uniforms;
910       cfg.textures = fs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_TEXTURE];
911       cfg.samplers = fs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_SAMPLER];
912 
913       cfg.occlusion_query = cmdbuf->state.gfx.occlusion_query.mode;
914       cfg.occlusion = cmdbuf->state.gfx.occlusion_query.ptr;
915    }
916 }
917 
918 static void
set_provoking_vertex_mode(struct panvk_cmd_buffer * cmdbuf)919 set_provoking_vertex_mode(struct panvk_cmd_buffer *cmdbuf)
920 {
921    struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info;
922    bool first_provoking_vertex =
923       cmdbuf->vk.dynamic_graphics_state.rs.provoking_vertex ==
924          VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT;
925 
926    /* If this is not the first draw, first_provoking_vertex should match
927     * the one from the previous draws. Unfortunately, we can't check it
928     * when the render pass is inherited. */
929    assert(!cmdbuf->cur_batch->fb.desc.gpu ||
930           fbinfo->first_provoking_vertex == first_provoking_vertex);
931 
932    fbinfo->first_provoking_vertex = first_provoking_vertex;
933 }
934 
935 static VkResult
panvk_draw_prepare_tiler_job(struct panvk_cmd_buffer * cmdbuf,struct panvk_draw_data * draw)936 panvk_draw_prepare_tiler_job(struct panvk_cmd_buffer *cmdbuf,
937                              struct panvk_draw_data *draw)
938 {
939    struct panvk_batch *batch = cmdbuf->cur_batch;
940    const struct panvk_shader *fs = cmdbuf->state.gfx.fs.shader;
941    struct panvk_shader_desc_state *fs_desc_state = &cmdbuf->state.gfx.fs.desc;
942    struct panfrost_ptr ptr;
943    VkResult result = panvk_per_arch(meta_get_copy_desc_job)(
944       cmdbuf, fs, &cmdbuf->state.gfx.desc_state, fs_desc_state, 0, &ptr);
945 
946    if (result != VK_SUCCESS)
947       return result;
948 
949    if (ptr.cpu)
950       util_dynarray_append(&batch->jobs, void *, ptr.cpu);
951 
952    draw->jobs.frag_copy_desc = ptr;
953 
954    ptr = panvk_cmd_alloc_desc(cmdbuf, TILER_JOB);
955    util_dynarray_append(&batch->jobs, void *, ptr.cpu);
956    draw->jobs.tiler = ptr;
957 
958    memcpy(pan_section_ptr(ptr.cpu, TILER_JOB, INVOCATION), &draw->invocation,
959           pan_size(INVOCATION));
960 
961    panvk_emit_tiler_primitive(cmdbuf, draw,
962                               pan_section_ptr(ptr.cpu, TILER_JOB, PRIMITIVE));
963 
964    panvk_emit_tiler_primitive_size(
965       cmdbuf, draw, pan_section_ptr(ptr.cpu, TILER_JOB, PRIMITIVE_SIZE));
966 
967    panvk_emit_tiler_dcd(cmdbuf, draw,
968                         pan_section_ptr(ptr.cpu, TILER_JOB, DRAW));
969 
970    pan_section_pack(ptr.cpu, TILER_JOB, TILER, cfg) {
971       cfg.address = PAN_ARCH >= 9 ? draw->tiler_ctx->valhall.desc
972                                   : draw->tiler_ctx->bifrost.desc;
973    }
974 
975    pan_section_pack(ptr.cpu, TILER_JOB, PADDING, padding)
976       ;
977 
978    return VK_SUCCESS;
979 }
980 
981 static VkResult
panvk_draw_prepare_idvs_job(struct panvk_cmd_buffer * cmdbuf,struct panvk_draw_data * draw)982 panvk_draw_prepare_idvs_job(struct panvk_cmd_buffer *cmdbuf,
983                             struct panvk_draw_data *draw)
984 {
985    struct panvk_batch *batch = cmdbuf->cur_batch;
986    struct panfrost_ptr ptr = panvk_cmd_alloc_desc(cmdbuf, INDEXED_VERTEX_JOB);
987    if (!ptr.gpu)
988       return VK_ERROR_OUT_OF_DEVICE_MEMORY;
989 
990    util_dynarray_append(&batch->jobs, void *, ptr.cpu);
991    draw->jobs.idvs = ptr;
992 
993    memcpy(pan_section_ptr(ptr.cpu, INDEXED_VERTEX_JOB, INVOCATION),
994           &draw->invocation, pan_size(INVOCATION));
995 
996    panvk_emit_tiler_primitive(
997       cmdbuf, draw, pan_section_ptr(ptr.cpu, INDEXED_VERTEX_JOB, PRIMITIVE));
998 
999    panvk_emit_tiler_primitive_size(
1000       cmdbuf, draw,
1001       pan_section_ptr(ptr.cpu, INDEXED_VERTEX_JOB, PRIMITIVE_SIZE));
1002 
1003    pan_section_pack(ptr.cpu, INDEXED_VERTEX_JOB, TILER, cfg) {
1004       cfg.address = PAN_ARCH >= 9 ? draw->tiler_ctx->valhall.desc
1005                                   : draw->tiler_ctx->bifrost.desc;
1006    }
1007 
1008    pan_section_pack(ptr.cpu, INDEXED_VERTEX_JOB, PADDING, _) {
1009    }
1010 
1011    panvk_emit_tiler_dcd(
1012       cmdbuf, draw,
1013       pan_section_ptr(ptr.cpu, INDEXED_VERTEX_JOB, FRAGMENT_DRAW));
1014 
1015    panvk_emit_vertex_dcd(
1016       cmdbuf, draw, pan_section_ptr(ptr.cpu, INDEXED_VERTEX_JOB, VERTEX_DRAW));
1017    return VK_SUCCESS;
1018 }
1019 
1020 static VkResult
panvk_draw_prepare_vs_copy_desc_job(struct panvk_cmd_buffer * cmdbuf,struct panvk_draw_data * draw)1021 panvk_draw_prepare_vs_copy_desc_job(struct panvk_cmd_buffer *cmdbuf,
1022                                     struct panvk_draw_data *draw)
1023 {
1024    struct panvk_batch *batch = cmdbuf->cur_batch;
1025    const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader;
1026    const struct panvk_shader_desc_state *vs_desc_state =
1027       &cmdbuf->state.gfx.vs.desc;
1028    const struct vk_vertex_input_state *vi =
1029       cmdbuf->vk.dynamic_graphics_state.vi;
1030    unsigned num_vbs = util_last_bit(vi->bindings_valid);
1031    struct panfrost_ptr ptr;
1032    VkResult result = panvk_per_arch(meta_get_copy_desc_job)(
1033       cmdbuf, vs, &cmdbuf->state.gfx.desc_state, vs_desc_state,
1034       num_vbs * pan_size(ATTRIBUTE_BUFFER) * 2, &ptr);
1035    if (result != VK_SUCCESS)
1036       return result;
1037 
1038    if (ptr.cpu)
1039       util_dynarray_append(&batch->jobs, void *, ptr.cpu);
1040 
1041    draw->jobs.vertex_copy_desc = ptr;
1042    return VK_SUCCESS;
1043 }
1044 
1045 static VkResult
panvk_draw_prepare_fs_copy_desc_job(struct panvk_cmd_buffer * cmdbuf,struct panvk_draw_data * draw)1046 panvk_draw_prepare_fs_copy_desc_job(struct panvk_cmd_buffer *cmdbuf,
1047                                     struct panvk_draw_data *draw)
1048 {
1049    const struct panvk_shader *fs = cmdbuf->state.gfx.fs.shader;
1050    struct panvk_shader_desc_state *fs_desc_state = &cmdbuf->state.gfx.fs.desc;
1051    struct panvk_batch *batch = cmdbuf->cur_batch;
1052    struct panfrost_ptr ptr;
1053    VkResult result = panvk_per_arch(meta_get_copy_desc_job)(
1054       cmdbuf, fs, &cmdbuf->state.gfx.desc_state, fs_desc_state, 0, &ptr);
1055 
1056    if (result != VK_SUCCESS)
1057       return result;
1058 
1059    if (ptr.cpu)
1060       util_dynarray_append(&batch->jobs, void *, ptr.cpu);
1061 
1062    draw->jobs.frag_copy_desc = ptr;
1063    return VK_SUCCESS;
1064 }
1065 
1066 void
panvk_per_arch(cmd_preload_fb_after_batch_split)1067 panvk_per_arch(cmd_preload_fb_after_batch_split)(struct panvk_cmd_buffer *cmdbuf)
1068 {
1069    for (unsigned i = 0; i < cmdbuf->state.gfx.render.fb.info.rt_count; i++) {
1070       if (cmdbuf->state.gfx.render.fb.info.rts[i].view) {
1071          cmdbuf->state.gfx.render.fb.info.rts[i].clear = false;
1072          cmdbuf->state.gfx.render.fb.info.rts[i].preload = true;
1073       }
1074    }
1075 
1076    if (cmdbuf->state.gfx.render.fb.info.zs.view.zs) {
1077       cmdbuf->state.gfx.render.fb.info.zs.clear.z = false;
1078       cmdbuf->state.gfx.render.fb.info.zs.preload.z = true;
1079    }
1080 
1081    if (cmdbuf->state.gfx.render.fb.info.zs.view.s ||
1082        (cmdbuf->state.gfx.render.fb.info.zs.view.zs &&
1083         util_format_is_depth_and_stencil(
1084            cmdbuf->state.gfx.render.fb.info.zs.view.zs->format))) {
1085       cmdbuf->state.gfx.render.fb.info.zs.clear.s = false;
1086       cmdbuf->state.gfx.render.fb.info.zs.preload.s = true;
1087    }
1088 }
1089 
1090 static VkResult
panvk_cmd_prepare_draw_link_shaders(struct panvk_cmd_buffer * cmd)1091 panvk_cmd_prepare_draw_link_shaders(struct panvk_cmd_buffer *cmd)
1092 {
1093    struct panvk_cmd_graphics_state *gfx = &cmd->state.gfx;
1094 
1095    if (!gfx_state_dirty(cmd, VS) && !gfx_state_dirty(cmd, FS))
1096       return VK_SUCCESS;
1097 
1098    VkResult result = panvk_per_arch(link_shaders)(
1099       &cmd->desc_pool, gfx->vs.shader, get_fs(cmd), &gfx->link);
1100    if (result != VK_SUCCESS) {
1101       vk_command_buffer_set_error(&cmd->vk, result);
1102       return result;
1103    }
1104 
1105    return VK_SUCCESS;
1106 }
1107 
1108 static void
panvk_cmd_draw(struct panvk_cmd_buffer * cmdbuf,struct panvk_draw_data * draw)1109 panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_data *draw)
1110 {
1111    struct panvk_batch *batch = cmdbuf->cur_batch;
1112    const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader;
1113    struct panvk_shader_desc_state *vs_desc_state = &cmdbuf->state.gfx.vs.desc;
1114    struct panvk_shader_desc_state *fs_desc_state = &cmdbuf->state.gfx.fs.desc;
1115    struct panvk_descriptor_state *desc_state = &cmdbuf->state.gfx.desc_state;
1116    uint32_t layer_count = cmdbuf->state.gfx.render.layer_count;
1117    const struct vk_rasterization_state *rs =
1118       &cmdbuf->vk.dynamic_graphics_state.rs;
1119    bool idvs = vs->info.vs.idvs;
1120    VkResult result;
1121 
1122    /* If there's no vertex shader, we can skip the draw. */
1123    if (!panvk_priv_mem_dev_addr(vs->rsd))
1124       return;
1125 
1126    /* Needs to be done before get_fs() is called because it depends on
1127     * fs.required being initialized. */
1128    cmdbuf->state.gfx.fs.required =
1129       fs_required(&cmdbuf->state.gfx, &cmdbuf->vk.dynamic_graphics_state);
1130 
1131    const struct panvk_shader *fs = get_fs(cmdbuf);
1132 
1133    /* There are only 16 bits in the descriptor for the job ID. Each job has a
1134     * pilot shader dealing with descriptor copies, and we need one
1135     * <vertex,tiler> pair per draw.
1136     */
1137    if (batch->vtc_jc.job_index + (4 * layer_count) >= UINT16_MAX) {
1138       panvk_per_arch(cmd_close_batch)(cmdbuf);
1139       panvk_per_arch(cmd_preload_fb_after_batch_split)(cmdbuf);
1140       batch = panvk_per_arch(cmd_open_batch)(cmdbuf);
1141    }
1142 
1143    if (fs_user_dirty(cmdbuf)) {
1144       result = panvk_cmd_prepare_draw_link_shaders(cmdbuf);
1145       if (result != VK_SUCCESS)
1146          return;
1147    }
1148 
1149    bool active_occlusion =
1150       cmdbuf->state.gfx.occlusion_query.mode != MALI_OCCLUSION_MODE_DISABLED;
1151    bool needs_tiling = !rs->rasterizer_discard_enable || active_occlusion;
1152 
1153    set_provoking_vertex_mode(cmdbuf);
1154 
1155    if (!rs->rasterizer_discard_enable) {
1156       struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info;
1157       uint32_t rasterization_samples =
1158          cmdbuf->vk.dynamic_graphics_state.ms.rasterization_samples;
1159 
1160       /* If there's no attachment, and the FB descriptor hasn't been allocated
1161        * yet, we patch nr_samples to match rasterization_samples, otherwise, we
1162        * make sure those two numbers match. */
1163       if (!batch->fb.desc.gpu && !cmdbuf->state.gfx.render.bound_attachments) {
1164          assert(rasterization_samples > 0);
1165          fbinfo->nr_samples = rasterization_samples;
1166       } else {
1167          assert(rasterization_samples == fbinfo->nr_samples);
1168       }
1169 
1170       result = panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf);
1171       if (result != VK_SUCCESS)
1172          return;
1173    }
1174 
1175    result = panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, true);
1176    if (result != VK_SUCCESS)
1177       return;
1178 
1179    panvk_draw_prepare_attributes(cmdbuf, draw);
1180 
1181    uint32_t used_set_mask =
1182       vs->desc_info.used_set_mask | (fs ? fs->desc_info.used_set_mask : 0);
1183 
1184    if (gfx_state_dirty(cmdbuf, DESC_STATE) || gfx_state_dirty(cmdbuf, VS) ||
1185        gfx_state_dirty(cmdbuf, FS)) {
1186       result = panvk_per_arch(cmd_prepare_push_descs)(cmdbuf, desc_state,
1187                                                       used_set_mask);
1188       if (result != VK_SUCCESS)
1189          return;
1190    }
1191 
1192    if (gfx_state_dirty(cmdbuf, DESC_STATE) || gfx_state_dirty(cmdbuf, VS)) {
1193       result = panvk_per_arch(cmd_prepare_shader_desc_tables)(
1194          cmdbuf, &cmdbuf->state.gfx.desc_state, vs, vs_desc_state);
1195       if (result != VK_SUCCESS)
1196          return;
1197 
1198       panvk_draw_prepare_vs_copy_desc_job(cmdbuf, draw);
1199    }
1200 
1201    unsigned copy_desc_job_id =
1202       draw->jobs.vertex_copy_desc.gpu
1203          ? pan_jc_add_job(&batch->vtc_jc, MALI_JOB_TYPE_COMPUTE, false, false,
1204                           0, 0, &draw->jobs.vertex_copy_desc, false)
1205          : 0;
1206 
1207    /* No need to setup the FS desc tables if the FS is not executed. */
1208    if (fs &&
1209        (gfx_state_dirty(cmdbuf, DESC_STATE) || gfx_state_dirty(cmdbuf, FS))) {
1210       result = panvk_per_arch(cmd_prepare_shader_desc_tables)(
1211          cmdbuf, &cmdbuf->state.gfx.desc_state, fs, fs_desc_state);
1212       if (result != VK_SUCCESS)
1213          return;
1214 
1215       result = panvk_draw_prepare_fs_copy_desc_job(cmdbuf, draw);
1216       if (result != VK_SUCCESS)
1217          return;
1218 
1219       if (draw->jobs.frag_copy_desc.gpu) {
1220          /* We don't need to add frag_copy_desc as a dependency because the
1221           * tiler job doesn't execute the fragment shader, the fragment job
1222           * will, and the tiler/fragment synchronization happens at the batch
1223           * level. */
1224          pan_jc_add_job(&batch->vtc_jc, MALI_JOB_TYPE_COMPUTE, false, false, 0,
1225                         0, &draw->jobs.frag_copy_desc, false);
1226       }
1227    }
1228 
1229    /* TODO: indexed draws */
1230    draw->tls = batch->tls.gpu;
1231    draw->fb = batch->fb.desc.gpu;
1232 
1233    panfrost_pack_work_groups_compute(&draw->invocation, 1, draw->vertex_range,
1234                                      draw->info.instance.count, 1, 1, 1, true,
1235                                      false);
1236 
1237    result = panvk_draw_prepare_fs_rsd(cmdbuf, draw);
1238    if (result != VK_SUCCESS)
1239       return;
1240 
1241    batch->tlsinfo.tls.size = MAX3(vs->info.tls_size, fs ? fs->info.tls_size : 0,
1242                                   batch->tlsinfo.tls.size);
1243 
1244    if (gfx_state_dirty(cmdbuf, DESC_STATE) || gfx_state_dirty(cmdbuf, VS)) {
1245       VkResult result = panvk_per_arch(cmd_prepare_dyn_ssbos)(
1246          cmdbuf, desc_state, vs, vs_desc_state);
1247       if (result != VK_SUCCESS)
1248          return;
1249    }
1250 
1251    if (gfx_state_dirty(cmdbuf, DESC_STATE) || gfx_state_dirty(cmdbuf, FS)) {
1252       VkResult result = panvk_per_arch(cmd_prepare_dyn_ssbos)(
1253          cmdbuf, desc_state, fs, fs_desc_state);
1254       if (result != VK_SUCCESS)
1255          return;
1256    }
1257 
1258    for (uint32_t i = 0; i < layer_count; i++) {
1259       draw->info.layer_id = i;
1260       result = panvk_draw_prepare_varyings(cmdbuf, draw);
1261       if (result != VK_SUCCESS)
1262          return;
1263 
1264       panvk_per_arch(cmd_prepare_draw_sysvals)(cmdbuf, &draw->info);
1265 
1266       /* Viewport emission requires up-to-date {scale,offset}.z for min/max Z,
1267        * so we need to call it after calling cmd_prepare_draw_sysvals(), but
1268        * viewports are the same for all layers, so we only emit when layer_id=0.
1269        */
1270       if (i == 0) {
1271          result = panvk_draw_prepare_viewport(cmdbuf, draw);
1272          if (result != VK_SUCCESS)
1273             return;
1274       }
1275 
1276       result = panvk_per_arch(cmd_prepare_push_uniforms)(
1277          cmdbuf, cmdbuf->state.gfx.vs.shader);
1278       if (result != VK_SUCCESS)
1279          return;
1280 
1281       if (fs) {
1282          result = panvk_per_arch(cmd_prepare_push_uniforms)(
1283             cmdbuf, cmdbuf->state.gfx.fs.shader);
1284          if (result != VK_SUCCESS)
1285             return;
1286       }
1287 
1288       result = panvk_draw_prepare_tiler_context(cmdbuf, draw);
1289       if (result != VK_SUCCESS)
1290          return;
1291 
1292       if (idvs) {
1293          result = panvk_draw_prepare_idvs_job(cmdbuf, draw);
1294          if (result != VK_SUCCESS)
1295             return;
1296 
1297          pan_jc_add_job(&batch->vtc_jc, MALI_JOB_TYPE_INDEXED_VERTEX, false,
1298                         false, 0, copy_desc_job_id, &draw->jobs.idvs, false);
1299       } else {
1300          result = panvk_draw_prepare_vertex_job(cmdbuf, draw);
1301          if (result != VK_SUCCESS)
1302             return;
1303 
1304          unsigned vjob_id =
1305             pan_jc_add_job(&batch->vtc_jc, MALI_JOB_TYPE_VERTEX, false, false,
1306                            0, copy_desc_job_id, &draw->jobs.vertex, false);
1307 
1308          if (needs_tiling) {
1309             panvk_draw_prepare_tiler_job(cmdbuf, draw);
1310             pan_jc_add_job(&batch->vtc_jc, MALI_JOB_TYPE_TILER, false, false,
1311                            vjob_id, 0, &draw->jobs.tiler, false);
1312          }
1313       }
1314    }
1315 
1316    clear_dirty_after_draw(cmdbuf);
1317 }
1318 
1319 static unsigned
padded_vertex_count(struct panvk_cmd_buffer * cmdbuf,uint32_t vertex_count,uint32_t instance_count)1320 padded_vertex_count(struct panvk_cmd_buffer *cmdbuf, uint32_t vertex_count,
1321                     uint32_t instance_count)
1322 {
1323    if (instance_count == 1)
1324       return vertex_count;
1325 
1326    bool idvs = cmdbuf->state.gfx.vs.shader->info.vs.idvs;
1327 
1328    /* Index-Driven Vertex Shading requires different instances to
1329     * have different cache lines for position results. Each vertex
1330     * position is 16 bytes and the Mali cache line is 64 bytes, so
1331     * the instance count must be aligned to 4 vertices.
1332     */
1333    if (idvs)
1334       vertex_count = ALIGN_POT(vertex_count, 4);
1335 
1336    return panfrost_padded_vertex_count(vertex_count);
1337 }
1338 
1339 VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdDraw)1340 panvk_per_arch(CmdDraw)(VkCommandBuffer commandBuffer, uint32_t vertexCount,
1341                         uint32_t instanceCount, uint32_t firstVertex,
1342                         uint32_t firstInstance)
1343 {
1344    VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
1345 
1346    if (instanceCount == 0 || vertexCount == 0)
1347       return;
1348 
1349    /* gl_BaseVertexARB is a signed integer, and it should expose the value of
1350     * firstVertex in a non-indexed draw. */
1351    assert(firstVertex < INT32_MAX);
1352 
1353    /* gl_BaseInstance is a signed integer, and it should expose the value of
1354     * firstInstnace. */
1355    assert(firstInstance < INT32_MAX);
1356 
1357    struct panvk_draw_data draw = {
1358       .info = {
1359          .vertex.base = firstVertex,
1360          .vertex.raw_offset = firstVertex,
1361          .vertex.count = vertexCount,
1362          .instance.base = firstInstance,
1363          .instance.count = instanceCount,
1364       },
1365       .vertex_range = vertexCount,
1366       .padded_vertex_count =
1367          padded_vertex_count(cmdbuf, vertexCount, instanceCount),
1368    };
1369 
1370    panvk_cmd_draw(cmdbuf, &draw);
1371 }
1372 
1373 static void
panvk_index_minmax_search(struct panvk_cmd_buffer * cmdbuf,uint32_t start,uint32_t count,bool restart,uint32_t * min,uint32_t * max)1374 panvk_index_minmax_search(struct panvk_cmd_buffer *cmdbuf, uint32_t start,
1375                           uint32_t count, bool restart, uint32_t *min,
1376                           uint32_t *max)
1377 {
1378    struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
1379    struct panvk_instance *instance =
1380       to_panvk_instance(dev->vk.physical->instance);
1381    void *ptr =
1382       cmdbuf->state.gfx.ib.buffer->host_ptr + cmdbuf->state.gfx.ib.offset;
1383 
1384    assert(PAN_ARCH < 9 && cmdbuf->state.gfx.ib.buffer->host_ptr);
1385 
1386    assert(cmdbuf->state.gfx.ib.buffer);
1387    assert(cmdbuf->state.gfx.ib.buffer->bo);
1388    assert(cmdbuf->state.gfx.ib.buffer->host_ptr);
1389 
1390    if (!(instance->debug_flags & PANVK_DEBUG_NO_KNOWN_WARN)) {
1391       mesa_logw("Crawling index buffers from the CPU isn't valid in Vulkan\n");
1392    }
1393 
1394    *max = 0;
1395 
1396    /* TODO: Use panfrost_minmax_cache */
1397    /* TODO: Read full cacheline of data to mitigate the uncached
1398     * mapping slowness.
1399     */
1400    switch (cmdbuf->state.gfx.ib.index_size * 8) {
1401 #define MINMAX_SEARCH_CASE(sz)                                                 \
1402    case sz: {                                                                  \
1403       uint##sz##_t *indices = ptr;                                             \
1404       *min = UINT##sz##_MAX;                                                   \
1405       for (uint32_t i = 0; i < count; i++) {                                   \
1406          if (restart && indices[i + start] == UINT##sz##_MAX)                  \
1407             continue;                                                          \
1408          *min = MIN2(indices[i + start], *min);                                \
1409          *max = MAX2(indices[i + start], *max);                                \
1410       }                                                                        \
1411       break;                                                                   \
1412    }
1413       MINMAX_SEARCH_CASE(32)
1414       MINMAX_SEARCH_CASE(16)
1415       MINMAX_SEARCH_CASE(8)
1416 #undef MINMAX_SEARCH_CASE
1417    default:
1418       unreachable("Invalid index size");
1419    }
1420 }
1421 
1422 VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdDrawIndexed)1423 panvk_per_arch(CmdDrawIndexed)(VkCommandBuffer commandBuffer,
1424                                uint32_t indexCount, uint32_t instanceCount,
1425                                uint32_t firstIndex, int32_t vertexOffset,
1426                                uint32_t firstInstance)
1427 {
1428    VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
1429    uint32_t min_vertex, max_vertex;
1430 
1431    if (instanceCount == 0 || indexCount == 0)
1432       return;
1433 
1434    /* gl_BaseInstance is a signed integer, and it should expose the value of
1435     * firstInstnace. */
1436    assert(firstInstance < INT32_MAX);
1437 
1438    const struct vk_input_assembly_state *ia =
1439       &cmdbuf->vk.dynamic_graphics_state.ia;
1440    bool primitive_restart = ia->primitive_restart_enable;
1441 
1442    panvk_index_minmax_search(cmdbuf, firstIndex, indexCount, primitive_restart,
1443                              &min_vertex, &max_vertex);
1444 
1445    unsigned vertex_range = max_vertex - min_vertex + 1;
1446    struct panvk_draw_data draw = {
1447       .info = {
1448          .index.size = cmdbuf->state.gfx.ib.index_size,
1449          .index.offset = firstIndex,
1450          .vertex.base = vertexOffset,
1451          .vertex.raw_offset = min_vertex + vertexOffset,
1452          .vertex.count = indexCount,
1453          .instance.base = firstInstance,
1454          .instance.count = instanceCount,
1455       },
1456       .vertex_range = vertex_range,
1457       .padded_vertex_count =
1458          padded_vertex_count(cmdbuf, vertex_range, instanceCount),
1459       .indices = panvk_buffer_gpu_ptr(cmdbuf->state.gfx.ib.buffer,
1460                                       cmdbuf->state.gfx.ib.offset) +
1461                  (firstIndex * cmdbuf->state.gfx.ib.index_size),
1462    };
1463 
1464    panvk_cmd_draw(cmdbuf, &draw);
1465 }
1466 
VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdDrawIndirect)(VkCommandBuffer commandBuffer, VkBuffer _buffer,
                                VkDeviceSize offset, uint32_t drawCount,
                                uint32_t stride)
{
   /* Indirect (non-indexed) draws are not implemented on this path yet. */
   panvk_stub();
}
1474 
VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdDrawIndexedIndirect)(VkCommandBuffer commandBuffer,
                                       VkBuffer _buffer, VkDeviceSize offset,
                                       uint32_t drawCount, uint32_t stride)
{
   /* Indexed indirect draws are not implemented on this path yet. */
   panvk_stub();
}
1482 
1483 VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdBeginRendering)1484 panvk_per_arch(CmdBeginRendering)(VkCommandBuffer commandBuffer,
1485                                   const VkRenderingInfo *pRenderingInfo)
1486 {
1487    VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
1488    struct panvk_cmd_graphics_state *state = &cmdbuf->state.gfx;
1489    bool resuming = cmdbuf->state.gfx.render.flags & VK_RENDERING_RESUMING_BIT;
1490 
1491    /* When resuming from a suspended pass, the state should be unchanged. */
1492    if (resuming)
1493       state->render.flags = pRenderingInfo->flags;
1494    else
1495       panvk_per_arch(cmd_init_render_state)(cmdbuf, pRenderingInfo);
1496 
1497    /* If we're not resuming, cur_batch should be NULL.
1498     * However, this currently isn't true because of how events are implemented.
1499     * XXX: Rewrite events to not close and open batch and add an assert here.
1500     */
1501    if (cmdbuf->cur_batch && !resuming)
1502       panvk_per_arch(cmd_close_batch)(cmdbuf);
1503 
1504    /* The opened batch might have been disrupted by a compute job.
1505     * We need to preload in that case. */
1506    if (resuming && !cmdbuf->cur_batch)
1507       panvk_per_arch(cmd_preload_fb_after_batch_split)(cmdbuf);
1508 
1509    if (!cmdbuf->cur_batch)
1510       panvk_per_arch(cmd_open_batch)(cmdbuf);
1511 
1512    if (!resuming)
1513       panvk_per_arch(cmd_preload_render_area_border)(cmdbuf, pRenderingInfo);
1514 }
1515 
1516 VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdEndRendering)1517 panvk_per_arch(CmdEndRendering)(VkCommandBuffer commandBuffer)
1518 {
1519    VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
1520 
1521    if (!(cmdbuf->state.gfx.render.flags & VK_RENDERING_SUSPENDING_BIT)) {
1522       struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info;
1523       bool clear = fbinfo->zs.clear.z | fbinfo->zs.clear.s;
1524       for (unsigned i = 0; i < fbinfo->rt_count; i++)
1525          clear |= fbinfo->rts[i].clear;
1526 
1527       if (clear)
1528          panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf);
1529 
1530       panvk_per_arch(cmd_close_batch)(cmdbuf);
1531       cmdbuf->cur_batch = NULL;
1532       panvk_per_arch(cmd_resolve_attachments)(cmdbuf);
1533    }
1534 }
1535