/*
 * Copyright 2024 Valve Corporation
 * Copyright 2024 Alyssa Rosenzweig
 * Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
 * SPDX-License-Identifier: MIT
 */

#pragma once

#include "util/macros.h"

#include "util/list.h"
#include "agx_helpers.h"
#include "agx_linker.h"
#include "agx_pack.h"
#include "agx_tilebuffer.h"
#include "agx_uvs.h"
#include "libagx_dgc.h"
#include "pool.h"
#include "shader_enums.h"

#include "hk_private.h"
#include "hk_shader.h"

#include "hk_cmd_pool.h"
#include "hk_descriptor_set.h"

#include "asahi/lib/agx_nir_lower_vbo.h"
#include "util/u_dynarray.h"
#include "vulkan/vulkan_core.h"

#include "vk_command_buffer.h"

#include <stdio.h>

struct hk_buffer;
struct hk_cmd_bo;
struct hk_cmd_pool;
struct hk_image_view;
struct hk_push_descriptor_set;
struct hk_shader;
struct hk_linked_shader;
struct agx_usc_builder;
struct vk_shader;

/** Root descriptor table. */
struct hk_root_descriptor_table {
   uint64_t root_desc_addr;

   union {
      struct {
         uint32_t view_index;
         uint32_t ppp_multisamplectl;

         /* Vertex input state */
         uint64_t attrib_base[AGX_MAX_VBUFS];
         uint32_t attrib_clamps[AGX_MAX_VBUFS];

         /* Pointer to the VS->TCS, VS->GS, or TES->GS buffer. */
         uint64_t vertex_output_buffer;

         /* Mask of outputs flowing VS->TCS, VS->GS, or TES->GS. */
         uint64_t vertex_outputs;

         /* Address of input assembly buffer if geom/tess is used, else 0 */
         uint64_t input_assembly;

         /* Address of tessellation param buffer if tessellation used, else 0 */
         uint64_t tess_params;

         /* Address of geometry param buffer if GS is used, else 0 */
         uint64_t geometry_params;

         /* Pipeline statistics queries. This is a base address with flags. */
         uint64_t pipeline_stats;
         VkQueryPipelineStatisticFlags pipeline_stats_flags;

         float blend_constant[4];
         uint16_t no_epilog_discard;
         uint16_t _pad1;
         uint16_t api_sample_mask;
         uint16_t _pad2;
         uint16_t force_never_in_shader;
         uint16_t _pad3;
         uint16_t provoking;
         uint16_t _pad4;

         /* True if there is an API geometry shader. If false, there may still
          * be a geometry shader in use (notably for transform feedback) but it
          * should not contribute to pipeline statistics.
          */
         uint16_t api_gs;
         uint16_t _pad5;

         /* Mapping from varying slots written by the last vertex stage to UVS
          * indices. This mapping must be compatible with the fragment shader.
          */
         uint8_t uvs_index[VARYING_SLOT_MAX];
      } draw;
      struct {
         uint64_t group_count_addr;
         uint32_t base_group[3];
      } cs;
   };

   /* Client push constants */
   uint8_t push[HK_MAX_PUSH_SIZE];

   /* Descriptor set base addresses */
   uint64_t sets[HK_MAX_SETS];

   /* Dynamic buffer bindings */
   struct hk_buffer_address dynamic_buffers[HK_MAX_DYNAMIC_BUFFERS];

   /* Start index in dynamic_buffers where each set starts */
   uint8_t set_dynamic_buffer_start[HK_MAX_SETS];
};

/* helper macro for computing root descriptor byte offsets */
#define hk_root_descriptor_offset(member)                                      \
   offsetof(struct hk_root_descriptor_table, member)
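
/* Illustrative sketch (not part of the driver): the macro simply wraps
 * offsetof(), so fields of the uploaded root table can be patched by byte
 * offset. The names root_map and constants below are hypothetical.
 *
 *    uint32_t off = hk_root_descriptor_offset(draw.blend_constant);
 *    // off == offsetof(struct hk_root_descriptor_table, draw.blend_constant)
 *    memcpy((uint8_t *)root_map + off, constants, sizeof(float) * 4);
 */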

struct hk_descriptor_state {
   bool root_dirty;
   struct hk_root_descriptor_table root;

   uint32_t set_sizes[HK_MAX_SETS];
   struct hk_descriptor_set *sets[HK_MAX_SETS];
   uint32_t sets_dirty;

   struct hk_push_descriptor_set *push[HK_MAX_SETS];
   uint32_t push_dirty;
};

struct hk_attachment {
   VkFormat vk_format;
   struct hk_image_view *iview;

   VkResolveModeFlagBits resolve_mode;
   struct hk_image_view *resolve_iview;
};

struct hk_bg_eot {
   uint64_t usc;
   struct agx_counts_packed counts;
};

struct hk_render_registers {
   uint32_t width, height, layers;
   uint32_t zls_width, zls_height;
   uint32_t isp_bgobjdepth;
   uint32_t isp_bgobjvals;
   struct agx_zls_control_packed zls_control, zls_control_partial;
   uint32_t iogpu_unk_214;
   uint32_t depth_dimensions;
   bool process_empty_tiles;
   enum u_tristate dbias_is_int;

   struct {
      uint32_t dimensions;
      uint64_t buffer, meta;
      uint32_t stride, meta_stride;
   } depth;

   struct {
      uint64_t buffer, meta;
      uint32_t stride, meta_stride;
   } stencil;

   struct {
      struct hk_bg_eot main;
      struct hk_bg_eot partial;
   } bg;

   struct {
      struct hk_bg_eot main;
      struct hk_bg_eot partial;
   } eot;
};

struct hk_rendering_state {
   VkRenderingFlagBits flags;

   VkRect2D area;
   uint32_t layer_count;
   uint32_t view_mask;

   uint32_t color_att_count;
   struct hk_attachment color_att[HK_MAX_RTS];
   struct hk_attachment depth_att;
   struct hk_attachment stencil_att;

   struct agx_tilebuffer_layout tilebuffer;
   struct hk_render_registers cr;
};

struct hk_index_buffer_state {
   struct hk_addr_range buffer;
   enum agx_index_size size;
   uint32_t restart;
};

/* Dirty tracking bits for state not tracked by vk_dynamic_graphics_state or
 * shaders_dirty.
 */
enum hk_dirty {
   HK_DIRTY_VB = BITFIELD_BIT(0),
   HK_DIRTY_OCCLUSION = BITFIELD_BIT(1),
   HK_DIRTY_PROVOKING = BITFIELD_BIT(2),
   HK_DIRTY_VARYINGS = BITFIELD_BIT(3),
};

struct hk_graphics_state {
   struct hk_rendering_state render;
   struct hk_descriptor_state descriptors;

   enum hk_dirty dirty;

   uint64_t root;
   uint64_t draw_params;
   uint64_t draw_id_ptr;

   uint32_t shaders_dirty;
   struct hk_api_shader *shaders[MESA_SHADER_MESH + 1];

   /* Vertex buffers */
   struct hk_addr_range vb[AGX_MAX_VBUFS];

   /* Transform feedback buffers */
   struct hk_addr_range xfb[4];

   /* Is transform feedback enabled? */
   bool xfb_enabled;

   /* Internal transform feedback offset vec4.
    *
    * TODO: Strictly could be global.
    */
   uint64_t xfb_offsets;

   /* Pointer to the GPU memory backing active transform feedback queries,
    * per-stream. Zero if no query is bound.
    */
   uint64_t xfb_query[4];

   struct hk_index_buffer_state index;
   enum agx_primitive topology;
   enum agx_object_type object_type;

   /* Provoking vertex 0, 1, or 2. Usually 0 or 2 for FIRST/LAST. 1 can only be
    * set for tri fans.
    */
   uint8_t provoking;

   struct {
      enum agx_visibility_mode mode;

      /* If enabled, index of the current occlusion query in the occlusion heap.
       * There can only be one active at a time (hardware constraint).
       */
      uint16_t index;
   } occlusion;

   /* Fast linked shader data structures */
   uint64_t varyings;
   struct agx_varyings_vs linked_varyings;

   uint32_t linked_dirty;
   struct hk_linked_shader *linked[PIPE_SHADER_TYPES];
   bool generate_primitive_id;

   /* Tessellation state */
   struct {
      uint64_t out_draws;
      uint64_t grids;
      struct hk_tess_info info;
      enum mesa_prim prim;
   } tess;

   /* Needed by vk_command_buffer::dynamic_graphics_state */
   struct vk_vertex_input_state _dynamic_vi;
   struct vk_sample_locations_state _dynamic_sl;
};

struct hk_compute_state {
   struct hk_descriptor_state descriptors;
   struct hk_api_shader *shader;
};

struct hk_cmd_push {
   void *map;
   uint64_t addr;
   uint32_t range;
   bool no_prefetch;
};

struct hk_scratch_req {
   bool main;
   bool preamble;
};

/*
 * Represents a firmware timestamp request. Handle is a kernel timestamp object
 * handle, not a GEM handle.
 *
 * The kernel/firmware uses the handle/offset_B to write. We use the address to
 * read the results back. We could deduplicate this, but this is convenient.
 */
struct agx_timestamp_req {
   uint64_t addr;
   uint32_t handle;
   uint32_t offset_B;
};

/*
 * hk_cs represents a single control stream, to be enqueued either to the
 * CDM or VDM for compute/3D respectively.
 */
enum hk_cs_type {
   HK_CS_CDM,
   HK_CS_VDM,
};

struct hk_cs {
   struct list_head node;

   /* Parent command buffer. Convenience. */
   struct hk_cmd_buffer *cmd;

   /* Data master */
   enum hk_cs_type type;

   /* Address of the root control stream for the job */
   uint64_t addr;

   /* Fat pointer to the start of the current chunk of the control stream */
   struct agx_ptr chunk;

   /* Start pointer of the root control stream */
   void *start;

   /* Current pointer within the control stream */
   void *current;

   /* End pointer of the current chunk of the control stream */
   void *end;

   /* Whether there is more than just the root chunk */
   bool stream_linked;

   /* Scratch requirements */
   struct {
      union {
         struct hk_scratch_req vs;
         struct hk_scratch_req cs;
      };

      struct hk_scratch_req fs;
   } scratch;

   /* Immediate writes, type libagx_imm_write. These all happen in parallel at
    * the end of the control stream. This accelerates queries. Implies CDM.
    */
   struct util_dynarray imm_writes;

   /* Statistics */
   struct {
      uint32_t calls, cmds, flushes;
   } stats;

   /* Timestamp writes. Currently just compute end / fragment end. We could
    * flesh this out later if we want finer info. (We will, but it's not
    * required for conformance.)
    */
   struct {
      struct agx_timestamp_req end;
   } timestamp;

   /* Remaining state is for graphics only, ignored for compute */
   struct agx_tilebuffer_layout tib;

   struct util_dynarray scissor, depth_bias;
   uint64_t uploaded_scissor, uploaded_zbias;

   /* We can only set ppp_multisamplectl once per batch. has_sample_locations
    * tracks if we've committed to a set of sample locations yet. vk_meta
    * operations do not set has_sample_locations since they don't care and it
    * would interfere with the app-provided samples.
    */
   bool has_sample_locations;
   uint32_t ppp_multisamplectl;

   struct hk_render_registers cr;

   /* Active restart index if one is set. Zero if there is no restart index set
    * yet, since Vulkan does not allow zero restart indices (unlike OpenGL).
    * This is used in place of dirty tracking, because dirty tracking
    * restart indices is complicated and just checking the saved value is cheap.
    */
   uint32_t restart_index;
};

static inline uint64_t
hk_cs_current_addr(struct hk_cs *cs)
{
   return cs->chunk.gpu + ((uint8_t *)cs->current - (uint8_t *)cs->chunk.cpu);
}
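
/* Worked example (illustrative, hypothetical values): if the current chunk was
 * allocated with chunk.gpu = 0x10000 and 0x40 bytes of control stream words
 * have been written so far (current == (uint8_t *)chunk.cpu + 0x40), then
 * hk_cs_current_addr() returns 0x10040, the GPU address corresponding to the
 * next word we will emit.
 */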

struct hk_uploader {
   /** List of hk_cmd_bo */
   struct list_head bos;

   /* Current addresses */
   uint8_t *map;
   uint64_t base;
   uint32_t offset;
};

struct hk_cmd_buffer {
   struct vk_command_buffer vk;

   struct {
      struct hk_graphics_state gfx;
      struct hk_compute_state cs;
   } state;

   struct {
      struct hk_uploader main, usc;
   } uploader;

   /* List of all recorded control streams */
   struct list_head control_streams;

   /* Current recorded control stream */
   struct {
      /* VDM stream for 3D */
      struct hk_cs *gfx;

      /* CDM stream for compute */
      struct hk_cs *cs;

      /* CDM stream that executes immediately before the current graphics
       * control stream. Used for geometry shading, tessellation, etc.
       */
      struct hk_cs *pre_gfx;

      /* CDM stream that will execute after the current graphics control stream
       * finishes. Used for queries.
       */
      struct hk_cs *post_gfx;
   } current_cs;

   /* Are we currently inside a vk_meta operation? This alters sample location
    * behaviour.
    */
   bool in_meta;

   /* XXX: move me?
    *
    * Indirect draw generated by the pre-GS for the geometry shader.
    */
   uint64_t geom_indirect;

   /* Does the command buffer use the geometry heap? */
   bool uses_heap;

   /* Owned large BOs */
   struct util_dynarray large_bos;
};

VK_DEFINE_HANDLE_CASTS(hk_cmd_buffer, vk.base, VkCommandBuffer,
                       VK_OBJECT_TYPE_COMMAND_BUFFER)

extern const struct vk_command_buffer_ops hk_cmd_buffer_ops;

static inline struct hk_device *
hk_cmd_buffer_device(struct hk_cmd_buffer *cmd)
{
   return (struct hk_device *)cmd->vk.base.device;
}

static inline struct hk_cmd_pool *
hk_cmd_buffer_pool(struct hk_cmd_buffer *cmd)
{
   return (struct hk_cmd_pool *)cmd->vk.pool;
}

/*
 * The hardware vertex shader is supplied by the last geometry stage. The
 * geometry pipeline is vertex->tess->geometry so we search backwards.
 */
static inline struct hk_shader *
hk_bound_hw_vs(struct hk_graphics_state *gfx)
{
   struct hk_api_shader *vs = gfx->shaders[MESA_SHADER_VERTEX];
   struct hk_api_shader *tes = gfx->shaders[MESA_SHADER_TESS_EVAL];
   struct hk_api_shader *gs = gfx->shaders[MESA_SHADER_GEOMETRY];

   if (gs)
      return &gs->variants[HK_GS_VARIANT_RAST];
   else if (tes)
      return &tes->variants[HK_VS_VARIANT_HW];
   else
      return &vs->variants[HK_VS_VARIANT_HW];
}

static inline struct hk_shader *
hk_bound_sw_vs(struct hk_graphics_state *gfx)
{
   struct hk_api_shader *vs = gfx->shaders[MESA_SHADER_VERTEX];
   struct hk_shader *hw_vs = hk_bound_hw_vs(gfx);

   if (hw_vs == &vs->variants[HK_VS_VARIANT_HW])
      return hw_vs;
   else
      return &vs->variants[HK_VS_VARIANT_SW];
}

static inline struct hk_shader *
hk_bound_sw_vs_before_gs(struct hk_graphics_state *gfx)
{
   struct hk_api_shader *vs = gfx->shaders[MESA_SHADER_VERTEX];
   struct hk_api_shader *tes = gfx->shaders[MESA_SHADER_TESS_EVAL];
   struct hk_api_shader *api = tes ?: vs;

   return &api->variants[HK_VS_VARIANT_SW];
}

struct agx_ptr hk_pool_alloc_internal(struct hk_cmd_buffer *cmd, uint32_t size,
                                      uint32_t alignment, bool usc);

uint64_t hk_pool_upload(struct hk_cmd_buffer *cmd, const void *data,
                        uint32_t size, uint32_t alignment);

static inline struct agx_ptr
hk_pool_alloc(struct hk_cmd_buffer *cmd, uint32_t size, uint32_t alignment)
{
   return hk_pool_alloc_internal(cmd, size, alignment, false);
}

static inline struct agx_ptr
hk_pool_usc_alloc(struct hk_cmd_buffer *cmd, uint32_t size, uint32_t alignment)
{
   return hk_pool_alloc_internal(cmd, size, alignment, true);
}
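
/* Usage sketch (illustrative, not taken from the driver): pool allocations
 * return a fat pointer carrying both a CPU mapping to write through and a GPU
 * address for the hardware to consume; "params" below is hypothetical.
 *
 *    struct agx_ptr t = hk_pool_alloc(cmd, sizeof(params), 16);
 *    if (t.cpu) {
 *       memcpy(t.cpu, &params, sizeof(params));
 *       // t.gpu can now be referenced from the root table / control stream
 *    }
 *
 * hk_pool_upload() is the equivalent copy-in convenience, returning only the
 * GPU address.
 */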

void hk_cs_init_graphics(struct hk_cmd_buffer *cmd, struct hk_cs *cs);
uint32_t hk_default_sample_positions(unsigned nr_samples);

static inline struct hk_cs *
hk_cmd_buffer_get_cs_general(struct hk_cmd_buffer *cmd, struct hk_cs **ptr,
                             bool compute)
{
   if ((*ptr) == NULL) {
      /* Allocate root control stream */
      size_t initial_size = 65536;
      struct agx_ptr root = hk_pool_alloc(cmd, initial_size, 1024);
      if (!root.cpu)
         return NULL;

      /* Allocate hk_cs for the new stream */
      struct hk_cs *cs = malloc(sizeof(*cs));
      *cs = (struct hk_cs){
         .cmd = cmd,
         .type = compute ? HK_CS_CDM : HK_CS_VDM,
         .addr = root.gpu,
         .start = root.cpu,
         .chunk = root,
         .current = root.cpu,
         .end = root.cpu + initial_size,
      };

      list_inithead(&cs->node);

      bool before_gfx = (ptr == &cmd->current_cs.pre_gfx);

      /* Insert into the command buffer. We usually append to the end of the
       * command buffer, except for pre-graphics streams which go right before
       * the graphics workload. (This implies a level of out-of-order processing
       * that's allowed by Vulkan and required for efficient
       * geometry/tessellation shaders.)
       */
      if (before_gfx && cmd->current_cs.gfx) {
         list_addtail(&cs->node, &cmd->current_cs.gfx->node);
      } else {
         list_addtail(&cs->node, &cmd->control_streams);
      }

      *ptr = cs;

      if (!compute)
         hk_cs_init_graphics(cmd, cs);
   }

   assert(*ptr != NULL);
   return *ptr;
}

static inline struct hk_cs *
hk_cmd_buffer_get_cs(struct hk_cmd_buffer *cmd, bool compute)
{
   struct hk_cs **ptr = compute ? &cmd->current_cs.cs : &cmd->current_cs.gfx;
   return hk_cmd_buffer_get_cs_general(cmd, ptr, compute);
}
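
/* Illustrative usage (a sketch, not taken from the driver): command recording
 * typically starts by fetching the appropriate stream, which is created lazily
 * on first use.
 *
 *    struct hk_cs *cs = hk_cmd_buffer_get_cs(cmd, true); // true = CDM/compute
 *    if (cs == NULL)
 *       return; // allocating the root chunk failed
 */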

void hk_ensure_cs_has_space(struct hk_cmd_buffer *cmd, struct hk_cs *cs,
                            size_t space);

static inline uint64_t
hk_cs_alloc_for_indirect(struct hk_cs *cs, size_t size_B)
{
   hk_ensure_cs_has_space(cs->cmd, cs, size_B);

   uint64_t addr = hk_cs_current_addr(cs);
   cs->current += size_B;
   return addr;
}

static void
hk_cmd_buffer_dirty_all(struct hk_cmd_buffer *cmd)
{
   struct vk_dynamic_graphics_state *dyn = &cmd->vk.dynamic_graphics_state;
   struct hk_graphics_state *gfx = &cmd->state.gfx;

   vk_dynamic_graphics_state_dirty_all(dyn);
   gfx->dirty = ~0;
   gfx->shaders_dirty = ~0;
   gfx->linked_dirty = ~0;
   gfx->descriptors.root_dirty = true;
}

static inline void
hk_cs_destroy(struct hk_cs *cs)
{
   if (cs->type == HK_CS_VDM) {
      util_dynarray_fini(&cs->scissor);
      util_dynarray_fini(&cs->depth_bias);
   } else {
      util_dynarray_fini(&cs->imm_writes);
   }

   free(cs);
}

void hk_dispatch_imm_writes(struct hk_cmd_buffer *cmd, struct hk_cs *cs);

static void
hk_cmd_buffer_end_compute_internal(struct hk_cmd_buffer *cmd,
                                   struct hk_cs **ptr)
{
   if (*ptr) {
      struct hk_cs *cs = *ptr;

      /* This control stream may write immediates as it ends. Queue the writes
       * now that we're done emitting everything else.
       */
      if (cs->imm_writes.size) {
         hk_dispatch_imm_writes(cmd, cs);
      }

      cs->current = agx_cdm_terminate(cs->current);
   }

   *ptr = NULL;
}

static void
hk_cmd_buffer_end_compute(struct hk_cmd_buffer *cmd)
{
   hk_cmd_buffer_end_compute_internal(cmd, &cmd->current_cs.cs);
}

static void
hk_cmd_buffer_end_graphics(struct hk_cmd_buffer *cmd)
{
   struct hk_cs *cs = cmd->current_cs.gfx;

   if (cs) {
      /* Scissor and depth bias arrays are staged to dynamic arrays on the CPU.
       * When we end the control stream, they're done growing and are ready for
       * upload.
       */
      cs->uploaded_scissor =
         hk_pool_upload(cmd, cs->scissor.data, cs->scissor.size, 64);

      cs->uploaded_zbias =
         hk_pool_upload(cmd, cs->depth_bias.data, cs->depth_bias.size, 64);

      /* TODO: maybe free scissor/depth_bias now? */

      cmd->current_cs.gfx->current = agx_vdm_terminate(cs->current);
      cmd->current_cs.gfx = NULL;
   }

   hk_cmd_buffer_end_compute_internal(cmd, &cmd->current_cs.pre_gfx);
   hk_cmd_buffer_end_compute_internal(cmd, &cmd->current_cs.post_gfx);

   assert(cmd->current_cs.gfx == NULL);

   /* We just flushed out the heap use. If we want to use it again, we'll need
    * to queue a free for it again.
    */
   cmd->uses_heap = false;
}

static inline uint64_t
hk_pipeline_stat_addr(struct hk_cmd_buffer *cmd,
                      VkQueryPipelineStatisticFlagBits stat)
{
   struct hk_root_descriptor_table *root = &cmd->state.gfx.descriptors.root;
   VkQueryPipelineStatisticFlags flags = root->draw.pipeline_stats_flags;

   if (flags & stat) {
      assert(!cmd->in_meta && "queries paused for meta");
      assert(util_bitcount(stat) == 1 && "by construction");

      /* Prefix sum to determine the compacted index in the query pool */
      uint32_t index = util_bitcount(flags & (stat - 1));

      return root->draw.pipeline_stats + (sizeof(uint64_t) * index);
   } else {
      /* Query disabled */
      return 0;
   }
}
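
/* Worked example (illustrative): suppose the bound query pool was created with
 * flags = INPUT_ASSEMBLY_VERTICES | VERTEX_SHADER_INVOCATIONS |
 * CLIPPING_INVOCATIONS (0x1 | 0x4 | 0x20). For stat = CLIPPING_INVOCATIONS,
 * util_bitcount(flags & (stat - 1)) = util_bitcount(0x25 & 0x1f) = 2, so the
 * counter lives at pipeline_stats + 2 * sizeof(uint64_t): the statistics are
 * stored compacted, in the order of their enabled bits.
 */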

void hk_cmd_buffer_begin_graphics(struct hk_cmd_buffer *cmd,
                                  const VkCommandBufferBeginInfo *pBeginInfo);
void hk_cmd_buffer_begin_compute(struct hk_cmd_buffer *cmd,
                                 const VkCommandBufferBeginInfo *pBeginInfo);

void hk_cmd_invalidate_graphics_state(struct hk_cmd_buffer *cmd);
void hk_cmd_invalidate_compute_state(struct hk_cmd_buffer *cmd);

void hk_cmd_bind_shaders(struct vk_command_buffer *vk_cmd, uint32_t stage_count,
                         const gl_shader_stage *stages,
                         struct vk_shader **const shaders);

void hk_cmd_bind_graphics_shader(struct hk_cmd_buffer *cmd,
                                 const gl_shader_stage stage,
                                 struct hk_api_shader *shader);

void hk_cmd_bind_compute_shader(struct hk_cmd_buffer *cmd,
                                struct hk_api_shader *shader);

void hk_cmd_bind_vertex_buffer(struct hk_cmd_buffer *cmd, uint32_t vb_idx,
                               struct hk_addr_range addr_range);

static inline struct hk_descriptor_state *
hk_get_descriptors_state(struct hk_cmd_buffer *cmd,
                         VkPipelineBindPoint bind_point)
{
   switch (bind_point) {
   case VK_PIPELINE_BIND_POINT_GRAPHICS:
      return &cmd->state.gfx.descriptors;
   case VK_PIPELINE_BIND_POINT_COMPUTE:
      return &cmd->state.cs.descriptors;
   default:
      unreachable("Unhandled bind point");
   }
};

void hk_cmd_buffer_flush_push_descriptors(struct hk_cmd_buffer *cmd,
                                          struct hk_descriptor_state *desc);

void hk_meta_resolve_rendering(struct hk_cmd_buffer *cmd,
                               const VkRenderingInfo *pRenderingInfo);

uint64_t hk_cmd_buffer_upload_root(struct hk_cmd_buffer *cmd,
                                   VkPipelineBindPoint bind_point);

void hk_reserve_scratch(struct hk_cmd_buffer *cmd, struct hk_cs *cs,
                        struct hk_shader *s);

uint32_t hk_upload_usc_words(struct hk_cmd_buffer *cmd, struct hk_shader *s,
                             struct hk_linked_shader *linked);

void hk_usc_upload_spilled_rt_descs(struct agx_usc_builder *b,
                                    struct hk_cmd_buffer *cmd);

void hk_cdm_cache_flush(struct hk_device *dev, struct hk_cs *cs);

void hk_dispatch_with_usc_launch(struct hk_device *dev, struct hk_cs *cs,
                                 struct agx_cdm_launch_word_0_packed launch,
                                 uint32_t usc, struct agx_grid grid,
                                 struct agx_workgroup local_size);

void hk_dispatch_with_usc(struct hk_device *dev, struct hk_cs *cs,
                          struct agx_shader_info *info, uint32_t usc,
                          struct agx_grid grid,
                          struct agx_workgroup local_size);

static inline void
hk_dispatch_with_local_size(struct hk_cmd_buffer *cmd, struct hk_cs *cs,
                            struct hk_shader *s, struct agx_grid grid,
                            struct agx_workgroup local_size)
{
   struct hk_device *dev = hk_cmd_buffer_device(cmd);
   uint32_t usc = hk_upload_usc_words(cmd, s, s->only_linked);

   hk_reserve_scratch(cmd, cs, s);
   hk_dispatch_with_usc(dev, cs, &s->b.info, usc, grid, local_size);
}

void hk_dispatch_precomp(struct hk_cs *cs, struct agx_grid grid,
                         enum agx_barrier barrier, enum libagx_program idx,
                         void *data, size_t data_size);

#define MESA_DISPATCH_PRECOMP hk_dispatch_precomp

void hk_queue_write(struct hk_cmd_buffer *cmd, uint64_t address, uint32_t value,
                    bool after_gfx);
814