/*
 * Copyright © 2022 Imagination Technologies Ltd.
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * based in part on radv driver which is:
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
30 
31 #ifndef PVR_PRIVATE_H
32 #define PVR_PRIVATE_H
33 
34 #include <assert.h>
35 #include <stdbool.h>
36 #include <stdint.h>
37 #include <vulkan/vulkan.h>
38 
39 #include "compiler/shader_enums.h"
40 #include "hwdef/rogue_hw_defs.h"
41 #include "pco/pco.h"
42 #include "pco/pco_data.h"
43 #include "pvr_border.h"
44 #include "pvr_clear.h"
45 #include "pvr_common.h"
46 #include "pvr_csb.h"
47 #include "pvr_device_info.h"
48 #include "pvr_entrypoints.h"
49 #include "pvr_hw_pass.h"
50 #include "pvr_job_render.h"
51 #include "pvr_limits.h"
52 #include "pvr_pds.h"
53 #include "usc/programs/pvr_shader_factory.h"
54 #include "pvr_spm.h"
55 #include "pvr_types.h"
56 #include "pvr_winsys.h"
57 #include "rogue/rogue.h"
58 #include "util/bitscan.h"
59 #include "util/format/u_format.h"
60 #include "util/log.h"
61 #include "util/macros.h"
62 #include "util/simple_mtx.h"
63 #include "util/u_dynarray.h"
64 #include "util/u_math.h"
65 #include "vk_buffer.h"
66 #include "vk_buffer_view.h"
67 #include "vk_command_buffer.h"
68 #include "vk_device.h"
69 #include "vk_enum_to_str.h"
70 #include "vk_graphics_state.h"
71 #include "vk_image.h"
72 #include "vk_instance.h"
73 #include "vk_log.h"
74 #include "vk_physical_device.h"
75 #include "vk_queue.h"
76 #include "vk_sync.h"
77 #include "wsi_common.h"
78 
79 #ifdef HAVE_VALGRIND
80 #   include <valgrind/valgrind.h>
81 #   include <valgrind/memcheck.h>
82 #   define VG(x) x
83 #else
84 #   define VG(x) ((void)0)
85 #endif
86 
87 struct pvr_bo;
88 struct pvr_bo_store;
89 struct pvr_compute_ctx;
90 struct pvr_compute_pipeline;
91 struct pvr_free_list;
92 struct pvr_graphics_pipeline;
93 struct pvr_instance;
94 struct pvr_render_ctx;
95 struct rogue_compiler;
96 
/** Physical device state for a single PowerVR GPU. */
struct pvr_physical_device {
   struct vk_physical_device vk;

   /* Back-pointer to instance */
   struct pvr_instance *instance;

   /* Device node paths (names suggest DRM render/display nodes — verify
    * against where these are populated).
    */
   char *render_path;
   char *display_path;

   /* Window-system/kernel interface and queried device properties. */
   struct pvr_winsys *ws;
   struct pvr_device_info dev_info;
   struct pvr_device_runtime_info dev_runtime_info;

   VkPhysicalDeviceMemoryProperties memory;

   /* Running heap usage counter (units not visible here — presumably bytes). */
   uint64_t heap_used;

   struct wsi_device wsi_device;

   /* Shader compiler contexts (legacy rogue compiler and PCO). */
   struct rogue_compiler *compiler;
   pco_ctx *pco_ctx;
};
119 
/** Driver instance state. */
struct pvr_instance {
   struct vk_instance vk;

   /* Number of logical devices currently alive on this instance. */
   uint32_t active_device_count;
};
125 
/** A Vulkan queue with its per-job-type hardware contexts. */
struct pvr_queue {
   struct vk_queue vk;

   struct pvr_device *device;

   /* Hardware contexts used for the different job types submitted on this
    * queue.
    */
   struct pvr_render_ctx *gfx_ctx;
   struct pvr_compute_ctx *compute_ctx;
   struct pvr_compute_ctx *query_ctx;
   struct pvr_transfer_ctx *transfer_ctx;

   /* Per job type: sync signaled by the last submitted job, and sync the next
    * job must wait on.
    */
   struct vk_sync *last_job_signal_sync[PVR_JOB_TYPE_MAX];
   struct vk_sync *next_job_wait_sync[PVR_JOB_TYPE_MAX];
};
139 
/** A bound vertex buffer: buffer plus byte offset into it. */
struct pvr_vertex_binding {
   struct pvr_buffer *buffer;
   VkDeviceSize offset;
};
144 
/** A PDS program uploaded to device memory (data and code segments). */
struct pvr_pds_upload {
   /* Backing sub-allocation holding both segments. */
   struct pvr_suballoc_bo *pvr_bo;
   /* Offset from the pds heap base address. */
   uint32_t data_offset;
   /* Offset from the pds heap base address. */
   uint32_t code_offset;

   /* data_size + code_size = program_size. */
   uint32_t data_size;
   uint32_t code_size;
};
156 
/** USC + PDS programs used to implement a query operation with compute. */
struct pvr_compute_query_shader {
   /* Buffer holding the USC shader binary. */
   struct pvr_suballoc_bo *usc_bo;

   /* Primary PDS program and its data/temp requirements. */
   struct pvr_pds_upload pds_prim_code;
   uint32_t primary_data_size_dw;
   uint32_t primary_num_temps;

   /* Info and upload for the secondary PDS program. */
   struct pvr_pds_info info;
   struct pvr_pds_upload pds_sec_code;
};
167 
/** Logical device: queues, heaps, sub-allocators and shared device programs. */
struct pvr_device {
   struct vk_device vk;
   struct pvr_instance *instance;
   struct pvr_physical_device *pdevice;

   struct pvr_winsys *ws;
   struct pvr_winsys_heaps heaps;

   struct pvr_free_list *global_free_list;

   /* Queue array of queue_count elements. */
   struct pvr_queue *queues;
   uint32_t queue_count;

   /* Running count of the number of job submissions across all queues. */
   uint32_t global_cmd_buffer_submit_count;

   /* Running count of the number of presentations across all queues. */
   uint32_t global_queue_present_count;

   uint32_t pixel_event_data_size_in_dwords;

   uint64_t input_attachment_sampler;

   /* Shared PDS compute programs (fence and empty/no-op). */
   struct pvr_pds_upload pds_compute_fence_program;
   struct pvr_pds_upload pds_compute_empty_program;

   /* Compute shaders for queries. */
   struct pvr_compute_query_shader availability_shader;
   struct pvr_compute_query_shader *copy_results_shaders;
   struct pvr_compute_query_shader *reset_queries_shaders;

   /* Sub-allocators for the different device memory heaps. */
   struct pvr_suballocator suballoc_general;
   struct pvr_suballocator suballoc_pds;
   struct pvr_suballocator suballoc_transfer;
   struct pvr_suballocator suballoc_usc;
   struct pvr_suballocator suballoc_vis_test;

   /* No-op USC shader plus the PDS program that launches it. */
   struct {
      struct pvr_pds_upload pds;
      struct pvr_suballoc_bo *usc;
   } nop_program;

   /* Issue Data Fence, Wait for Data Fence state. */
   struct {
      uint32_t usc_shareds;
      struct pvr_suballoc_bo *usc;

      /* Buffer in which the IDF/WDF program performs store ops. */
      struct pvr_bo *store_bo;
      /* Contains the initialization values for the shared registers. */
      struct pvr_bo *shareds_bo;

      struct pvr_pds_upload pds;
      struct pvr_pds_upload sw_compute_barrier_pds;
   } idfwdf_state;

   /* Pre-built programs and state words used to implement clears. */
   struct pvr_device_static_clear_state {
      struct pvr_suballoc_bo *usc_vertex_shader_bo;
      struct pvr_suballoc_bo *vertices_bo;
      struct pvr_pds_upload pds;

      /* Only valid if PVR_HAS_FEATURE(dev_info, gs_rta_support). */
      struct pvr_suballoc_bo *usc_multi_layer_vertex_shader_bo;

      struct pvr_static_clear_ppp_base ppp_base;
      /* Indexable using VkImageAspectFlags. */
      struct pvr_static_clear_ppp_template
         ppp_templates[PVR_STATIC_CLEAR_VARIANT_COUNT];

      const uint32_t *vdm_words;
      const uint32_t *large_clear_vdm_words;

      struct pvr_suballoc_bo *usc_clear_attachment_programs;
      struct pvr_suballoc_bo *pds_clear_attachment_programs;
      /* TODO: See if we can use PVR_CLEAR_ATTACHMENT_PROGRAM_COUNT to save some
       * memory.
       */
      struct pvr_pds_clear_attachment_program_info {
         pvr_dev_addr_t texture_program_offset;
         pvr_dev_addr_t pixel_program_offset;

         uint32_t texture_program_pds_temps_count;
         /* Size in dwords. */
         uint32_t texture_program_data_size;
      } pds_clear_attachment_program_info
         [PVR_CLEAR_ATTACHMENT_PROGRAM_COUNT_WITH_HOLES];
   } static_clear_state;

   /* SPM load programs, one entry per load-program variant. */
   struct {
      struct pvr_suballoc_bo *usc_programs;
      struct pvr_suballoc_bo *pds_programs;

      struct pvr_spm_per_load_program_state {
         pvr_dev_addr_t pds_pixel_program_offset;
         pvr_dev_addr_t pds_uniform_program_offset;

         uint32_t pds_texture_program_data_size;
         uint32_t pds_texture_program_temps_count;
      } load_program[PVR_SPM_LOAD_PROGRAM_COUNT];
   } spm_load_state;

   /* Lazily grown pool of tile buffers, guarded by mtx. */
   struct pvr_device_tile_buffer_state {
      simple_mtx_t mtx;

#define PVR_MAX_TILE_BUFFER_COUNT 7U
      struct pvr_bo *buffers[PVR_MAX_TILE_BUFFER_COUNT];
      uint32_t buffer_count;
   } tile_buffer_state;

   struct pvr_spm_scratch_buffer_store spm_scratch_buffer_store;

   struct pvr_bo_store *bo_store;

   /* Buffer used to back out-of-bounds accesses for robustness. */
   struct pvr_bo *robustness_buffer;

   /* A sync created in the already-signaled state. */
   struct vk_sync *presignaled_sync;

   struct pvr_border_color_table border_color_table;
};
287 
/** A VkDeviceMemory allocation, backed by a winsys buffer object. */
struct pvr_device_memory {
   struct vk_object_base base;
   struct pvr_winsys_bo *bo;
};
292 
/** Layout of one mip level within an image's memory. */
struct pvr_mip_level {
   /* Offset of the mip level in bytes */
   uint32_t offset;

   /* Aligned mip level size in bytes */
   uint32_t size;

   /* Aligned row length in bytes */
   uint32_t pitch;

   /* Aligned height in bytes */
   uint32_t height_pitch;
};
306 
/** A VkImage plus its device mapping and derived layout information. */
struct pvr_image {
   struct vk_image vk;

   /* vma this image is bound to */
   struct pvr_winsys_vma *vma;

   /* Device address the image is mapped to in device virtual address space */
   pvr_dev_addr_t dev_addr;

   /* Derived and other state */
   VkExtent3D physical_extent;
   enum pvr_memlayout memlayout;
   VkDeviceSize layer_size;
   VkDeviceSize size;

   VkDeviceSize alignment;

   /* Per-mip layout; 14 appears to be the max supported mip count —
    * TODO confirm where this bound comes from.
    */
   struct pvr_mip_level mip_levels[14];
};
326 
/** A VkBuffer plus its device mapping. */
struct pvr_buffer {
   struct vk_buffer vk;

   /* Derived and other state */
   uint32_t alignment;
   /* vma this buffer is bound to */
   struct pvr_winsys_vma *vma;
   /* Device address the buffer is mapped to in device virtual address space */
   pvr_dev_addr_t dev_addr;
};
337 
/** A VkImageView with pre-packed hardware texture state. */
struct pvr_image_view {
   struct vk_image_view vk;

   /* Prepacked Texture Image dword 0 and 1. It will be copied to the
    * descriptor info during pvr_UpdateDescriptorSets().
    *
    * We create separate texture states for sampling, storage and input
    * attachment cases.
    */
   uint64_t texture_state[PVR_TEXTURE_STATE_MAX_ENUM][2];
};
349 
/** A VkBufferView with pre-packed hardware texture state. */
struct pvr_buffer_view {
   struct vk_buffer_view vk;

   /* Prepacked Texture dword 0 and 1. It will be copied to the descriptor
    * during pvr_UpdateDescriptorSets().
    */
   uint64_t texture_state[2];
};
358 
359 #define PVR_TRANSFER_MAX_SOURCES 10U
360 #define PVR_TRANSFER_MAX_CUSTOM_MAPPINGS 6U
361 
/** A surface describes a source or destination for a transfer operation. */
struct pvr_transfer_cmd_surface {
   pvr_dev_addr_t dev_addr;

   /* Memory address for extra U/V planes. */
   pvr_dev_addr_t uv_address[2];

   /* Surface width in texels. */
   uint32_t width;

   /* Surface height in texels. */
   uint32_t height;

   uint32_t depth;

   /* Z position in a 3D texture. 0.0f <= z_position <= depth. */
   float z_position;

   /* Stride in texels. */
   uint32_t stride;

   VkFormat vk_format;

   enum pvr_memlayout mem_layout;

   uint32_t sample_count;
};
389 
/** Maps a source rectangle to a destination rectangle, with optional flips. */
struct pvr_rect_mapping {
   VkRect2D src_rect;
   VkRect2D dst_rect;
   bool flip_x;
   bool flip_y;
};
396 
/** One source of a transfer command: surface plus sampling parameters. */
struct pvr_transfer_cmd_source {
   struct pvr_transfer_cmd_surface surface;

   /* Rect mappings in use; at most PVR_TRANSFER_MAX_CUSTOM_MAPPINGS. */
   uint32_t mapping_count;
   struct pvr_rect_mapping mappings[PVR_TRANSFER_MAX_CUSTOM_MAPPINGS];

   /* In the case of a simple 1:1 copy, this setting does not affect the output
    * but will affect performance. Use clamp to edge when possible.
    */
   /* This is of type enum ROGUE_TEXSTATE_ADDRMODE. */
   int addr_mode;

   /* Source filtering method. */
   enum pvr_filter filter;

   /* MSAA resolve operation. */
   enum pvr_resolve_op resolve_op;
};
415 
/** A single transfer (copy/blit/clear) command recorded in a command buffer. */
struct pvr_transfer_cmd {
   /* Node to link this cmd into the transfer_cmds list in
    * pvr_sub_cmd::transfer structure.
    */
   struct list_head link;

   uint32_t flags;

   /* Sources in use; at most PVR_TRANSFER_MAX_SOURCES. */
   uint32_t source_count;

   struct pvr_transfer_cmd_source sources[PVR_TRANSFER_MAX_SOURCES];

   /* Clear color, one float/int union per channel. */
   union fi clear_color[4];

   struct pvr_transfer_cmd_surface dst;

   VkRect2D scissor;

   /* Pointer to cmd buffer this transfer cmd belongs to. This is mainly used
    * to link buffer objects allocated during job submission into
    * cmd_buffer::bo_list head.
    */
   struct pvr_cmd_buffer *cmd_buffer;

   /* Deferred RTA clears are allocated from pvr_cmd_buffer->deferred_clears and
    * cannot be freed directly.
    */
   bool is_deferred_clear;
};
445 
/** State for a graphics (render) sub-command. */
struct pvr_sub_cmd_gfx {
   const struct pvr_framebuffer *framebuffer;

   struct pvr_render_job job;

   /* Uploaded depth-bias and scissor arrays for this render. */
   struct pvr_suballoc_bo *depth_bias_bo;
   struct pvr_suballoc_bo *scissor_bo;

   /* Tracking how the loaded depth/stencil values are being used. */
   enum pvr_depth_stencil_usage depth_usage;
   enum pvr_depth_stencil_usage stencil_usage;

   /* Tracking whether the subcommand modifies depth/stencil. */
   bool modifies_depth;
   bool modifies_stencil;

   /* Store the render to a scratch buffer. */
   bool barrier_store;
   /* Load the render (stored with a `barrier_store`) as a background to the
    * current render.
    */
   bool barrier_load;

   const struct pvr_query_pool *query_pool;
   struct util_dynarray sec_query_indices;

   /* Control stream builder object */
   struct pvr_csb control_stream;

   /* Required iff pvr_sub_cmd_gfx_requires_split_submit() returns true. */
   struct pvr_bo *terminate_ctrl_stream;

   uint32_t hw_render_idx;

   uint32_t max_tiles_in_flight;

   /* True if this sub-command contains no actual work. */
   bool empty_cmd;

   /* True if any fragment shader used in this sub command uses atomic
    * operations.
    */
   bool frag_uses_atomic_ops;

   bool disable_compute_overlap;

   /* True if any fragment shader used in this sub command has side
    * effects.
    */
   bool frag_has_side_effects;

   /* True if any vertex shader used in this sub command contains both
    * texture reads and texture writes.
    */
   bool vertex_uses_texture_rw;

   /* True if any fragment shader used in this sub command contains
    * both texture reads and texture writes.
    */
   bool frag_uses_texture_rw;

   bool has_occlusion_query;

   bool wait_on_previous_transfer;
};
510 
/** State for a compute sub-command. */
struct pvr_sub_cmd_compute {
   /* Control stream builder object. */
   struct pvr_csb control_stream;

   uint32_t num_shared_regs;

   /* True if any shader used in this sub command uses atomic
    * operations.
    */
   bool uses_atomic_ops;

   bool uses_barrier;

   bool pds_sw_barrier_requires_clearing;
};
526 
/** State for a transfer sub-command. */
struct pvr_sub_cmd_transfer {
   bool serialize_with_frag;

   /* Pointer to the actual transfer command list, allowing primary and
    * secondary sub-commands to share the same list.
    */
   struct list_head *transfer_cmds;

   /* List of pvr_transfer_cmd type structures. Do not access the list
    * directly, but always use the transfer_cmds pointer above.
    */
   struct list_head transfer_cmds_priv;
};
540 
/** State for an event sub-command (set/reset, wait, or barrier). */
struct pvr_sub_cmd_event {
   /* Selects which union member below is valid. */
   enum pvr_event_type type;

   union {
      struct pvr_sub_cmd_event_set_reset {
         struct pvr_event *event;
         /* Stages to wait for until the event is set or reset. */
         uint32_t wait_for_stage_mask;
      } set_reset;

      struct pvr_sub_cmd_event_wait {
         /* Number of entries in both arrays below. */
         uint32_t count;
         /* Events to wait for before resuming. */
         struct pvr_event **events;
         /* Stages to wait at. */
         uint32_t *wait_at_stage_masks;
      } wait;

      struct pvr_sub_cmd_event_barrier {
         /* Stages to wait for. */
         uint32_t wait_for_stage_mask;
         /* Stages to wait at. */
         uint32_t wait_at_stage_mask;
      } barrier;
   };
};
567 
/** A sub-command: one unit of work within a command buffer. */
struct pvr_sub_cmd {
   /* This links the subcommand in pvr_cmd_buffer:sub_cmds list. */
   struct list_head link;

   /* Selects which union member below is valid. */
   enum pvr_sub_cmd_type type;

   /* True if the sub_cmd is owned by this command buffer. False if taken from
    * a secondary command buffer, in that case we are not supposed to free any
    * resources associated with the sub_cmd.
    */
   bool owned;

   union {
      struct pvr_sub_cmd_gfx gfx;
      struct pvr_sub_cmd_compute compute;
      struct pvr_sub_cmd_transfer transfer;
      struct pvr_sub_cmd_event event;
   };
};
587 
/** Per-recording state for the render pass currently being recorded. */
struct pvr_render_pass_info {
   const struct pvr_render_pass *pass;
   struct pvr_framebuffer *framebuffer;

   struct pvr_image_view **attachments;

   uint32_t subpass_idx;
   uint32_t current_hw_subpass;

   VkRect2D render_area;

   /* Clear values array of clear_value_count elements. */
   uint32_t clear_value_count;
   VkClearValue *clear_values;

   VkPipelineBindPoint pipeline_bind_point;

   bool process_empty_tiles;
   bool enable_bg_tag;
   uint32_t isp_userpass;
};
608 
/** Shadow copy of the PPP state words tracked during command recording. */
struct pvr_ppp_state {
   uint32_t header;

   /* ISP (triangle/depth-stencil) control and state words. */
   struct {
      /* TODO: Can we get rid of the "control" field? */
      struct ROGUE_TA_STATE_ISPCTL control_struct;
      uint32_t control;

      uint32_t front_a;
      uint32_t front_b;
      uint32_t back_a;
      uint32_t back_b;
   } isp;

   struct pvr_ppp_dbsc {
      uint16_t scissor_index;
      uint16_t depthbias_index;
   } depthbias_scissor_indices;

   /* PDS state words for the fragment stage. */
   struct {
      uint32_t pixel_shader_base;
      uint32_t texture_uniform_code_base;
      uint32_t size_info1;
      uint32_t size_info2;
      uint32_t varying_base;
      uint32_t texture_state_data_base;
      uint32_t uniform_state_data_base;
   } pds;

   struct {
      uint32_t word0;
      uint32_t word1;
   } region_clipping;

   /* Per-viewport scale (m*) and offset (a*) words. */
   struct {
      uint32_t a0;
      uint32_t m0;
      uint32_t a1;
      uint32_t m1;
      uint32_t a2;
      uint32_t m2;
   } viewports[PVR_MAX_VIEWPORTS];

   uint32_t viewport_count;

   uint32_t output_selects;

   uint32_t varying_word[2];

   uint32_t ppp_control;
};
660 
/* Represents a control stream related command that is deferred for execution in
 * a secondary command buffer.
 */
struct pvr_deferred_cs_command {
   /* Selects which union member below is valid. */
   enum pvr_deferred_cs_command_type type;
   union {
      struct {
         struct pvr_ppp_dbsc state;

         uint32_t *vdm_state;
      } dbsc;

      struct {
         struct pvr_ppp_dbsc state;

         /* PPP control stream buffer to patch, at patch_offset. */
         struct pvr_suballoc_bo *ppp_cs_bo;
         uint32_t patch_offset;
      } dbsc2;
   };
};
681 
/** Draw parameters tracked for change detection between draw calls. */
struct pvr_cmd_buffer_draw_state {
   uint32_t base_instance;
   uint32_t base_vertex;
   bool draw_indirect;
   bool draw_indexed;
};
688 
/** All mutable state tracked while recording a command buffer. */
struct pvr_cmd_buffer_state {
   /* Pipeline binding. */
   const struct pvr_graphics_pipeline *gfx_pipeline;

   const struct pvr_compute_pipeline *compute_pipeline;

   struct pvr_render_pass_info render_pass_info;

   struct pvr_sub_cmd *current_sub_cmd;

   struct pvr_ppp_state ppp_state;

   struct ROGUE_TA_STATE_HEADER emit_header;

   struct pvr_vertex_binding vertex_bindings[PVR_MAX_VERTEX_INPUT_BINDINGS];

   /* Currently bound index buffer. */
   struct {
      struct pvr_buffer *buffer;
      VkDeviceSize offset;
      VkIndexType type;
   } index_buffer_binding;

   /* CPU-side copy of the push constants plus upload tracking. */
   struct {
      uint8_t data[PVR_MAX_PUSH_CONSTANTS_SIZE];
      VkShaderStageFlags dirty_stages;
      /* Indicates if the whole push constants buffer was uploaded. This avoids
       * having to upload the same stuff twice when the push constant range
       * covers both gfx and compute.
       */
      bool uploaded;
      pvr_dev_addr_t dev_addr;
   } push_constants;

   /* Array size of barriers_needed is based on number of sync pipeline
    * stages.
    */
   uint32_t barriers_needed[PVR_NUM_SYNC_PIPELINE_STAGES];

   struct pvr_descriptor_state gfx_desc_state;
   struct pvr_descriptor_state compute_desc_state;

   VkFormat depth_format;

   /* Dirty flags: which state needs to be (re-)emitted before the next
    * draw/dispatch.
    */
   struct {
      bool compute_pipeline_binding : 1;
      bool compute_desc_dirty : 1;

      bool gfx_pipeline_binding : 1;
      bool gfx_desc_dirty : 1;

      bool vertex_bindings : 1;
      bool index_buffer_binding : 1;
      bool vertex_descriptors : 1;
      bool fragment_descriptors : 1;

      bool isp_userpass : 1;

      /* Some draw state needs to be tracked for changes between draw calls
       * i.e. if we get a draw with baseInstance=0, followed by a call with
       * baseInstance=1 that needs to cause us to select a different PDS
       * attrib program and update the BASE_INSTANCE PDS const. If only
       * baseInstance changes then we just have to update the data section.
       */
      bool draw_base_instance : 1;
      bool draw_variant : 1;

      bool vis_test;
   } dirty;

   struct pvr_cmd_buffer_draw_state draw_state;

   struct {
      uint32_t code_offset;
      const struct pvr_pds_info *info;
   } pds_shader;

   /* Occlusion/visibility query state. */
   const struct pvr_query_pool *query_pool;
   bool vis_test_enabled;
   uint32_t vis_reg;

   struct util_dynarray query_indices;

   uint32_t max_shared_regs;

   /* Address of data segment for vertex attrib upload program. */
   uint32_t pds_vertex_attrib_offset;

   uint32_t pds_fragment_descriptor_data_offset;
   uint32_t pds_compute_descriptor_data_offset;
};
779 
/* Do not change this. This is the format used for the depth_bias_array
 * elements uploaded to the device.
 */
struct pvr_depth_bias_state {
   /* Saved information from pCreateInfo. */
   float constant_factor;
   float slope_factor;
   float clamp;
};
789 
/* Do not change this. This is the format used for the scissor_array
 * elements uploaded to the device.
 */
struct pvr_scissor_words {
   /* Contains a packed IPF_SCISSOR_WORD_0. */
   uint32_t w0;
   /* Contains a packed IPF_SCISSOR_WORD_1. */
   uint32_t w1;
};
799 
/** A Vulkan command buffer. */
struct pvr_cmd_buffer {
   struct vk_command_buffer vk;

   struct pvr_device *device;

   /* Buffer usage flags */
   VkCommandBufferUsageFlags usage_flags;

   /* Array of struct pvr_depth_bias_state. */
   struct util_dynarray depth_bias_array;

   /* Array of struct pvr_scissor_words. */
   struct util_dynarray scissor_array;
   struct pvr_scissor_words scissor_words;

   struct pvr_cmd_buffer_state state;

   /* List of struct pvr_deferred_cs_command control stream related commands to
    * execute in secondary command buffer.
    */
   struct util_dynarray deferred_csb_commands;
   /* List of struct pvr_transfer_cmd used to emulate RTA clears on non RTA
    * capable cores.
    */
   struct util_dynarray deferred_clears;

   /* List of pvr_bo structs associated with this cmd buffer. */
   struct list_head bo_list;

   /* List of struct pvr_sub_cmd recorded into this command buffer. */
   struct list_head sub_cmds;
};
831 
/** Per-stage PDS descriptor-upload program and its static constants. */
struct pvr_stage_allocation_descriptor_state {
   struct pvr_pds_upload pds_code;
   /* Since we upload the code segment separately from the data segment
    * pds_code->data_size might be 0 whilst
    * pds_info->data_size_in_dwords might be >0 in the case of this struct
    * referring to the code upload.
    */
   struct pvr_pds_info pds_info;

   /* Already setup compile time static consts. */
   struct pvr_suballoc_bo *static_consts;
};
844 
/** A PDS vertex-attribute upload program. */
struct pvr_pds_attrib_program {
   struct pvr_pds_info info;
   /* The uploaded PDS program stored here only contains the code segment,
    * meaning the data size will be 0, unlike the data size stored in the
    * 'info' member above.
    */
   struct pvr_pds_upload program;
};
853 
/** Common per-pipeline-stage state. */
struct pvr_pipeline_stage_state {
   uint32_t pds_temps_count;
};
857 
/** Compiled compute shader state and its register requirements. */
struct pvr_compute_shader_state {
   /* Pointer to a buffer object that contains the shader binary. */
   struct pvr_suballoc_bo *bo;

   bool uses_atomic_ops;
   bool uses_barrier;
   /* E.g. GLSL shader uses gl_NumWorkGroups. */
   bool uses_num_workgroups;

   uint32_t const_shared_reg_count;
   uint32_t input_register_count;
   uint32_t work_size;
   uint32_t coefficient_register_count;
};
872 
/** Compiled vertex shader state and its PDS attribute programs. */
struct pvr_vertex_shader_state {
   /* Pointer to a buffer object that contains the shader binary. */
   struct pvr_suballoc_bo *bo;

   /* One PDS attribute program per variant (base draw, indirect, etc.). */
   struct pvr_pds_attrib_program
      pds_attrib_programs[PVR_PDS_VERTEX_ATTRIB_PROGRAM_COUNT];

   struct pvr_pipeline_stage_state stage_state;
   /* FIXME: Move this into stage_state? */
   struct pvr_stage_allocation_descriptor_state descriptor_state;
};
884 
/** Compiled fragment shader state and its PDS programs. */
struct pvr_fragment_shader_state {
   /* Pointer to a buffer object that contains the shader binary. */
   struct pvr_suballoc_bo *bo;

   struct pvr_pipeline_stage_state stage_state;
   /* FIXME: Move this into stage_state? */
   struct pvr_stage_allocation_descriptor_state descriptor_state;
   enum ROGUE_TA_PASSTYPE pass_type;
   enum ROGUE_PDSINST_DOUTU_SAMPLE_RATE sample_rate;

   struct pvr_pds_upload pds_coeff_program;
   struct pvr_pds_upload pds_fragment_program;
};
898 
/** Base type shared by graphics and compute pipelines. */
struct pvr_pipeline {
   struct vk_object_base base;

   /* Discriminates the derived pipeline type. */
   enum pvr_pipeline_type type;

   /* Saved information from pCreateInfo. */
   struct pvr_pipeline_layout *layout;

   VkPipelineCreateFlags2KHR pipeline_flags;
};
909 
/** A compute pipeline. */
struct pvr_compute_pipeline {
   struct pvr_pipeline base;

   struct pvr_compute_shader_state shader_state;

   struct {
      /* Set when the shader consumes a base workgroup id. */
      uint32_t base_workgroup : 1;
   } flags;

   struct pvr_stage_allocation_descriptor_state descriptor_state;

   struct pvr_pds_upload primary_program;
   struct pvr_pds_info primary_program_info;

   /* Variant of the primary program used when a base workgroup id needs to be
    * patched into the PDS data section.
    */
   struct pvr_pds_base_workgroup_program {
      struct pvr_pds_upload code_upload;

      uint32_t *data_section;
      /* Offset within the PDS data section at which the base workgroup id
       * resides.
       */
      uint32_t base_workgroup_data_patching_offset;

      struct pvr_pds_info info;
   } primary_base_workgroup_variant_program;
};
936 
/** A graphics pipeline. */
struct pvr_graphics_pipeline {
   struct pvr_pipeline base;

   struct vk_dynamic_graphics_state dynamic_state;

   /* Derived and other state */
   size_t stage_indices[MESA_SHADER_STAGES];

   /* Per-stage compiler (PCO) data for vertex and fragment shaders. */
   pco_data vs_data;
   pco_data fs_data;

   struct {
      struct pvr_vertex_shader_state vertex;
      struct pvr_fragment_shader_state fragment;
   } shader_state;
};
953 
/** A VkQueryPool with its device-side result and availability buffers. */
struct pvr_query_pool {
   struct vk_object_base base;

   /* Stride of result_buffer to get to the start of the results for the next
    * Phantom.
    */
   uint32_t result_stride;

   uint32_t query_count;

   struct pvr_suballoc_bo *result_buffer;
   struct pvr_suballoc_bo *availability_buffer;
};
967 
/** Parameters of an internal (driver-generated) compute pipeline. */
struct pvr_private_compute_pipeline {
   /* Used by pvr_compute_update_kernel_private(). */
   uint32_t pds_code_offset;
   uint32_t pds_data_offset;
   uint32_t pds_data_size_dw;
   uint32_t pds_temps_used;
   uint32_t coeff_regs_count;
   uint32_t unified_store_regs_count;
   VkExtent3D workgroup_size;

   /* Used by pvr_compute_update_shared_private(). */
   uint32_t pds_shared_update_code_offset;
   uint32_t pds_shared_update_data_offset;
   uint32_t pds_shared_update_data_size_dw;

   /* Used by both pvr_compute_update_{kernel,shared}_private(). */
   uint32_t const_shared_regs_count;

   pvr_dev_addr_t const_buffer_addr;
};
988 
/** Parameters for a query operation; `type` selects the union member. */
struct pvr_query_info {
   enum pvr_query_type type;

   union {
      struct {
         uint32_t num_query_indices;
         struct pvr_suballoc_bo *index_bo;
         uint32_t num_queries;
         struct pvr_suballoc_bo *availability_bo;
      } availability_write;

      struct {
         VkQueryPool query_pool;
         uint32_t first_query;
         uint32_t query_count;
      } reset_query_pool;

      struct {
         VkQueryPool query_pool;
         uint32_t first_query;
         uint32_t query_count;
         VkBuffer dst_buffer;
         VkDeviceSize dst_offset;
         VkDeviceSize stride;
         VkQueryResultFlags flags;
      } copy_query_results;
   };
};
1017 
/** A render target dataset and the lock guarding its lazy setup. */
struct pvr_render_target {
   struct pvr_rt_dataset *rt_dataset;

   pthread_mutex_t mutex;

   /* True once rt_dataset has been initialized. */
   bool valid;
};
1025 
/** A VkFramebuffer with pre-built per-render state. */
struct pvr_framebuffer {
   struct vk_object_base base;

   /* Saved information from pCreateInfo. */
   uint32_t width;
   uint32_t height;
   uint32_t layers;

   uint32_t attachment_count;
   struct pvr_image_view **attachments;

   /* Derived and other state. */
   struct pvr_suballoc_bo *ppp_state_bo;
   /* PPP state size in dwords. */
   size_t ppp_state_size;

   uint32_t render_targets_count;
   struct pvr_render_target *render_targets;

   /* Scratch buffer used for SPM (overflow) handling. */
   struct pvr_spm_scratch_buffer *scratch_buffer;

   /* Per-render SPM end-of-tile and background-object state arrays of
    * render_count elements.
    */
   uint32_t render_count;
   struct pvr_spm_eot_state *spm_eot_state_per_render;
   struct pvr_spm_bgobj_state *spm_bgobj_state_per_render;
};
1051 
/** One attachment of a render pass. */
struct pvr_render_pass_attachment {
   /* Saved information from pCreateInfo. */
   VkAttachmentLoadOp load_op;

   VkAttachmentStoreOp store_op;

   VkAttachmentLoadOp stencil_load_op;

   VkAttachmentStoreOp stencil_store_op;

   VkFormat vk_format;
   uint32_t sample_count;
   VkImageLayout initial_layout;

   /* Derived and other state. */
   VkImageAspectFlags aspects;

   /* Can this surface be resolved by the PBE. */
   bool is_pbe_downscalable;

   /* Index of this attachment within the render pass. */
   uint32_t index;
};
1074 
/* Per-subpass description for a render pass. */
struct pvr_render_subpass {
   /* Saved information from pCreateInfo. */
   /* The number of samples per color attachment (or depth attachment if
    * z-only).
    */
   /* FIXME: rename to 'samples' to match struct pvr_image */
   uint32_t sample_count;

   /* Attachment indices; color_attachments and resolve_attachments each
    * have color_count elements.
    */
   uint32_t color_count;
   uint32_t *color_attachments;
   uint32_t *resolve_attachments;

   uint32_t input_count;
   uint32_t *input_attachments;

   uint32_t depth_stencil_attachment;

   /*  Derived and other state. */
   uint32_t dep_count;
   uint32_t *dep_list;

   /* Array with dep_count elements. flush_on_dep[x] is true if this subpass
    * and the subpass dep_list[x] can't be in the same hardware render.
    */
   bool *flush_on_dep;

   /* Index of this subpass within the render pass subpass array. */
   uint32_t index;

   /* ISP user pass value assigned to this subpass. */
   uint32_t isp_userpass;

   VkPipelineBindPoint pipeline_bind_point;
};
1107 
/* Driver object backing a VkRenderPass. */
struct pvr_render_pass {
   struct vk_object_base base;

   /* Saved information from pCreateInfo. */
   uint32_t attachment_count;

   struct pvr_render_pass_attachment *attachments;

   uint32_t subpass_count;

   struct pvr_render_subpass *subpasses;

   /* Mapping of the API render pass onto hardware renders. */
   struct pvr_renderpass_hwsetup *hw_setup;

   /*  Derived and other state. */
   /* FIXME: rename to 'max_samples' as we use 'samples' elsewhere */
   uint32_t max_sample_count;

   /* The maximum number of tile buffers to use in any subpass. */
   uint32_t max_tilebuffer_count;
};
1129 
/* Max render targets for the clears loads state in load op.
 * To account for resolve attachments, double the color attachments.
 */
#define PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS (PVR_MAX_COLOR_ATTACHMENTS * 2)

/* State for a "load op": the program run at the start of a render to clear
 * and/or load attachments.
 */
struct pvr_load_op {
   /* Selects which union member is active: presumably true when this load op
    * belongs to a HW render (hw_render) rather than a subpass — TODO confirm.
    */
   bool is_hw_object;

   /* Uploaded USC fragment program and its shared-register usage. */
   struct pvr_suballoc_bo *usc_frag_prog_bo;
   uint32_t const_shareds_count;
   uint32_t shareds_dest_offset;
   uint32_t shareds_count;

   struct pvr_pds_upload pds_frag_prog;

   struct pvr_pds_upload pds_tex_state_prog;
   uint32_t temps_count;

   union {
      const struct pvr_renderpass_hwsetup_render *hw_render;
      const struct pvr_render_subpass *subpass;
   };

   /* TODO: We might not need to keep all of this around. Some stuff might just
    * be for the compiler to ingest which we can then discard.
    */
   struct {
      /* Bitmasks over render-target slots; widths are checked against
       * PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS below via CHECK_MASK_SIZE().
       */
      uint16_t rt_clear_mask;
      uint16_t rt_load_mask;

      uint16_t unresolved_msaa_mask;

      /* The format to write to the output regs. */
      VkFormat dest_vk_format[PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS];

#define PVR_NO_DEPTH_CLEAR_TO_REG (-1)
      /* If >= 0, write a depth clear value to the specified pixel output. */
      int32_t depth_clear_to_reg;
   } clears_loads_state;
};
1170 
/* Compile-time check that the named bitmask field of _struct_type is at
 * least _nr_bits bits wide.
 */
#define CHECK_MASK_SIZE(_struct_type, _field_name, _nr_bits)               \
   static_assert(sizeof(((struct _struct_type *)NULL)->_field_name) * 8 >= \
                    _nr_bits,                                              \
                 #_field_name " mask of struct " #_struct_type " too small")

/* Ensure the pvr_load_op clears/loads masks can cover every RT slot. */
CHECK_MASK_SIZE(pvr_load_op,
                clears_loads_state.rt_clear_mask,
                PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS);
CHECK_MASK_SIZE(pvr_load_op,
                clears_loads_state.rt_load_mask,
                PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS);
CHECK_MASK_SIZE(pvr_load_op,
                clears_loads_state.unresolved_msaa_mask,
                PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS);

#undef CHECK_MASK_SIZE
1187 
/* Fragment-shader common store sizing helper; see definition in the driver
 * sources for the exact return semantics.
 */
uint32_t pvr_calc_fscommon_size_and_tiles_in_flight(
   const struct pvr_device_info *dev_info,
   const struct pvr_device_runtime_info *dev_runtime_info,
   uint32_t fs_common_size,
   uint32_t min_tiles_in_flight);

/* WSI (window system integration) setup/teardown. */
VkResult pvr_wsi_init(struct pvr_physical_device *pdevice);
void pvr_wsi_finish(struct pvr_physical_device *pdevice);

/* Queue creation/destruction for device create/destroy. */
VkResult pvr_queues_create(struct pvr_device *device,
                           const VkDeviceCreateInfo *pCreateInfo);
void pvr_queues_destroy(struct pvr_device *device);

/* Map a range of device memory into the GPU address space; outputs the VMA
 * and its device address.
 */
VkResult pvr_bind_memory(struct pvr_device *device,
                         struct pvr_device_memory *mem,
                         VkDeviceSize offset,
                         VkDeviceSize size,
                         VkDeviceSize alignment,
                         struct pvr_winsys_vma **const vma_out,
                         pvr_dev_addr_t *const dev_addr_out);
void pvr_unbind_memory(struct pvr_device *device, struct pvr_winsys_vma *vma);

/* GPU upload helpers: copy data into freshly sub-allocated GPU memory. */
VkResult pvr_gpu_upload(struct pvr_device *device,
                        struct pvr_winsys_heap *heap,
                        const void *data,
                        size_t size,
                        uint64_t alignment,
                        struct pvr_suballoc_bo **const pvr_bo_out);
VkResult pvr_gpu_upload_pds(struct pvr_device *device,
                            const uint32_t *data,
                            uint32_t data_size_dwords,
                            uint32_t data_alignment,
                            const uint32_t *code,
                            uint32_t code_size_dwords,
                            uint32_t code_alignment,
                            uint64_t min_alignment,
                            struct pvr_pds_upload *const pds_upload_out);
VkResult pvr_gpu_upload_usc(struct pvr_device *device,
                            const void *code,
                            size_t code_size,
                            uint64_t code_alignment,
                            struct pvr_suballoc_bo **const pvr_bo_out);

VkResult pvr_cmd_buffer_add_transfer_cmd(struct pvr_cmd_buffer *cmd_buffer,
                                         struct pvr_transfer_cmd *transfer_cmd);

/* Allocate command-buffer-lifetime GPU memory from the given heap. */
VkResult pvr_cmd_buffer_alloc_mem(struct pvr_cmd_buffer *cmd_buffer,
                                  struct pvr_winsys_heap *heap,
                                  uint64_t size,
                                  struct pvr_suballoc_bo **const pvr_bo_out);

void pvr_calculate_vertex_cam_size(const struct pvr_device_info *dev_info,
                                   const uint32_t vs_output_size,
                                   const bool raster_enable,
                                   uint32_t *const cam_size_out,
                                   uint32_t *const vs_max_instances_out);

void pvr_get_image_subresource_layout(const struct pvr_image *image,
                                      const VkImageSubresource *subresource,
                                      VkSubresourceLayout *layout);
1247 
1248 static inline struct pvr_compute_pipeline *
to_pvr_compute_pipeline(struct pvr_pipeline * pipeline)1249 to_pvr_compute_pipeline(struct pvr_pipeline *pipeline)
1250 {
1251    assert(pipeline->type == PVR_PIPELINE_TYPE_COMPUTE);
1252    return container_of(pipeline, struct pvr_compute_pipeline, base);
1253 }
1254 
1255 static inline struct pvr_graphics_pipeline *
to_pvr_graphics_pipeline(struct pvr_pipeline * pipeline)1256 to_pvr_graphics_pipeline(struct pvr_pipeline *pipeline)
1257 {
1258    assert(pipeline->type == PVR_PIPELINE_TYPE_GRAPHICS);
1259    return container_of(pipeline, struct pvr_graphics_pipeline, base);
1260 }
1261 
1262 static inline const struct pvr_image *
vk_to_pvr_image(const struct vk_image * image)1263 vk_to_pvr_image(const struct vk_image *image)
1264 {
1265    return container_of(image, const struct pvr_image, vk);
1266 }
1267 
1268 static inline const struct pvr_image *
pvr_image_view_get_image(const struct pvr_image_view * const iview)1269 pvr_image_view_get_image(const struct pvr_image_view *const iview)
1270 {
1271    return vk_to_pvr_image(iview->vk.image);
1272 }
1273 
1274 static enum pvr_pipeline_stage_bits
pvr_stage_mask(VkPipelineStageFlags2 stage_mask)1275 pvr_stage_mask(VkPipelineStageFlags2 stage_mask)
1276 {
1277    enum pvr_pipeline_stage_bits stages = 0;
1278 
1279    if (stage_mask & VK_PIPELINE_STAGE_ALL_COMMANDS_BIT)
1280       return PVR_PIPELINE_STAGE_ALL_BITS;
1281 
1282    if (stage_mask & (VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT))
1283       stages |= PVR_PIPELINE_STAGE_ALL_GRAPHICS_BITS;
1284 
1285    if (stage_mask & (VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT |
1286                      VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
1287                      VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
1288                      VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT |
1289                      VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT |
1290                      VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT)) {
1291       stages |= PVR_PIPELINE_STAGE_GEOM_BIT;
1292    }
1293 
1294    if (stage_mask & (VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
1295                      VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
1296                      VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
1297                      VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT)) {
1298       stages |= PVR_PIPELINE_STAGE_FRAG_BIT;
1299    }
1300 
1301    if (stage_mask & (VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT |
1302                      VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT)) {
1303       stages |= PVR_PIPELINE_STAGE_COMPUTE_BIT;
1304    }
1305 
1306    if (stage_mask & (VK_PIPELINE_STAGE_TRANSFER_BIT))
1307       stages |= PVR_PIPELINE_STAGE_TRANSFER_BIT;
1308 
1309    return stages;
1310 }
1311 
1312 static inline enum pvr_pipeline_stage_bits
pvr_stage_mask_src(VkPipelineStageFlags2 stage_mask)1313 pvr_stage_mask_src(VkPipelineStageFlags2 stage_mask)
1314 {
1315    /* If the source is bottom of pipe, all stages will need to be waited for. */
1316    if (stage_mask & VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)
1317       return PVR_PIPELINE_STAGE_ALL_BITS;
1318 
1319    return pvr_stage_mask(stage_mask);
1320 }
1321 
1322 static inline enum pvr_pipeline_stage_bits
pvr_stage_mask_dst(VkPipelineStageFlags2 stage_mask)1323 pvr_stage_mask_dst(VkPipelineStageFlags2 stage_mask)
1324 {
1325    /* If the destination is top of pipe, all stages should be blocked by prior
1326     * commands.
1327     */
1328    if (stage_mask & VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT)
1329       return PVR_PIPELINE_STAGE_ALL_BITS;
1330 
1331    return pvr_stage_mask(stage_mask);
1332 }
1333 
pvr_sub_cmd_gfx_requires_split_submit(const struct pvr_sub_cmd_gfx * const sub_cmd)1334 static inline bool pvr_sub_cmd_gfx_requires_split_submit(
1335    const struct pvr_sub_cmd_gfx *const sub_cmd)
1336 {
1337    return sub_cmd->job.run_frag && sub_cmd->framebuffer->layers > 1;
1338 }
1339 
1340 /* This function is intended to be used when the error being set has been
1341  * returned from a function call, i.e. the error happened further down the
1342  * stack. `vk_command_buffer_set_error()` should be used at the point an error
1343  * occurs, i.e. VK_ERROR_* is being passed in.
1344  * This ensures we only ever get the error printed once.
1345  */
1346 static inline VkResult
pvr_cmd_buffer_set_error_unwarned(struct pvr_cmd_buffer * cmd_buffer,VkResult error)1347 pvr_cmd_buffer_set_error_unwarned(struct pvr_cmd_buffer *cmd_buffer,
1348                                   VkResult error)
1349 {
1350    assert(error != VK_SUCCESS);
1351 
1352    if (cmd_buffer->vk.record_result == VK_SUCCESS)
1353       cmd_buffer->vk.record_result = error;
1354 
1355    return error;
1356 }
1357 
/* How fragment work is partitioned across the samples of an MSAA surface. */
enum pvr_msaa_mode {
   PVR_MSAA_MODE_UNDEF = 0, /* explicitly treat 0 as undefined */
   /* One task for all samples. */
   PVR_MSAA_MODE_PIXEL,
   /* For on-edge pixels only: separate tasks for each sample. */
   PVR_MSAA_MODE_SELECTIVE,
   /* For all pixels: separate tasks for each sample. */
   PVR_MSAA_MODE_FULL,
};
1367 
/* PDS program creation/upload helpers. */
VkResult pvr_pds_fragment_program_create_and_upload(
   struct pvr_device *device,
   const VkAllocationCallbacks *allocator,
   pco_shader *fs,
   struct pvr_fragment_shader_state *fragment_state);

VkResult pvr_pds_unitex_state_program_create_and_upload(
   struct pvr_device *device,
   const VkAllocationCallbacks *allocator,
   uint32_t texture_kicks,
   uint32_t uniform_kicks,
   struct pvr_pds_upload *const pds_upload_out);

/* Grow the device tile buffers to at least `capacity` buffers of
 * `size_in_bytes` each — presumably idempotent when already large enough;
 * see definition.
 */
VkResult pvr_device_tile_buffer_ensure_cap(struct pvr_device *device,
                                           uint32_t capacity,
                                           uint32_t size_in_bytes);

/* Command-buffer-lifetime upload helpers. */
VkResult
pvr_cmd_buffer_upload_general(struct pvr_cmd_buffer *const cmd_buffer,
                              const void *const data,
                              const size_t size,
                              struct pvr_suballoc_bo **const pvr_bo_out);
VkResult pvr_cmd_buffer_upload_pds(struct pvr_cmd_buffer *const cmd_buffer,
                                   const uint32_t *data,
                                   uint32_t data_size_dwords,
                                   uint32_t data_alignment,
                                   const uint32_t *code,
                                   uint32_t code_size_dwords,
                                   uint32_t code_alignment,
                                   uint64_t min_alignment,
                                   struct pvr_pds_upload *const pds_upload_out);

/* Sub-command recording lifecycle. */
VkResult pvr_cmd_buffer_start_sub_cmd(struct pvr_cmd_buffer *cmd_buffer,
                                      enum pvr_sub_cmd_type type);
VkResult pvr_cmd_buffer_end_sub_cmd(struct pvr_cmd_buffer *cmd_buffer);

/* Compute sub-command helpers. */
void pvr_compute_generate_fence(struct pvr_cmd_buffer *cmd_buffer,
                                struct pvr_sub_cmd_compute *const sub_cmd,
                                bool deallocate_shareds);
void pvr_compute_update_shared_private(
   struct pvr_cmd_buffer *cmd_buffer,
   struct pvr_sub_cmd_compute *const sub_cmd,
   struct pvr_private_compute_pipeline *pipeline);
void pvr_compute_update_kernel_private(
   struct pvr_cmd_buffer *cmd_buffer,
   struct pvr_sub_cmd_compute *const sub_cmd,
   struct pvr_private_compute_pipeline *pipeline,
   const uint32_t global_workgroup_size[static const PVR_WORKGROUP_DIMENSIONS]);

size_t pvr_pds_get_max_descriptor_upload_const_map_size_in_bytes(void);

VkResult pvr_pds_compute_shader_create_and_upload(
   struct pvr_device *device,
   struct pvr_pds_compute_shader_program *program,
   struct pvr_pds_upload *const pds_upload_out);

/* Query support: device-level programs and per-command-buffer recording. */
VkResult pvr_device_create_compute_query_programs(struct pvr_device *device);
void pvr_device_destroy_compute_query_programs(struct pvr_device *device);

VkResult pvr_add_query_program(struct pvr_cmd_buffer *cmd_buffer,
                               const struct pvr_query_info *query_info);

void pvr_reset_graphics_dirty_state(struct pvr_cmd_buffer *const cmd_buffer,
                                    bool start_geom);

/* Map an API subpass index to its hardware subpass setup. */
const struct pvr_renderpass_hwsetup_subpass *
pvr_get_hw_subpass(const struct pvr_render_pass *pass, const uint32_t subpass);

void pvr_descriptor_size_info_init(
   const struct pvr_device *device,
   VkDescriptorType type,
   struct pvr_descriptor_size_info *const size_info_out);
1440 
/* Vulkan handle <-> driver object conversion helpers. */
#define PVR_FROM_HANDLE(__pvr_type, __name, __handle) \
   VK_FROM_HANDLE(__pvr_type, __name, __handle)

/* Dispatchable handle casts. */
VK_DEFINE_HANDLE_CASTS(pvr_cmd_buffer,
                       vk.base,
                       VkCommandBuffer,
                       VK_OBJECT_TYPE_COMMAND_BUFFER)
VK_DEFINE_HANDLE_CASTS(pvr_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)
VK_DEFINE_HANDLE_CASTS(pvr_instance,
                       vk.base,
                       VkInstance,
                       VK_OBJECT_TYPE_INSTANCE)
VK_DEFINE_HANDLE_CASTS(pvr_physical_device,
                       vk.base,
                       VkPhysicalDevice,
                       VK_OBJECT_TYPE_PHYSICAL_DEVICE)
VK_DEFINE_HANDLE_CASTS(pvr_queue, vk.base, VkQueue, VK_OBJECT_TYPE_QUEUE)

/* Non-dispatchable handle casts. */
VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_device_memory,
                               base,
                               VkDeviceMemory,
                               VK_OBJECT_TYPE_DEVICE_MEMORY)
VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_image, vk.base, VkImage, VK_OBJECT_TYPE_IMAGE)
VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_buffer,
                               vk.base,
                               VkBuffer,
                               VK_OBJECT_TYPE_BUFFER)
VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_image_view,
                               vk.base,
                               VkImageView,
                               VK_OBJECT_TYPE_IMAGE_VIEW)
VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_buffer_view,
                               vk.base,
                               VkBufferView,
                               VK_OBJECT_TYPE_BUFFER_VIEW)
VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_descriptor_set_layout,
                               base,
                               VkDescriptorSetLayout,
                               VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT)
VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_descriptor_set,
                               base,
                               VkDescriptorSet,
                               VK_OBJECT_TYPE_DESCRIPTOR_SET)
VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_event, base, VkEvent, VK_OBJECT_TYPE_EVENT)
VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_descriptor_pool,
                               base,
                               VkDescriptorPool,
                               VK_OBJECT_TYPE_DESCRIPTOR_POOL)
VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_sampler,
                               vk.base,
                               VkSampler,
                               VK_OBJECT_TYPE_SAMPLER)
VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_pipeline_layout,
                               base,
                               VkPipelineLayout,
                               VK_OBJECT_TYPE_PIPELINE_LAYOUT)
VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_pipeline,
                               base,
                               VkPipeline,
                               VK_OBJECT_TYPE_PIPELINE)
VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_query_pool,
                               base,
                               VkQueryPool,
                               VK_OBJECT_TYPE_QUERY_POOL)
VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_framebuffer,
                               base,
                               VkFramebuffer,
                               VK_OBJECT_TYPE_FRAMEBUFFER)
VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_render_pass,
                               base,
                               VkRenderPass,
                               VK_OBJECT_TYPE_RENDER_PASS)
1513 
/* Bail out of a (void-returning) command-buffer entry point when the command
 * buffer is not in the recording state or already has a recorded error.
 * Reports via vk_errorf and returns from the calling function.
 */
#define PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer)                  \
   do {                                                                      \
      struct pvr_cmd_buffer *const _cmd_buffer = (cmd_buffer);               \
      const VkResult _record_result =                                        \
         vk_command_buffer_get_record_result(&_cmd_buffer->vk);              \
                                                                             \
      if (_cmd_buffer->vk.state != MESA_VK_COMMAND_BUFFER_STATE_RECORDING) { \
         vk_errorf(_cmd_buffer,                                              \
                   VK_ERROR_OUT_OF_DEVICE_MEMORY,                            \
                   "Command buffer is not in recording state");              \
         return;                                                             \
      } else if (_record_result < VK_SUCCESS) {                              \
         vk_errorf(_cmd_buffer,                                              \
                   _record_result,                                           \
                   "Skipping function as command buffer has "                \
                   "previous build error");                                  \
         return;                                                             \
      }                                                                      \
   } while (0)
1533 
/**
 * Print a FINISHME message, including its source location.
 * Reported at most once per call site (guarded by a static flag).
 */
#define pvr_finishme(format, ...)              \
   do {                                        \
      static bool reported = false;            \
      if (!reported) {                         \
         mesa_logw("%s:%d: FINISHME: " format, \
                   __FILE__,                   \
                   __LINE__,                   \
                   ##__VA_ARGS__);             \
         reported = true;                      \
      }                                        \
   } while (false)
1548 
/* Write _value into the dword buffer _buffer at dword-offset _offset,
 * treating the buffer as an array of _value-sized elements. Asserts the
 * write stays within _max dwords, that the offset is aligned to the value
 * size, and (at compile time) that buffer and value types match.
 */
#define PVR_WRITE(_buffer, _value, _offset, _max)                \
   do {                                                          \
      __typeof__(_value) __value = _value;                       \
      uint64_t __offset = _offset;                               \
      uint32_t __nr_dwords = sizeof(__value) / sizeof(uint32_t); \
      static_assert(__same_type(*_buffer, __value),              \
                    "Buffer and value type mismatch");           \
      assert((__offset + __nr_dwords) <= (_max));                \
      assert((__offset % __nr_dwords) == 0U);                    \
      _buffer[__offset / __nr_dwords] = __value;                 \
   } while (0)
1560 
/* A non-fatal assert. Useful for debugging.
 * Logs an error (instead of aborting) when the condition fails; compiles to
 * nothing in non-debug builds.
 */
#if MESA_DEBUG
#   define pvr_assert(x)                                           \
      ({                                                           \
         if (unlikely(!(x)))                                       \
            mesa_loge("%s:%d ASSERT: %s", __FILE__, __LINE__, #x); \
      })
#else
#   define pvr_assert(x)
#endif
1571 
1572 #endif /* PVR_PRIVATE_H */
1573