/*
 * Copyright © 2022 Imagination Technologies Ltd.
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * based in part on radv driver which is:
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#ifndef PVR_PRIVATE_H
#define PVR_PRIVATE_H

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <vulkan/vulkan.h>

#include "compiler/shader_enums.h"
#include "hwdef/rogue_hw_defs.h"
#include "pvr_csb.h"
#include "pvr_device_info.h"
#include "pvr_entrypoints.h"
#include "pvr_hw_pass.h"
#include "pvr_job_render.h"
#include "pvr_limits.h"
#include "pvr_pds.h"
#include "pvr_types.h"
#include "pvr_winsys.h"
#include "rogue/rogue.h"
#include "util/bitscan.h"
#include "util/format/u_format.h"
#include "util/log.h"
#include "util/macros.h"
#include "util/u_dynarray.h"
#include "vk_buffer.h"
#include "vk_command_buffer.h"
#include "vk_device.h"
#include "vk_image.h"
#include "vk_instance.h"
#include "vk_log.h"
#include "vk_physical_device.h"
#include "vk_queue.h"
#include "vk_sync.h"
#include "wsi_common.h"

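/* Wrap Valgrind-only code in VG() so that it compiles away to a no-op when
 * Valgrind support is not available.
 */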
#ifdef HAVE_VALGRIND
#   include <valgrind/valgrind.h>
#   include <valgrind/memcheck.h>
#   define VG(x) x
#else
#   define VG(x) ((void)0)
#endif

#define VK_VENDOR_ID_IMAGINATION 0x1010

#define PVR_WORKGROUP_DIMENSIONS 3U

#define PVR_SAMPLER_DESCRIPTOR_SIZE 4U
#define PVR_IMAGE_DESCRIPTOR_SIZE 4U

#define PVR_STATE_PBE_DWORDS 2U

#define PVR_PIPELINE_LAYOUT_SUPPORTED_DESCRIPTOR_TYPE_COUNT \
   (uint32_t)(VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT + 1U)
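/* Note: VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT is the last core Vulkan 1.0
 * descriptor type (enum value 10), so the count above covers descriptor
 * types 0..10, i.e. 11 entries.
 */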

/* TODO: move into a common surface library? */
enum pvr_memlayout {
   PVR_MEMLAYOUT_UNDEFINED = 0, /* explicitly treat 0 as undefined */
   PVR_MEMLAYOUT_LINEAR,
   PVR_MEMLAYOUT_TWIDDLED,
   PVR_MEMLAYOUT_3DTWIDDLED,
};

enum pvr_cmd_buffer_status {
   PVR_CMD_BUFFER_STATUS_INVALID = 0, /* explicitly treat 0 as invalid */
   PVR_CMD_BUFFER_STATUS_INITIAL,
   PVR_CMD_BUFFER_STATUS_RECORDING,
   PVR_CMD_BUFFER_STATUS_EXECUTABLE,
};

enum pvr_texture_state {
   PVR_TEXTURE_STATE_SAMPLE,
   PVR_TEXTURE_STATE_STORAGE,
   PVR_TEXTURE_STATE_ATTACHMENT,
   PVR_TEXTURE_STATE_MAX_ENUM,
};

enum pvr_sub_cmd_type {
   PVR_SUB_CMD_TYPE_INVALID = 0, /* explicitly treat 0 as invalid */
   PVR_SUB_CMD_TYPE_GRAPHICS,
   PVR_SUB_CMD_TYPE_COMPUTE,
   PVR_SUB_CMD_TYPE_TRANSFER,
};

enum pvr_depth_stencil_usage {
   PVR_DEPTH_STENCIL_USAGE_UNDEFINED = 0, /* explicitly treat 0 as undefined */
   PVR_DEPTH_STENCIL_USAGE_NEEDED,
   PVR_DEPTH_STENCIL_USAGE_NEVER,
};

enum pvr_job_type {
   PVR_JOB_TYPE_GEOM,
   PVR_JOB_TYPE_FRAG,
   PVR_JOB_TYPE_COMPUTE,
   PVR_JOB_TYPE_TRANSFER,
   PVR_JOB_TYPE_MAX
};

enum pvr_pipeline_type {
   PVR_PIPELINE_TYPE_INVALID = 0, /* explicitly treat 0 as invalid */
   PVR_PIPELINE_TYPE_GRAPHICS,
   PVR_PIPELINE_TYPE_COMPUTE,
};

enum pvr_pipeline_stage_bits {
   PVR_PIPELINE_STAGE_GEOM_BIT = BITFIELD_BIT(PVR_JOB_TYPE_GEOM),
   PVR_PIPELINE_STAGE_FRAG_BIT = BITFIELD_BIT(PVR_JOB_TYPE_FRAG),
   PVR_PIPELINE_STAGE_COMPUTE_BIT = BITFIELD_BIT(PVR_JOB_TYPE_COMPUTE),
   PVR_PIPELINE_STAGE_TRANSFER_BIT = BITFIELD_BIT(PVR_JOB_TYPE_TRANSFER),
};

#define PVR_PIPELINE_STAGE_ALL_GRAPHICS_BITS \
   (PVR_PIPELINE_STAGE_GEOM_BIT | PVR_PIPELINE_STAGE_FRAG_BIT)

#define PVR_PIPELINE_STAGE_ALL_BITS \
   (PVR_PIPELINE_STAGE_ALL_GRAPHICS_BITS | PVR_PIPELINE_STAGE_COMPUTE_BIT | \
    PVR_PIPELINE_STAGE_TRANSFER_BIT)

#define PVR_NUM_SYNC_PIPELINE_STAGES 4U
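/* This matches the number of pvr_job_type stages: geometry, fragment,
 * compute and transfer.
 */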

/* Warning: Do not define an invalid stage as 0 since other code relies on 0
 * being the first shader stage. This allows for stages to be split or added
 * in the future. Defining 0 as invalid will very likely cause problems.
 */
enum pvr_stage_allocation {
   PVR_STAGE_ALLOCATION_VERTEX_GEOMETRY,
   PVR_STAGE_ALLOCATION_FRAGMENT,
   PVR_STAGE_ALLOCATION_COMPUTE,
   PVR_STAGE_ALLOCATION_COUNT
};

/* Scissor accumulation state defines
 * - Disabled means that a clear has been detected, and scissor accumulation
 *   should stop.
 * - Check for clear means that there are no clear loadops, but another clear
 *   call could still arrive that would be broken by scissoring.
 * - Enabled means that a scissor has been set in the pipeline, and
 *   accumulation can continue.
 */
enum pvr_scissor_accum_state {
   PVR_SCISSOR_ACCUM_INVALID = 0, /* Explicitly treat 0 as invalid */
   PVR_SCISSOR_ACCUM_DISABLED,
   PVR_SCISSOR_ACCUM_CHECK_FOR_CLEAR,
   PVR_SCISSOR_ACCUM_ENABLED,
};

struct pvr_bo;
struct pvr_compute_ctx;
struct pvr_compute_pipeline;
struct pvr_free_list;
struct pvr_graphics_pipeline;
struct pvr_instance;
struct pvr_render_ctx;
struct rogue_compiler;

struct pvr_physical_device {
   struct vk_physical_device vk;

   /* Back-pointer to instance */
   struct pvr_instance *instance;

   char *name;
   int master_fd;
   int render_fd;
   char *master_path;
   char *render_path;

   struct pvr_winsys *ws;
   struct pvr_device_info dev_info;

   struct pvr_device_runtime_info dev_runtime_info;

   VkPhysicalDeviceMemoryProperties memory;

   uint8_t pipeline_cache_uuid[VK_UUID_SIZE];

   struct wsi_device wsi_device;

   struct rogue_compiler *compiler;
};

struct pvr_instance {
   struct vk_instance vk;

   int physical_devices_count;
   struct pvr_physical_device physical_device;
};

struct pvr_queue {
   struct vk_queue vk;

   struct pvr_device *device;

   struct pvr_render_ctx *gfx_ctx;
   struct pvr_compute_ctx *compute_ctx;
   struct pvr_transfer_ctx *transfer_ctx;

   struct vk_sync *completion[PVR_JOB_TYPE_MAX];
};

struct pvr_vertex_binding {
   struct pvr_buffer *buffer;
   VkDeviceSize offset;
};

struct pvr_pds_upload {
   struct pvr_bo *pvr_bo;
   /* Offset from the pds heap base address. */
   uint32_t data_offset;
   /* Offset from the pds heap base address. */
   uint32_t code_offset;

   /* data_size + code_size = program_size. */
   uint32_t data_size;
   uint32_t code_size;
};

struct pvr_device {
   struct vk_device vk;
   struct pvr_instance *instance;
   struct pvr_physical_device *pdevice;

   int master_fd;
   int render_fd;

   struct pvr_winsys *ws;
   struct pvr_winsys_heaps heaps;

   struct pvr_free_list *global_free_list;

   struct pvr_queue *queues;
   uint32_t queue_count;

   /* Running count of the number of job submissions across all queues. */
   uint32_t global_queue_job_count;

   /* Running count of the number of presentations across all queues. */
   uint32_t global_queue_present_count;

   uint32_t pixel_event_data_size_in_dwords;

   uint64_t input_attachment_sampler;

   struct pvr_pds_upload pds_compute_fence_program;

   struct {
      struct pvr_pds_upload pds;
      struct pvr_bo *usc;
   } nop_program;

   /* Issue Data Fence, Wait for Data Fence state. */
   struct {
      uint32_t usc_shareds;
      struct pvr_bo *usc;

      /* Buffer in which the IDF/WDF program performs store ops. */
      struct pvr_bo *store_bo;
      /* Contains the initialization values for the shared registers. */
      struct pvr_bo *shareds_bo;

      struct pvr_pds_upload pds;
      struct pvr_pds_upload sw_compute_barrier_pds;
   } idfwdf_state;

   VkPhysicalDeviceFeatures features;
};

struct pvr_device_memory {
   struct vk_object_base base;
   struct pvr_winsys_bo *bo;
};

struct pvr_mip_level {
   /* Offset of the mip level in bytes */
   uint32_t offset;

   /* Aligned mip level size in bytes */
   uint32_t size;

   /* Aligned row length in bytes */
   uint32_t pitch;

   /* Aligned height in bytes */
   uint32_t height_pitch;
};

struct pvr_image {
   struct vk_image vk;

   /* vma this image is bound to */
   struct pvr_winsys_vma *vma;

   /* Device address the image is mapped to in device virtual address space */
   pvr_dev_addr_t dev_addr;

   /* Derived and other state */
   VkExtent3D physical_extent;
   enum pvr_memlayout memlayout;
   VkDeviceSize layer_size;
   VkDeviceSize size;

   VkDeviceSize alignment;

   struct pvr_mip_level mip_levels[14];
};

struct pvr_buffer {
   struct vk_buffer vk;

   /* Derived and other state */
   uint32_t alignment;
   /* vma this buffer is bound to */
   struct pvr_winsys_vma *vma;
   /* Device address the buffer is mapped to in device virtual address space */
   pvr_dev_addr_t dev_addr;
};

struct pvr_image_view {
   struct vk_image_view vk;

   /* Saved information from pCreateInfo. */
   const struct pvr_image *image;

   /* Prepacked texture image dwords 0 and 1. These are copied into the
    * descriptor info during pvr_UpdateDescriptorSets().
    *
    * We create separate texture states for the sampling, storage and input
    * attachment cases.
    */
   uint64_t texture_state[PVR_TEXTURE_STATE_MAX_ENUM][2];
};

struct pvr_buffer_view {
   struct vk_object_base base;

   uint64_t range;
   VkFormat format;

   /* Prepacked texture dwords 0 and 1. These are copied into the descriptor
    * during pvr_UpdateDescriptorSets().
    */
   uint64_t texture_state[2];
};

union pvr_sampler_descriptor {
   uint32_t words[PVR_SAMPLER_DESCRIPTOR_SIZE];

   struct {
      /* Packed PVRX(TEXSTATE_SAMPLER). */
      uint64_t sampler_word;
      uint32_t compare_op;
      /* TODO: Figure out what this word is for and rename.
       * Sampler state word 1?
       */
      uint32_t word3;
   } data;
};

struct pvr_sampler {
   struct vk_object_base base;

   union pvr_sampler_descriptor descriptor;
};

struct pvr_descriptor_size_info {
   /* Non-spillable size for storage in the common store. */
   uint32_t primary;

   /* Spillable size to accommodate limitation of the common store. */
   uint32_t secondary;

   uint32_t alignment;
};

struct pvr_descriptor_set_layout_binding {
   VkDescriptorType type;

   /* "M" in layout(set = N, binding = M).
    * Can be used to index bindings in the descriptor_set_layout. This is not
    * the original user-specified binding number, as those might be
    * non-contiguous.
    */
   uint32_t binding_number;

   uint32_t descriptor_count;

   /* Index into the flattened descriptor set */
   uint16_t descriptor_index;

   VkShaderStageFlags shader_stages;
   /* Mask composed of shifted PVR_STAGE_ALLOCATION_... values.
    * Makes it easier to check active shader stages by just shifting and
    * ANDing instead of using VkShaderStageFlags and matching against the
    * PVR_STAGE_... values.
    */
   uint32_t shader_stage_mask;

   struct {
      uint32_t primary;
      uint32_t secondary;
   } per_stage_offset_in_dwords[PVR_STAGE_ALLOCATION_COUNT];

   bool has_immutable_samplers;
   /* Index at which the samplers can be found in the descriptor_set_layout.
    * 0 when the samplers are at index 0 or no samplers are present.
    */
   uint32_t immutable_samplers_index;
};

/* All sizes are in dwords. */
struct pvr_descriptor_set_layout_mem_layout {
   uint32_t primary_offset;
   uint32_t primary_size;

   uint32_t secondary_offset;
   uint32_t secondary_size;

   uint32_t primary_dynamic_size;
   uint32_t secondary_dynamic_size;
};

struct pvr_descriptor_set_layout {
   struct vk_object_base base;

   /* Total amount of descriptors contained in this set. */
   uint32_t descriptor_count;

   /* Count of dynamic buffers. */
   uint32_t dynamic_buffer_count;

   uint32_t binding_count;
   struct pvr_descriptor_set_layout_binding *bindings;

   uint32_t immutable_sampler_count;
   const struct pvr_sampler **immutable_samplers;

   /* Shader stages requiring access to descriptors in this set. */
   VkShaderStageFlags shader_stages;

   /* Count of each VkDescriptorType per shader stage. These are dynamically
    * allocated arrays, one per stage, so as not to hard-code the max
    * descriptor type here.
    *
    * Note: when adding a new type, it might not numerically follow the
    * previous type, so a sparse array would be created. You might want to
    * readjust how these arrays are created and accessed.
    */
   uint32_t *per_stage_descriptor_count[PVR_STAGE_ALLOCATION_COUNT];

   uint32_t total_size_in_dwords;
   struct pvr_descriptor_set_layout_mem_layout
      memory_layout_in_dwords_per_stage[PVR_STAGE_ALLOCATION_COUNT];
};

struct pvr_descriptor_pool {
   struct vk_object_base base;

   VkAllocationCallbacks alloc;

   /* Saved information from pCreateInfo. */
   uint32_t max_sets;

   uint32_t total_size_in_dwords;
   uint32_t current_size_in_dwords;

   /* Derived and other state. */
   /* List of the descriptor sets created using this pool. */
   struct list_head descriptor_sets;
};

struct pvr_descriptor {
   VkDescriptorType type;

   union {
      struct {
         struct pvr_buffer_view *bview;
         pvr_dev_addr_t buffer_dev_addr;
         VkDeviceSize buffer_desc_range;
         VkDeviceSize buffer_create_info_size;
      };

      struct {
         VkImageLayout layout;
         const struct pvr_image_view *iview;
         const struct pvr_sampler *sampler;
      };
   };
};

struct pvr_descriptor_set {
   struct vk_object_base base;

   const struct pvr_descriptor_set_layout *layout;
   const struct pvr_descriptor_pool *pool;

   struct pvr_bo *pvr_bo;

   /* Links this descriptor set into pvr_descriptor_pool::descriptor_sets list.
    */
   struct list_head link;

   /* Array of size layout::descriptor_count. */
   struct pvr_descriptor descriptors[0];
};

struct pvr_descriptor_state {
   struct pvr_descriptor_set *descriptor_sets[PVR_MAX_DESCRIPTOR_SETS];
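   /* Bitmask with one bit per descriptor set slot; a set bit indicates that
    * the corresponding entry in descriptor_sets[] is bound and valid.
    */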
   uint32_t valid_mask;
};

struct pvr_transfer_cmd {
   /* Node to link this cmd into the transfer_cmds list in
    * pvr_sub_cmd::transfer structure.
    */
   struct list_head link;

   struct pvr_buffer *src;
   struct pvr_buffer *dst;
   uint32_t region_count;
   VkBufferCopy2 regions[0];
};

struct pvr_sub_cmd_gfx {
   const struct pvr_framebuffer *framebuffer;

   struct pvr_render_job job;

   struct pvr_bo *depth_bias_bo;
   struct pvr_bo *scissor_bo;

   /* Tracking how the loaded depth/stencil values are being used. */
   enum pvr_depth_stencil_usage depth_usage;
   enum pvr_depth_stencil_usage stencil_usage;

   /* Tracking whether the subcommand modifies depth/stencil. */
   bool modifies_depth;
   bool modifies_stencil;

   /* Control stream builder object */
   struct pvr_csb control_stream;

   uint32_t hw_render_idx;

   uint32_t max_tiles_in_flight;

   bool empty_cmd;

   /* True if any fragment shader used in this sub command uses atomic
    * operations.
    */
   bool frag_uses_atomic_ops;

   bool disable_compute_overlap;

   /* True if any fragment shader used in this sub command has side
    * effects.
    */
   bool frag_has_side_effects;

   /* True if any vertex shader used in this sub command contains both
    * texture reads and texture writes.
    */
   bool vertex_uses_texture_rw;

   /* True if any fragment shader used in this sub command contains
    * both texture reads and texture writes.
    */
   bool frag_uses_texture_rw;
};

struct pvr_sub_cmd_compute {
   /* Control stream builder object. */
   struct pvr_csb control_stream;

   struct pvr_winsys_compute_submit_info submit_info;

   uint32_t num_shared_regs;

   /* True if any shader used in this sub command uses atomic
    * operations.
    */
   bool uses_atomic_ops;

   bool uses_barrier;

   bool pds_sw_barrier_requires_clearing;
};

struct pvr_sub_cmd_transfer {
   /* List of pvr_transfer_cmd type structures. */
   struct list_head transfer_cmds;
};

struct pvr_sub_cmd {
   /* This links the subcommand in the pvr_cmd_buffer::sub_cmds list. */
   struct list_head link;

   enum pvr_sub_cmd_type type;

   union {
      struct pvr_sub_cmd_gfx gfx;
      struct pvr_sub_cmd_compute compute;
      struct pvr_sub_cmd_transfer transfer;
   };
};

struct pvr_render_pass_info {
   const struct pvr_render_pass *pass;
   struct pvr_framebuffer *framebuffer;

   struct pvr_image_view **attachments;

   uint32_t subpass_idx;
   uint32_t current_hw_subpass;

   VkRect2D render_area;

   uint32_t clear_value_count;
   VkClearValue *clear_values;

   VkPipelineBindPoint pipeline_bind_point;

   bool process_empty_tiles;
   bool enable_bg_tag;
   uint32_t userpass_spawn;

   /* Have we had to scissor a depth/stencil clear because render area was not
    * tile aligned?
    */
   bool scissor_ds_clear;
};

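/* One dirty bit per group of PPP state words; a set bit indicates that the
 * corresponding state words need to be (re-)emitted into the control
 * stream.
 */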
struct pvr_emit_state {
   bool ppp_control : 1;
   bool isp : 1;
   bool isp_fb : 1;
   bool isp_ba : 1;
   bool isp_bb : 1;
   bool isp_dbsc : 1;
   bool pds_fragment_stateptr0 : 1;
   bool pds_fragment_stateptr1 : 1;
   bool pds_fragment_stateptr2 : 1;
   bool pds_fragment_stateptr3 : 1;
   bool region_clip : 1;
   bool viewport : 1;
   bool wclamp : 1;
   bool output_selects : 1;
   bool varying_word0 : 1;
   bool varying_word1 : 1;
   bool varying_word2 : 1;
   bool stream_out : 1;
};

struct pvr_ppp_state {
   uint32_t header;

   struct {
      /* TODO: Can we get rid of the "control" field? */
      struct PVRX(TA_STATE_ISPCTL) control_struct;
      uint32_t control;

      uint32_t front_a;
      uint32_t front_b;
      uint32_t back_a;
      uint32_t back_b;
   } isp;

   struct {
      uint16_t scissor_index;
      uint16_t depthbias_index;
   } depthbias_scissor_indices;

   struct {
      uint32_t pixel_shader_base;
      uint32_t texture_uniform_code_base;
      uint32_t size_info1;
      uint32_t size_info2;
      uint32_t varying_base;
      uint32_t texture_state_data_base;
      uint32_t uniform_state_data_base;
   } pds;

   struct {
      uint32_t word0;
      uint32_t word1;
   } region_clipping;

   struct {
      uint32_t a0;
      uint32_t m0;
      uint32_t a1;
      uint32_t m1;
      uint32_t a2;
      uint32_t m2;
   } viewports[PVR_MAX_VIEWPORTS];

   uint32_t viewport_count;

   uint32_t output_selects;

   uint32_t varying_word[2];

   uint32_t ppp_control;
};

#define PVR_DYNAMIC_STATE_BIT_VIEWPORT BITFIELD_BIT(0U)
#define PVR_DYNAMIC_STATE_BIT_SCISSOR BITFIELD_BIT(1U)
#define PVR_DYNAMIC_STATE_BIT_LINE_WIDTH BITFIELD_BIT(2U)
#define PVR_DYNAMIC_STATE_BIT_DEPTH_BIAS BITFIELD_BIT(3U)
#define PVR_DYNAMIC_STATE_BIT_STENCIL_COMPARE_MASK BITFIELD_BIT(4U)
#define PVR_DYNAMIC_STATE_BIT_STENCIL_WRITE_MASK BITFIELD_BIT(5U)
#define PVR_DYNAMIC_STATE_BIT_STENCIL_REFERENCE BITFIELD_BIT(6U)
#define PVR_DYNAMIC_STATE_BIT_BLEND_CONSTANTS BITFIELD_BIT(7U)

#define PVR_DYNAMIC_STATE_ALL_BITS \
   ((PVR_DYNAMIC_STATE_BIT_BLEND_CONSTANTS << 1U) - 1U)
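/* With BLEND_CONSTANTS being bit 7, this expands to
 * (BITFIELD_BIT(7U) << 1U) - 1U == 0xff, i.e. all eight dynamic state bits
 * set.
 */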

struct pvr_dynamic_state {
   /* Identifies which pipeline state is static or dynamic.
    * To test for dynamic: & PVR_DYNAMIC_STATE_BIT_...
    */
   uint32_t mask;

   struct {
      /* TODO: fixme in the original code - figure out what. */
      uint32_t count;
      VkViewport viewports[PVR_MAX_VIEWPORTS];
   } viewport;

   struct {
      /* TODO: fixme in the original code - figure out what. */
      uint32_t count;
      VkRect2D scissors[PVR_MAX_VIEWPORTS];
   } scissor;

   /* Saved information from pCreateInfo. */
   float line_width;

   struct {
      /* Saved information from pCreateInfo. */
      float constant_factor;
      float clamp;
      float slope_factor;
   } depth_bias;
   float blend_constants[4];
   struct {
      uint32_t front;
      uint32_t back;
   } compare_mask;
   struct {
      uint32_t front;
      uint32_t back;
   } write_mask;
   struct {
      uint32_t front;
      uint32_t back;
   } reference;
};

struct pvr_cmd_buffer_draw_state {
   uint32_t base_instance;
   uint32_t base_vertex;
   bool draw_indirect;
   bool draw_indexed;
};

struct pvr_cmd_buffer_state {
   VkResult status;

   /* Pipeline binding. */
   const struct pvr_graphics_pipeline *gfx_pipeline;

   const struct pvr_compute_pipeline *compute_pipeline;

   struct pvr_render_pass_info render_pass_info;

   struct pvr_sub_cmd *current_sub_cmd;

   struct pvr_ppp_state ppp_state;

   union {
      struct pvr_emit_state emit_state;
      /* This is intended to allow setting and clearing of all bits. This
       * shouldn't be used to access specific bits of ppp_state.
       */
      uint32_t emit_state_bits;
   };

   struct {
      /* FIXME: Check if we need a dirty state flag for the given scissor
       * accumulation state.
       * Check whether these members should be moved into the top level
       * struct, with this struct replaced by just pvr_dynamic_state
       * "dynamic".
       */
      enum pvr_scissor_accum_state scissor_accum_state;
      VkRect2D scissor_accum_bounds;

      struct pvr_dynamic_state common;
   } dynamic;

   struct pvr_vertex_binding vertex_bindings[PVR_MAX_VERTEX_INPUT_BINDINGS];

   struct {
      struct pvr_buffer *buffer;
      VkDeviceSize offset;
      VkIndexType type;
   } index_buffer_binding;

   struct {
      uint8_t data[PVR_MAX_PUSH_CONSTANTS_SIZE];
      VkShaderStageFlags dirty_stages;
   } push_constants;

   /* Array size of barriers_needed is based on the number of sync pipeline
    * stages.
    */
   uint32_t barriers_needed[PVR_NUM_SYNC_PIPELINE_STAGES];

   struct pvr_descriptor_state gfx_desc_state;
   struct pvr_descriptor_state compute_desc_state;

   VkFormat depth_format;

   struct {
      bool viewport : 1;
      bool scissor : 1;

      bool compute_pipeline_binding : 1;
      bool compute_desc_dirty : 1;

      bool gfx_pipeline_binding : 1;
      bool gfx_desc_dirty : 1;

      bool vertex_bindings : 1;
      bool index_buffer_binding : 1;
      bool vertex_descriptors : 1;
      bool fragment_descriptors : 1;

      bool line_width : 1;

      bool depth_bias : 1;

      bool blend_constants : 1;

      bool compare_mask : 1;
      bool write_mask : 1;
      bool reference : 1;

      bool userpass_spawn : 1;

      /* Some draw state needs to be tracked for changes between draw calls,
       * i.e. if we get a draw with baseInstance=0, followed by a call with
       * baseInstance=1, that needs to cause us to select a different PDS
       * attrib program and update the BASE_INSTANCE PDS const. If only
       * baseInstance changes then we just have to update the data section.
       */
      bool draw_base_instance : 1;
      bool draw_variant : 1;
   } dirty;

   struct pvr_cmd_buffer_draw_state draw_state;

   struct {
      uint32_t code_offset;
      const struct pvr_pds_info *info;
   } pds_shader;

   uint32_t max_shared_regs;

   /* Address of data segment for vertex attrib upload program. */
   uint32_t pds_vertex_attrib_offset;

   uint32_t pds_fragment_descriptor_data_offset;
   uint32_t pds_compute_descriptor_data_offset;
};

static_assert(
   sizeof(((struct pvr_cmd_buffer_state *)(0))->emit_state) <=
      sizeof(((struct pvr_cmd_buffer_state *)(0))->emit_state_bits),
   "Size of emit_state_bits must be greater than or equal to emit_state.");

struct pvr_cmd_buffer {
   struct vk_command_buffer vk;

   struct pvr_device *device;

   /* Buffer status, invalid/initial/recording/executable */
   enum pvr_cmd_buffer_status status;

   /* Buffer usage flags */
   VkCommandBufferUsageFlags usage_flags;

   struct util_dynarray depth_bias_array;

   struct util_dynarray scissor_array;
   uint32_t scissor_words[2];

   struct pvr_cmd_buffer_state state;

   /* List of pvr_bo structs associated with this cmd buffer. */
   struct list_head bo_list;

   struct list_head sub_cmds;
};

struct pvr_pipeline_layout {
   struct vk_object_base base;

   uint32_t set_count;
   /* Contains set_count amount of descriptor set layouts. */
   struct pvr_descriptor_set_layout *set_layout[PVR_MAX_DESCRIPTOR_SETS];

   VkShaderStageFlags push_constants_shader_stages;

   VkShaderStageFlags shader_stages;

   /* Per stage masks indicating which set in the layout contains any
    * descriptor of the appropriate types: VK..._{SAMPLER, SAMPLED_IMAGE,
    * UNIFORM_TEXEL_BUFFER, UNIFORM_BUFFER, STORAGE_BUFFER}.
    * Shift by the set's number to check the mask (1U << set_num).
    */
   uint32_t per_stage_descriptor_masks[PVR_STAGE_ALLOCATION_COUNT];

   /* Array of descriptor offsets at which each set's descriptors start, per
    * stage, within all the sets in the pipeline layout, per descriptor type.
    * Note that we only store offsets for the specific descriptor types
    * VK_DESCRIPTOR_TYPE_{SAMPLER, SAMPLED_IMAGE, UNIFORM_TEXEL_BUFFER,
    * UNIFORM_BUFFER, STORAGE_BUFFER}; the rest will be 0.
    */
   uint32_t
      descriptor_offsets[PVR_MAX_DESCRIPTOR_SETS][PVR_STAGE_ALLOCATION_COUNT]
                        [PVR_PIPELINE_LAYOUT_SUPPORTED_DESCRIPTOR_TYPE_COUNT];

   /* There is no accounting for dynamics in here. They will be garbage values.
    */
   struct pvr_descriptor_set_layout_mem_layout
      register_layout_in_dwords_per_stage[PVR_STAGE_ALLOCATION_COUNT]
                                         [PVR_MAX_DESCRIPTOR_SETS];

   /* All sizes in dwords. */
   struct pvr_pipeline_layout_reg_info {
      uint32_t primary_dynamic_size_in_dwords;
      uint32_t secondary_dynamic_size_in_dwords;
   } per_stage_reg_info[PVR_STAGE_ALLOCATION_COUNT];
};

struct pvr_pipeline_cache {
   struct vk_object_base base;

   struct pvr_device *device;
};

struct pvr_stage_allocation_descriptor_state {
   struct pvr_pds_upload pds_code;
   /* Since we upload the code segment separately from the data segment,
    * pds_code.data_size might be 0 whilst pds_info.data_size_in_dwords might
    * be >0 in the case of this struct referring to the code upload.
    */
   struct pvr_pds_info pds_info;

   /* Already setup compile time static consts. */
   struct pvr_bo *static_consts;
};

struct pvr_pds_attrib_program {
   struct pvr_pds_info info;
   /* The uploaded PDS program stored here only contains the code segment,
    * meaning the data size will be 0, unlike the data size stored in the
    * 'info' member above.
    */
   struct pvr_pds_upload program;
};

struct pvr_pipeline_stage_state {
   uint32_t const_shared_reg_count;
   uint32_t const_shared_reg_offset;
   uint32_t temps_count;

   uint32_t coefficient_size;

   /* True if this shader uses any atomic operations. */
   bool uses_atomic_ops;

   /* True if this shader uses both texture reads and texture writes. */
   bool uses_texture_rw;

   /* Only used for compute stage. */
   bool uses_barrier;

   /* True if this shader has side effects */
   bool has_side_effects;

   /* True if this shader is simply a nop.end. */
   bool empty_program;
};

struct pvr_vertex_shader_state {
   /* Pointer to a buffer object that contains the shader binary. */
   struct pvr_bo *bo;
   uint32_t entry_offset;

   /* 2 since we only need STATE_VARYING{0,1} state words. */
   uint32_t varying[2];

   struct pvr_pds_attrib_program
      pds_attrib_programs[PVR_PDS_VERTEX_ATTRIB_PROGRAM_COUNT];

   struct pvr_pipeline_stage_state stage_state;
   /* FIXME: Move this into stage_state? */
   struct pvr_stage_allocation_descriptor_state descriptor_state;
   uint32_t vertex_input_size;
   uint32_t vertex_output_size;
   uint32_t user_clip_planes_mask;
};

struct pvr_fragment_shader_state {
   /* Pointer to a buffer object that contains the shader binary. */
   struct pvr_bo *bo;
   uint32_t entry_offset;

   struct pvr_pipeline_stage_state stage_state;
   /* FIXME: Move this into stage_state? */
   struct pvr_stage_allocation_descriptor_state descriptor_state;
   uint32_t pass_type;

   struct pvr_pds_upload pds_coeff_program;
   struct pvr_pds_upload pds_fragment_program;
};

struct pvr_pipeline {
   struct vk_object_base base;

   enum pvr_pipeline_type type;

   /* Saved information from pCreateInfo. */
   struct pvr_pipeline_layout *layout;
};

struct pvr_compute_pipeline {
   struct pvr_pipeline base;

   struct {
      /* TODO: Change this to be an anonymous struct once the shader hardcoding
       * is removed.
       */
      struct pvr_compute_pipeline_shader_state {
         /* Pointer to a buffer object that contains the shader binary. */
         struct pvr_bo *bo;

         bool uses_atomic_ops;
         bool uses_barrier;
         /* E.g. GLSL shader uses gl_NumWorkGroups. */
         bool uses_num_workgroups;

         uint32_t const_shared_reg_count;
         uint32_t input_register_count;
         uint32_t work_size;
         uint32_t coefficient_register_count;
      } shader;

      struct {
         uint32_t base_workgroup : 1;
      } flags;

      struct pvr_stage_allocation_descriptor_state descriptor;

      struct pvr_pds_upload primary_program;
      struct pvr_pds_info primary_program_info;

      struct pvr_pds_base_workgroup_program {
         struct pvr_pds_upload code_upload;

         uint32_t *data_section;
         /* Offset within the PDS data section at which the base workgroup id
          * resides.
          */
         uint32_t base_workgroup_data_patching_offset;

         struct pvr_pds_info info;
      } primary_base_workgroup_variant_program;
   } state;
};

struct pvr_graphics_pipeline {
   struct pvr_pipeline base;

   VkSampleCountFlagBits rasterization_samples;
   struct pvr_raster_state {
      /* Derived and other state. */
      /* Indicates whether primitives are discarded immediately before the
       * rasterization stage.
       */
      bool discard_enable;
      VkCullModeFlags cull_mode;
      VkFrontFace front_face;
      bool depth_bias_enable;
      bool depth_clamp_enable;
   } raster_state;
   struct {
      VkPrimitiveTopology topology;
      bool primitive_restart;
   } input_asm_state;
   uint32_t sample_mask;

   struct pvr_dynamic_state dynamic_state;

   VkCompareOp depth_compare_op;
   bool depth_write_disable;

   struct {
      VkCompareOp compare_op;
      /* SOP1 */
      VkStencilOp fail_op;
      /* SOP2 */
      VkStencilOp depth_fail_op;
      /* SOP3 */
      VkStencilOp pass_op;
   } stencil_front, stencil_back;

   /* Derived and other state */
   size_t stage_indices[MESA_SHADER_FRAGMENT + 1];

   struct pvr_vertex_shader_state vertex_shader_state;
   struct pvr_fragment_shader_state fragment_shader_state;
};

struct pvr_query_pool {
   struct vk_object_base base;

   /* Stride of result_buffer to get to the start of the results for the next
    * Phantom.
    */
   uint32_t result_stride;

   struct pvr_bo *result_buffer;
   struct pvr_bo *availability_buffer;
};

struct pvr_render_target {
   struct pvr_rt_dataset *rt_dataset;

   pthread_mutex_t mutex;

   bool valid;
};

struct pvr_framebuffer {
   struct vk_object_base base;

   /* Saved information from pCreateInfo. */
   uint32_t width;
   uint32_t height;
   uint32_t layers;

   uint32_t attachment_count;
   struct pvr_image_view **attachments;

   /* Derived and other state. */
   struct pvr_bo *ppp_state_bo;
   /* PPP state size in dwords. */
   size_t ppp_state_size;

   uint32_t render_targets_count;
   struct pvr_render_target *render_targets;
};

struct pvr_render_pass_attachment {
   /* Saved information from pCreateInfo. */
   VkAttachmentLoadOp load_op;

   VkAttachmentStoreOp store_op;

   VkAttachmentLoadOp stencil_load_op;

   VkAttachmentStoreOp stencil_store_op;

   VkFormat vk_format;
   uint32_t sample_count;
   VkImageLayout initial_layout;

   /* Derived and other state. */
   /* True if the attachment format includes a stencil component. */
   bool has_stencil;

   /* Can this surface be resolved by the PBE. */
   bool is_pbe_downscalable;

   uint32_t index;
};

struct pvr_render_subpass {
   /* Saved information from pCreateInfo. */
   /* The number of samples per color attachment (or depth attachment if
    * z-only).
    */
   /* FIXME: rename to 'samples' to match struct pvr_image */
   uint32_t sample_count;

   uint32_t color_count;
   uint32_t *color_attachments;
   uint32_t *resolve_attachments;

   uint32_t input_count;
   uint32_t *input_attachments;

   uint32_t *depth_stencil_attachment;

   /* Derived and other state. */
   uint32_t dep_count;
   uint32_t *dep_list;

   /* Array with dep_count elements. flush_on_dep[x] is true if this subpass
    * and the subpass dep_list[x] can't be in the same hardware render.
    */
   bool *flush_on_dep;

   uint32_t index;

   uint32_t userpass_spawn;

   VkPipelineBindPoint pipeline_bind_point;
};

struct pvr_render_pass {
   struct vk_object_base base;

   /* Saved information from pCreateInfo. */
   uint32_t attachment_count;

   struct pvr_render_pass_attachment *attachments;

   uint32_t subpass_count;

   struct pvr_render_subpass *subpasses;

   struct pvr_renderpass_hwsetup *hw_setup;

   /* Derived and other state. */
   /* FIXME: rename to 'max_samples' as we use 'samples' elsewhere */
   uint32_t max_sample_count;

   /* The maximum number of tile buffers to use in any subpass. */
   uint32_t max_tilebuffer_count;
};

struct pvr_load_op {
   bool is_hw_object;

   uint32_t clear_mask;

   struct pvr_bo *usc_frag_prog_bo;
   uint32_t const_shareds_count;
   uint32_t shareds_dest_offset;
   uint32_t shareds_count;

   struct pvr_pds_upload pds_frag_prog;

   struct pvr_pds_upload pds_tex_state_prog;
   uint32_t temps_count;
};

uint32_t pvr_calc_fscommon_size_and_tiles_in_flight(
   const struct pvr_physical_device *pdevice,
   uint32_t fs_common_size,
   uint32_t min_tiles_in_flight);

VkResult pvr_wsi_init(struct pvr_physical_device *pdevice);
void pvr_wsi_finish(struct pvr_physical_device *pdevice);

VkResult pvr_queues_create(struct pvr_device *device,
                           const VkDeviceCreateInfo *pCreateInfo);
void pvr_queues_destroy(struct pvr_device *device);

VkResult pvr_bind_memory(struct pvr_device *device,
                         struct pvr_device_memory *mem,
                         VkDeviceSize offset,
                         VkDeviceSize size,
                         VkDeviceSize alignment,
                         struct pvr_winsys_vma **const vma_out,
                         pvr_dev_addr_t *const dev_addr_out);
void pvr_unbind_memory(struct pvr_device *device, struct pvr_winsys_vma *vma);

VkResult pvr_gpu_upload(struct pvr_device *device,
                        struct pvr_winsys_heap *heap,
                        const void *data,
                        size_t size,
                        uint64_t alignment,
                        struct pvr_bo **const pvr_bo_out);
VkResult pvr_gpu_upload_pds(struct pvr_device *device,
                            const uint32_t *data,
                            uint32_t data_size_dwords,
                            uint32_t data_alignment,
                            const uint32_t *code,
                            uint32_t code_size_dwords,
                            uint32_t code_alignment,
                            uint64_t min_alignment,
                            struct pvr_pds_upload *const pds_upload_out);

VkResult pvr_gpu_upload_usc(struct pvr_device *device,
                            const void *code,
                            size_t code_size,
                            uint64_t code_alignment,
                            struct pvr_bo **const pvr_bo_out);

VkResult pvr_cmd_buffer_add_transfer_cmd(struct pvr_cmd_buffer *cmd_buffer,
                                         struct pvr_transfer_cmd *transfer_cmd);

VkResult pvr_cmd_buffer_alloc_mem(struct pvr_cmd_buffer *cmd_buffer,
                                  struct pvr_winsys_heap *heap,
                                  uint64_t size,
                                  uint32_t flags,
                                  struct pvr_bo **const pvr_bo_out);

static inline struct pvr_compute_pipeline *
to_pvr_compute_pipeline(struct pvr_pipeline *pipeline)
{
   assert(pipeline->type == PVR_PIPELINE_TYPE_COMPUTE);
   return container_of(pipeline, struct pvr_compute_pipeline, base);
}

static inline struct pvr_graphics_pipeline *
to_pvr_graphics_pipeline(struct pvr_pipeline *pipeline)
{
   assert(pipeline->type == PVR_PIPELINE_TYPE_GRAPHICS);
   return container_of(pipeline, struct pvr_graphics_pipeline, base);
}

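/* Translate a Vulkan pipeline stage mask into the coarser, per-job-type
 * pvr_pipeline_stage_bits mask used internally.
 */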
static inline enum pvr_pipeline_stage_bits
pvr_stage_mask(VkPipelineStageFlags2 stage_mask)
{
   enum pvr_pipeline_stage_bits stages = 0;

   if (stage_mask & VK_PIPELINE_STAGE_ALL_COMMANDS_BIT)
      return PVR_PIPELINE_STAGE_ALL_BITS;

   if (stage_mask & (VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT))
      stages |= PVR_PIPELINE_STAGE_ALL_GRAPHICS_BITS;

   if (stage_mask & (VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT |
                     VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
                     VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
                     VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT |
                     VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT |
                     VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT)) {
      stages |= PVR_PIPELINE_STAGE_GEOM_BIT;
   }

   if (stage_mask & (VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
                     VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
                     VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
                     VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT)) {
      stages |= PVR_PIPELINE_STAGE_FRAG_BIT;
   }

   if (stage_mask & (VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT |
                     VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT)) {
      stages |= PVR_PIPELINE_STAGE_COMPUTE_BIT;
   }

   if (stage_mask & (VK_PIPELINE_STAGE_TRANSFER_BIT))
      stages |= PVR_PIPELINE_STAGE_TRANSFER_BIT;

   return stages;
}

static inline enum pvr_pipeline_stage_bits
pvr_stage_mask_src(VkPipelineStageFlags2KHR stage_mask)
{
   /* If the source is bottom of pipe, all stages will need to be waited for. */
   if (stage_mask & VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)
      return PVR_PIPELINE_STAGE_ALL_BITS;

   return pvr_stage_mask(stage_mask);
}

static inline enum pvr_pipeline_stage_bits
pvr_stage_mask_dst(VkPipelineStageFlags2KHR stage_mask)
{
   /* If the destination is top of pipe, all stages should be blocked by prior
    * commands.
    */
   if (stage_mask & VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT)
      return PVR_PIPELINE_STAGE_ALL_BITS;

   return pvr_stage_mask(stage_mask);
}

VkResult pvr_pds_fragment_program_create_and_upload(
   struct pvr_device *device,
   const VkAllocationCallbacks *allocator,
   const struct pvr_bo *fragment_shader_bo,
   uint32_t fragment_temp_count,
   enum rogue_msaa_mode msaa_mode,
   bool has_phase_rate_change,
   struct pvr_pds_upload *const pds_upload_out);

VkResult pvr_pds_unitex_state_program_create_and_upload(
   struct pvr_device *device,
   const VkAllocationCallbacks *allocator,
   uint32_t texture_kicks,
   uint32_t uniform_kicks,
   struct pvr_pds_upload *const pds_upload_out);

#define PVR_FROM_HANDLE(__pvr_type, __name, __handle) \
   VK_FROM_HANDLE(__pvr_type, __name, __handle)

VK_DEFINE_HANDLE_CASTS(pvr_cmd_buffer,
                       vk.base,
                       VkCommandBuffer,
                       VK_OBJECT_TYPE_COMMAND_BUFFER)
VK_DEFINE_HANDLE_CASTS(pvr_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)
VK_DEFINE_HANDLE_CASTS(pvr_instance,
                       vk.base,
                       VkInstance,
                       VK_OBJECT_TYPE_INSTANCE)
VK_DEFINE_HANDLE_CASTS(pvr_physical_device,
                       vk.base,
                       VkPhysicalDevice,
                       VK_OBJECT_TYPE_PHYSICAL_DEVICE)
VK_DEFINE_HANDLE_CASTS(pvr_queue, vk.base, VkQueue, VK_OBJECT_TYPE_QUEUE)

VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_device_memory,
                               base,
                               VkDeviceMemory,
                               VK_OBJECT_TYPE_DEVICE_MEMORY)
VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_image, vk.base, VkImage, VK_OBJECT_TYPE_IMAGE)
VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_pipeline_cache,
                               base,
                               VkPipelineCache,
                               VK_OBJECT_TYPE_PIPELINE_CACHE)
VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_buffer,
                               vk.base,
                               VkBuffer,
                               VK_OBJECT_TYPE_BUFFER)
VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_image_view,
                               vk.base,
                               VkImageView,
                               VK_OBJECT_TYPE_IMAGE_VIEW)
VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_buffer_view,
                               base,
                               VkBufferView,
                               VK_OBJECT_TYPE_BUFFER_VIEW)
VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_descriptor_set_layout,
                               base,
                               VkDescriptorSetLayout,
                               VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT)
VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_descriptor_set,
                               base,
                               VkDescriptorSet,
                               VK_OBJECT_TYPE_DESCRIPTOR_SET)
VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_descriptor_pool,
                               base,
                               VkDescriptorPool,
                               VK_OBJECT_TYPE_DESCRIPTOR_POOL)
VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_sampler,
                               base,
                               VkSampler,
                               VK_OBJECT_TYPE_SAMPLER)
VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_pipeline_layout,
                               base,
                               VkPipelineLayout,
                               VK_OBJECT_TYPE_PIPELINE_LAYOUT)
VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_pipeline,
                               base,
                               VkPipeline,
                               VK_OBJECT_TYPE_PIPELINE)
VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_query_pool,
                               base,
                               VkQueryPool,
                               VK_OBJECT_TYPE_QUERY_POOL)
VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_framebuffer,
                               base,
                               VkFramebuffer,
                               VK_OBJECT_TYPE_FRAMEBUFFER)
VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_render_pass,
                               base,
                               VkRenderPass,
                               VK_OBJECT_TYPE_RENDER_PASS)

/**
 * Warn on ignored extension structs.
 *
 * The Vulkan spec requires us to ignore unsupported or unknown structs in
 * a pNext chain. In debug mode, emitting warnings for ignored structs may
 * help us discover structs that we should not have ignored.
 *
 * From the Vulkan 1.0.38 spec:
 *
 *    Any component of the implementation (the loader, any enabled layers,
 *    and drivers) must skip over, without processing (other than reading the
 *    sType and pNext members) any chained structures with sType values not
 *    defined by extensions supported by that component.
 */
#define pvr_debug_ignored_stype(sType) \
   mesa_logd("%s: ignored VkStructureType %u\n", __func__, (sType))

/* Debug helper macros. */
#define PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer) \
   do { \
      struct pvr_cmd_buffer *const _cmd_buffer = (cmd_buffer); \
      if (_cmd_buffer->status != PVR_CMD_BUFFER_STATUS_RECORDING) { \
         vk_errorf(_cmd_buffer, \
                   VK_ERROR_OUT_OF_DEVICE_MEMORY, \
                   "Command buffer is not in recording state"); \
         return; \
      } else if (_cmd_buffer->state.status < VK_SUCCESS) { \
         vk_errorf(_cmd_buffer, \
                   _cmd_buffer->state.status, \
                   "Skipping function as command buffer has " \
                   "previous build error"); \
         return; \
      } \
   } while (0)
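
/* Note: the macro above does a plain 'return' on failure, so it can only be
 * used in entry points that return void.
 */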

/**
 * Print a FINISHME message, including its source location.
 */
#define pvr_finishme(format, ...) \
   do { \
      static bool reported = false; \
      if (!reported) { \
         mesa_logw("%s:%d: FINISHME: " format, \
                   __FILE__, \
                   __LINE__, \
                   ##__VA_ARGS__); \
         reported = true; \
      } \
   } while (false)

/* A non-fatal assert. Useful for debugging. */
#ifdef DEBUG
#   define pvr_assert(x) \
      ({ \
         if (unlikely(!(x))) \
            mesa_loge("%s:%d ASSERT: %s", __FILE__, __LINE__, #x); \
      })
#else
#   define pvr_assert(x)
#endif

#endif /* PVR_PRIVATE_H */