1 /*
2  * Copyright © 2023 Imagination Technologies Ltd.
3  *
4  * based in part on anv driver which is:
5  * Copyright © 2015 Intel Corporation
6  *
7  * based in part on radv driver which is:
8  * Copyright © 2016 Red Hat.
9  * Copyright © 2016 Bas Nieuwenhuizen
10  *
11  * Permission is hereby granted, free of charge, to any person obtaining a copy
12  * of this software and associated documentation files (the "Software"), to deal
13  * in the Software without restriction, including without limitation the rights
14  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
15  * copies of the Software, and to permit persons to whom the Software is
16  * furnished to do so, subject to the following conditions:
17  *
18  * The above copyright notice and this permission notice (including the next
19  * paragraph) shall be included in all copies or substantial portions of the
20  * Software.
21  *
22  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
27  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28  * SOFTWARE.
29  */
30 
31 #ifndef PVR_COMMON_H
32 #define PVR_COMMON_H
33 
34 #include <stdbool.h>
35 #include <stdint.h>
36 #include <vulkan/vulkan.h>
37 
38 /* FIXME: Rename this, and ensure it only contains what's
39  * relevant for the driver/compiler interface (no Vulkan types).
40  */
41 
42 #include "hwdef/rogue_hw_defs.h"
43 #include "pvr_limits.h"
44 #include "pvr_types.h"
45 #include "util/list.h"
46 #include "util/macros.h"
47 #include "vk_object.h"
48 #include "vk_sampler.h"
49 #include "vk_sync.h"
50 
#define VK_VENDOR_ID_IMAGINATION 0x1010

#define PVR_WORKGROUP_DIMENSIONS 3U

/* Number of 32-bit words in a sampler descriptor
 * (see union pvr_sampler_descriptor).
 */
#define PVR_SAMPLER_DESCRIPTOR_SIZE 4U
/* Size of an image descriptor, in dwords. */
#define PVR_IMAGE_DESCRIPTOR_SIZE 4U

#define PVR_STATE_PBE_DWORDS 2U

/* One past VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT. Used as an array dimension, so
 * the whole expansion is parenthesized to keep the cast from binding to
 * whatever follows the macro at the expansion site.
 */
#define PVR_PIPELINE_LAYOUT_SUPPORTED_DESCRIPTOR_TYPE_COUNT \
   ((uint32_t)(VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT + 1U))

#define PVR_TRANSFER_MAX_LAYERS 1U
#define PVR_TRANSFER_MAX_LOADS 4U
/* Product of the maximum layer and load counts above. */
#define PVR_TRANSFER_MAX_IMAGES \
   (PVR_TRANSFER_MAX_LAYERS * PVR_TRANSFER_MAX_LOADS)
67 
/* TODO: move into a common surface library? */
/* Memory layout identifiers. Zero is reserved so that zero-initialized state
 * reads as "undefined" rather than as a valid layout.
 */
enum pvr_memlayout {
   PVR_MEMLAYOUT_UNDEFINED = 0, /* explicitly treat 0 as undefined */
   PVR_MEMLAYOUT_LINEAR,
   PVR_MEMLAYOUT_TWIDDLED,
   PVR_MEMLAYOUT_3DTWIDDLED,
};
75 
/* Texture state variants. PVR_TEXTURE_STATE_MAX_ENUM must stay last: its value
 * equals the number of real states that precede it.
 */
enum pvr_texture_state {
   PVR_TEXTURE_STATE_SAMPLE,
   PVR_TEXTURE_STATE_STORAGE,
   PVR_TEXTURE_STATE_ATTACHMENT,
   PVR_TEXTURE_STATE_MAX_ENUM,
};
82 
/* Sub-command kinds. Zero is reserved so that zero-initialized state reads as
 * invalid.
 */
enum pvr_sub_cmd_type {
   PVR_SUB_CMD_TYPE_INVALID = 0, /* explicitly treat 0 as invalid */
   PVR_SUB_CMD_TYPE_GRAPHICS,
   PVR_SUB_CMD_TYPE_COMPUTE,
   PVR_SUB_CMD_TYPE_TRANSFER,
   PVR_SUB_CMD_TYPE_OCCLUSION_QUERY,
   PVR_SUB_CMD_TYPE_EVENT,
};
91 
/* Kinds of event operation. */
enum pvr_event_type {
   PVR_EVENT_TYPE_SET,
   PVR_EVENT_TYPE_RESET,
   PVR_EVENT_TYPE_WAIT,
   PVR_EVENT_TYPE_BARRIER,
};
98 
/* Depth/stencil usage. Zero is reserved so that zero-initialized state reads
 * as undefined.
 */
enum pvr_depth_stencil_usage {
   PVR_DEPTH_STENCIL_USAGE_UNDEFINED = 0, /* explicitly treat 0 as undefined */
   PVR_DEPTH_STENCIL_USAGE_NEEDED,
   PVR_DEPTH_STENCIL_USAGE_NEVER,
};
104 
/* Job types. The numeric values are used as bit positions by
 * enum pvr_pipeline_stage_bits, so do not reorder them casually.
 * PVR_JOB_TYPE_MAX must stay last: its value equals the number of job types.
 */
enum pvr_job_type {
   PVR_JOB_TYPE_GEOM,
   PVR_JOB_TYPE_FRAG,
   PVR_JOB_TYPE_COMPUTE,
   PVR_JOB_TYPE_TRANSFER,
   PVR_JOB_TYPE_OCCLUSION_QUERY,
   PVR_JOB_TYPE_MAX
};
113 
/* Pipeline kinds. Zero is reserved so that zero-initialized state reads as
 * invalid.
 */
enum pvr_pipeline_type {
   PVR_PIPELINE_TYPE_INVALID = 0, /* explicitly treat 0 as invalid */
   PVR_PIPELINE_TYPE_GRAPHICS,
   PVR_PIPELINE_TYPE_COMPUTE,
};
119 
120 enum pvr_pipeline_stage_bits {
121    PVR_PIPELINE_STAGE_GEOM_BIT = BITFIELD_BIT(PVR_JOB_TYPE_GEOM),
122    PVR_PIPELINE_STAGE_FRAG_BIT = BITFIELD_BIT(PVR_JOB_TYPE_FRAG),
123    PVR_PIPELINE_STAGE_COMPUTE_BIT = BITFIELD_BIT(PVR_JOB_TYPE_COMPUTE),
124    PVR_PIPELINE_STAGE_TRANSFER_BIT = BITFIELD_BIT(PVR_JOB_TYPE_TRANSFER),
125    /* Note that this doesn't map to VkPipelineStageFlagBits so be careful with
126     * this.
127     */
128    PVR_PIPELINE_STAGE_OCCLUSION_QUERY_BIT =
129       BITFIELD_BIT(PVR_JOB_TYPE_OCCLUSION_QUERY),
130 };
131 
/* Both graphics stages: geometry and fragment. */
#define PVR_PIPELINE_STAGE_ALL_GRAPHICS_BITS \
   (PVR_PIPELINE_STAGE_GEOM_BIT | PVR_PIPELINE_STAGE_FRAG_BIT)

/* Geometry, fragment, compute and transfer. Note that
 * PVR_PIPELINE_STAGE_OCCLUSION_QUERY_BIT is not part of this mask.
 */
#define PVR_PIPELINE_STAGE_ALL_BITS                                         \
   (PVR_PIPELINE_STAGE_ALL_GRAPHICS_BITS | PVR_PIPELINE_STAGE_COMPUTE_BIT | \
    PVR_PIPELINE_STAGE_TRANSFER_BIT)

/* NOTE(review): presumably the number of stage bits in
 * PVR_PIPELINE_STAGE_ALL_BITS - confirm if stages are added or removed.
 */
#define PVR_NUM_SYNC_PIPELINE_STAGES 4U
140 
/* Warning: Do not define an invalid stage as 0 since other code relies on 0
 * being the first shader stage. This allows for stages to be split or added
 * in the future. Defining 0 as invalid will very likely cause problems.
 *
 * PVR_STAGE_ALLOCATION_COUNT must stay last: it is used as the dimension of
 * the per-stage arrays in this header.
 */
enum pvr_stage_allocation {
   PVR_STAGE_ALLOCATION_VERTEX_GEOMETRY,
   PVR_STAGE_ALLOCATION_FRAGMENT,
   PVR_STAGE_ALLOCATION_COMPUTE,
   PVR_STAGE_ALLOCATION_COUNT
};
151 
/* Filtering modes. */
enum pvr_filter {
   PVR_FILTER_DONTCARE, /* Any filtering mode is acceptable. */
   PVR_FILTER_POINT,
   PVR_FILTER_LINEAR,
   PVR_FILTER_BICUBIC,
};
158 
/* Resolve operations. The PVR_RESOLVE_SAMPLE<n> values are consecutive, so
 * sample n is PVR_RESOLVE_SAMPLE0 + n for n in [0, 7].
 */
enum pvr_resolve_op {
   PVR_RESOLVE_BLEND,
   PVR_RESOLVE_MIN,
   PVR_RESOLVE_MAX,
   PVR_RESOLVE_SAMPLE0,
   PVR_RESOLVE_SAMPLE1,
   PVR_RESOLVE_SAMPLE2,
   PVR_RESOLVE_SAMPLE3,
   PVR_RESOLVE_SAMPLE4,
   PVR_RESOLVE_SAMPLE5,
   PVR_RESOLVE_SAMPLE6,
   PVR_RESOLVE_SAMPLE7,
};
172 
/* State of an event (see struct pvr_event): set or reset, and whether the
 * host or the device put it in that state.
 */
enum pvr_event_state {
   PVR_EVENT_STATE_SET_BY_HOST,
   PVR_EVENT_STATE_RESET_BY_HOST,
   PVR_EVENT_STATE_SET_BY_DEVICE,
   PVR_EVENT_STATE_RESET_BY_DEVICE
};
179 
/* Kinds of deferred control stream command. */
enum pvr_deferred_cs_command_type {
   PVR_DEFERRED_CS_COMMAND_TYPE_DBSC,
   PVR_DEFERRED_CS_COMMAND_TYPE_DBSC2,
};
184 
/* Kinds of query operation. */
enum pvr_query_type {
   PVR_QUERY_TYPE_AVAILABILITY_WRITE,
   PVR_QUERY_TYPE_RESET_QUERY_POOL,
   PVR_QUERY_TYPE_COPY_QUERY_RESULTS,
};
190 
191 union pvr_sampler_descriptor {
192    uint32_t words[PVR_SAMPLER_DESCRIPTOR_SIZE];
193 
194    struct {
195       /* Packed ROGUE_TEXSTATE_SAMPLER. */
196       uint64_t sampler_word;
197       uint32_t compare_op;
198       /* TODO: Figure out what this word is for and rename.
199        * Sampler state word 1?
200        */
201       uint32_t word3;
202    } data;
203 };
204 
205 struct pvr_combined_image_sampler_descriptor {
206    /* | TEXSTATE_IMAGE_WORD0 | TEXSTATE_{STRIDE_,}IMAGE_WORD1 | */
207    uint64_t image[ROGUE_NUM_TEXSTATE_IMAGE_WORDS];
208    union pvr_sampler_descriptor sampler;
209 };
210 
211 #define CHECK_STRUCT_FIELD_SIZE(_struct_type, _field_name, _size)      \
212    static_assert(sizeof(((struct _struct_type *)NULL)->_field_name) == \
213                     (_size),                                           \
214                  "Size of '" #_field_name "' in '" #_struct_type       \
215                  "' differs from expected")
216 
217 CHECK_STRUCT_FIELD_SIZE(pvr_combined_image_sampler_descriptor,
218                         image,
219                         ROGUE_NUM_TEXSTATE_IMAGE_WORDS * sizeof(uint64_t));
220 CHECK_STRUCT_FIELD_SIZE(pvr_combined_image_sampler_descriptor,
221                         image,
222                         PVR_DW_TO_BYTES(PVR_IMAGE_DESCRIPTOR_SIZE));
223 #if 0
224 /* TODO: Don't really want to include pvr_csb.h in here since this header is
225  * shared with the compiler. Figure out a better place for these.
226  */
227 CHECK_STRUCT_FIELD_SIZE(pvr_combined_image_sampler_descriptor,
228                         image,
229                         (pvr_cmd_length(TEXSTATE_IMAGE_WORD0) +
230                          pvr_cmd_length(TEXSTATE_IMAGE_WORD1)) *
231                            sizeof(uint32_t));
232 CHECK_STRUCT_FIELD_SIZE(pvr_combined_image_sampler_descriptor,
233                         image,
234                         (pvr_cmd_length(TEXSTATE_IMAGE_WORD0) +
235                          pvr_cmd_length(TEXSTATE_STRIDE_IMAGE_WORD1)) *
236                            sizeof(uint32_t));
237 #endif
238 
239 #undef CHECK_STRUCT_FIELD_SIZE
240 
241 struct pvr_sampler {
242    struct vk_sampler vk;
243 
244    union pvr_sampler_descriptor descriptor;
245 };
246 
/* Primary/secondary sizes and alignment for a descriptor. */
struct pvr_descriptor_size_info {
   /* Non-spillable size for storage in the common store. */
   uint32_t primary;

   /* Spillable size to accommodate limitation of the common store. */
   uint32_t secondary;

   uint32_t alignment;
};
256 
257 struct pvr_descriptor_set_layout_binding {
258    VkDescriptorType type;
259 
260    /* "M" in layout(set = N, binding = M)
261     * Can be used to index bindings in the descriptor_set_layout.
262     */
263    uint32_t binding_number;
264 
265    uint32_t descriptor_count;
266 
267    /* Index into the flattened descriptor set */
268    uint16_t descriptor_index;
269 
270    /* Mask of enum pvr_stage_allocation. */
271    uint8_t shader_stage_mask;
272 
273    struct {
274       uint32_t primary;
275       uint32_t secondary;
276    } per_stage_offset_in_dwords[PVR_STAGE_ALLOCATION_COUNT];
277 
278    bool has_immutable_samplers;
279    /* Index at which the samplers can be found in the descriptor_set_layout.
280     * 0 when the samplers are at index 0 or no samplers are present.
281     */
282    uint32_t immutable_samplers_index;
283 };
284 
/* Offsets and sizes of the primary and secondary descriptor regions.
 * All sizes are in dwords.
 */
struct pvr_descriptor_set_layout_mem_layout {
   uint32_t primary_offset;
   uint32_t primary_size;

   uint32_t secondary_offset;
   uint32_t secondary_size;

   uint32_t primary_dynamic_size;
   uint32_t secondary_dynamic_size;
};
296 
297 struct pvr_descriptor_set_layout {
298    struct vk_object_base base;
299 
300    /* Total amount of descriptors contained in this set. */
301    uint32_t descriptor_count;
302 
303    /* Count of dynamic buffers. */
304    uint32_t dynamic_buffer_count;
305    uint32_t total_dynamic_size_in_dwords;
306 
307    uint32_t binding_count;
308    struct pvr_descriptor_set_layout_binding *bindings;
309 
310    uint32_t immutable_sampler_count;
311    const struct pvr_sampler **immutable_samplers;
312 
313    /* Shader stages requiring access to descriptors in this set. */
314    /* Mask of enum pvr_stage_allocation. */
315    uint8_t shader_stage_mask;
316 
317    /* Count of each VkDescriptorType per shader stage. Dynamically allocated
318     * arrays per stage as to not hard code the max descriptor type here.
319     *
320     * Note: when adding a new type, it might not numerically follow the
321     * previous type so a sparse array will be created. You might want to
322     * readjust how these arrays are created and accessed.
323     */
324    uint32_t *per_stage_descriptor_count[PVR_STAGE_ALLOCATION_COUNT];
325 
326    uint32_t total_size_in_dwords;
327    struct pvr_descriptor_set_layout_mem_layout
328       memory_layout_in_dwords_per_stage[PVR_STAGE_ALLOCATION_COUNT];
329 };
330 
331 struct pvr_descriptor_pool {
332    struct vk_object_base base;
333 
334    VkAllocationCallbacks alloc;
335 
336    /* Saved information from pCreateInfo. */
337    uint32_t max_sets;
338 
339    uint32_t total_size_in_dwords;
340    uint32_t current_size_in_dwords;
341 
342    /* Derived and other state. */
343    /* List of the descriptor sets created using this pool. */
344    struct list_head descriptor_sets;
345 };
346 
347 struct pvr_descriptor {
348    VkDescriptorType type;
349 
350    union {
351       struct {
352          struct pvr_buffer_view *bview;
353          pvr_dev_addr_t buffer_dev_addr;
354          VkDeviceSize buffer_desc_range;
355          VkDeviceSize buffer_whole_range;
356       };
357 
358       struct {
359          VkImageLayout layout;
360          const struct pvr_image_view *iview;
361          const struct pvr_sampler *sampler;
362       };
363    };
364 };
365 
366 struct pvr_descriptor_set {
367    struct vk_object_base base;
368 
369    const struct pvr_descriptor_set_layout *layout;
370    const struct pvr_descriptor_pool *pool;
371 
372    struct pvr_suballoc_bo *pvr_bo;
373 
374    /* Links this descriptor set into pvr_descriptor_pool::descriptor_sets list.
375     */
376    struct list_head link;
377 
378    /* Array of size layout::descriptor_count. */
379    struct pvr_descriptor descriptors[0];
380 };
381 
382 struct pvr_event {
383    struct vk_object_base base;
384 
385    enum pvr_event_state state;
386    struct vk_sync *sync;
387 };
388 
389 #define PVR_MAX_DYNAMIC_BUFFERS                      \
390    (PVR_MAX_DESCRIPTOR_SET_UNIFORM_DYNAMIC_BUFFERS + \
391     PVR_MAX_DESCRIPTOR_SET_STORAGE_DYNAMIC_BUFFERS)
392 
393 struct pvr_descriptor_state {
394    struct pvr_descriptor_set *descriptor_sets[PVR_MAX_DESCRIPTOR_SETS];
395    uint32_t valid_mask;
396 
397    uint32_t dynamic_offsets[PVR_MAX_DYNAMIC_BUFFERS];
398 };
399 
400 #undef PVR_MAX_DYNAMIC_BUFFERS
401 
/**
 * \brief Indicates the layout of shared registers allocated by the driver.
 *
 * 'present' fields indicate if a certain resource was allocated for, and
 * whether it will be present in the shareds.
 * 'offset' fields indicate at which shared reg the resource starts at.
 *
 * Every entry below follows the same { present, offset } shape.
 */
struct pvr_sh_reg_layout {
   /* If this is present, it will always take up 2 sh regs in size and contain
    * the device address of the descriptor set addrs table.
    */
   struct {
      bool present;
      uint32_t offset;
   } descriptor_set_addrs_table;

   /* If this is present, it will always take up 2 sh regs in size and contain
    * the device address of the push constants buffer.
    */
   struct {
      bool present;
      uint32_t offset;
   } push_consts;

   /* If this is present, it will always take up 2 sh regs in size and contain
    * the device address of the blend constants buffer.
    */
   struct {
      bool present;
      uint32_t offset;
   } blend_consts;
};
434 
435 struct pvr_pipeline_layout {
436    struct vk_object_base base;
437 
438    uint32_t set_count;
439    /* Contains set_count amount of descriptor set layouts. */
440    struct pvr_descriptor_set_layout *set_layout[PVR_MAX_DESCRIPTOR_SETS];
441 
442    /* Mask of enum pvr_stage_allocation. */
443    uint8_t push_constants_shader_stages;
444    uint32_t vert_push_constants_offset;
445    uint32_t frag_push_constants_offset;
446    uint32_t compute_push_constants_offset;
447 
448    /* Mask of enum pvr_stage_allocation. */
449    uint8_t shader_stage_mask;
450 
451    /* Per stage masks indicating which set in the layout contains any
452     * descriptor of the appropriate types: VK..._{SAMPLER, SAMPLED_IMAGE,
453     * UNIFORM_TEXEL_BUFFER, UNIFORM_BUFFER, STORAGE_BUFFER}.
454     * Shift by the set's number to check the mask (1U << set_num).
455     */
456    uint32_t per_stage_descriptor_masks[PVR_STAGE_ALLOCATION_COUNT];
457 
458    /* Array of descriptor offsets at which the set's descriptors' start, per
459     * stage, within all the sets in the pipeline layout per descriptor type.
460     * Note that we only store into for specific descriptor types
461     * VK_DESCRIPTOR_TYPE_{SAMPLER, SAMPLED_IMAGE, UNIFORM_TEXEL_BUFFER,
462     * UNIFORM_BUFFER, STORAGE_BUFFER}, the rest will be 0.
463     */
464    uint32_t
465       descriptor_offsets[PVR_MAX_DESCRIPTOR_SETS][PVR_STAGE_ALLOCATION_COUNT]
466                         [PVR_PIPELINE_LAYOUT_SUPPORTED_DESCRIPTOR_TYPE_COUNT];
467 
468    /* There is no accounting for dynamics in here. They will be garbage values.
469     */
470    struct pvr_descriptor_set_layout_mem_layout
471       register_layout_in_dwords_per_stage[PVR_STAGE_ALLOCATION_COUNT]
472                                          [PVR_MAX_DESCRIPTOR_SETS];
473 
474    /* TODO: Consider whether this needs to be here. */
475    struct pvr_sh_reg_layout sh_reg_layout_per_stage[PVR_STAGE_ALLOCATION_COUNT];
476 
477    /* All sizes in dwords. */
478    struct pvr_pipeline_layout_reg_info {
479       uint32_t primary_dynamic_size_in_dwords;
480       uint32_t secondary_dynamic_size_in_dwords;
481    } per_stage_reg_info[PVR_STAGE_ALLOCATION_COUNT];
482 };
483 
pvr_compare_layout_binding(const void * a,const void * b)484 static int pvr_compare_layout_binding(const void *a, const void *b)
485 {
486    uint32_t binding_a;
487    uint32_t binding_b;
488 
489    binding_a = ((struct pvr_descriptor_set_layout_binding *)a)->binding_number;
490    binding_b = ((struct pvr_descriptor_set_layout_binding *)b)->binding_number;
491 
492    if (binding_a < binding_b)
493       return -1;
494 
495    if (binding_a > binding_b)
496       return 1;
497 
498    return 0;
499 }
500 
501 /* This function does not assume that the binding will always exist for a
502  * particular binding_num. Caller should check before using the return pointer.
503  */
504 static struct pvr_descriptor_set_layout_binding *
pvr_get_descriptor_binding(const struct pvr_descriptor_set_layout * layout,const uint32_t binding_num)505 pvr_get_descriptor_binding(const struct pvr_descriptor_set_layout *layout,
506                            const uint32_t binding_num)
507 {
508    struct pvr_descriptor_set_layout_binding binding;
509    binding.binding_number = binding_num;
510 
511    return bsearch(&binding,
512                   layout->bindings,
513                   layout->binding_count,
514                   sizeof(binding),
515                   pvr_compare_layout_binding);
516 }
517 
518 #endif /* PVR_COMMON_H */
519