1 /*
2  * Copyright © 2019 Raspberry Pi Ltd
3  *
4  * based in part on anv driver which is:
5  * Copyright © 2015 Intel Corporation
6  *
7  * based in part on radv driver which is:
8  * Copyright © 2016 Red Hat.
9  * Copyright © 2016 Bas Nieuwenhuizen
10  *
11  * Permission is hereby granted, free of charge, to any person obtaining a
12  * copy of this software and associated documentation files (the "Software"),
13  * to deal in the Software without restriction, including without limitation
14  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15  * and/or sell copies of the Software, and to permit persons to whom the
16  * Software is furnished to do so, subject to the following conditions:
17  *
18  * The above copyright notice and this permission notice (including the next
19  * paragraph) shall be included in all copies or substantial portions of the
20  * Software.
21  *
22  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
25  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
28  * IN THE SOFTWARE.
29  */
30 #ifndef V3DV_PRIVATE_H
31 #define V3DV_PRIVATE_H
32 
33 #include <stdio.h>
34 #include <string.h>
35 #include <vulkan/vulkan.h>
36 #include <vulkan/vk_icd.h>
37 #include <vk_enum_to_str.h>
38 
39 #include "vk_descriptor_update_template.h"
40 #include "vk_device.h"
41 #include "vk_device_memory.h"
42 #include "vk_format.h"
43 #include "vk_instance.h"
44 #include "vk_image.h"
45 #include "vk_log.h"
46 #include "vk_physical_device.h"
47 #include "vk_shader_module.h"
48 #include "vk_sync.h"
49 #include "vk_sync_timeline.h"
50 #include "vk_util.h"
51 #include "vk_ycbcr_conversion.h"
52 
53 #include "vk_command_buffer.h"
54 #include "vk_command_pool.h"
55 #include "vk_queue.h"
56 #include "vk_pipeline.h"
57 
58 #include <xf86drm.h>
59 
60 #ifdef HAVE_VALGRIND
61 #include <valgrind.h>
62 #include <memcheck.h>
63 #define VG(x) x
64 #else
65 #define VG(x) ((void)0)
66 #endif
67 
68 #include "util/detect_os.h"
69 
70 #if DETECT_OS_ANDROID
71 #include <vndk/hardware_buffer.h>
72 #include "util/u_gralloc/u_gralloc.h"
73 #endif
74 
75 #include "v3dv_limits.h"
76 
77 #include "common/v3d_device_info.h"
78 #include "common/v3d_limits.h"
79 #include "common/v3d_tiling.h"
80 #include "common/v3d_util.h"
81 
82 #include "compiler/shader_enums.h"
83 #include "compiler/spirv/nir_spirv.h"
84 
85 #include "compiler/v3d_compiler.h"
86 
87 #include "vk_debug_report.h"
88 #include "util/set.h"
89 #include "util/hash_table.h"
90 #include "util/sparse_array.h"
91 #include "util/xmlconfig.h"
92 #include "util/u_atomic.h"
93 
94 #include "v3dv_entrypoints.h"
95 #include "v3dv_bo.h"
96 
97 #include "drm-uapi/v3d_drm.h"
98 
99 #include "vk_alloc.h"
100 #include "simulator/v3d_simulator.h"
101 
102 #include "v3dv_cl.h"
103 
104 #include "wsi_common.h"
105 
106 /* A non-fatal assert.  Useful for debugging. */
107 #ifdef DEBUG
108 #define v3dv_assert(x) ({ \
109    if (unlikely(!(x))) \
110       fprintf(stderr, "%s:%d ASSERT: %s\n", __FILE__, __LINE__, #x); \
111 })
112 #else
113 #define v3dv_assert(x)
114 #endif
115 
116 #define perf_debug(...) do {                       \
117    if (V3D_DBG(PERF))                            \
118       fprintf(stderr, __VA_ARGS__);                \
119 } while (0)
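
/* Usage sketch (illustrative, not part of the driver API): v3dv_assert()
 * only logs on debug builds and never aborts, while perf_debug() only prints
 * when the PERF debug flag is enabled, e.g.:
 *
 *    v3dv_assert(num_attachments <= pass->attachment_count);
 *    perf_debug("Falling back to a draw call for a partial TLB clear\n");
 */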
120 
121 struct v3dv_instance;
122 
123 #ifdef USE_V3D_SIMULATOR
124 #define using_v3d_simulator true
125 #else
126 #define using_v3d_simulator false
127 #endif
128 
129 struct v3d_simulator_file;
130 
131 /* Minimum required by the Vulkan 1.1 spec */
132 #define MAX_MEMORY_ALLOCATION_SIZE (1ull << 30)
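/* For reference: (1ull << 30) is 1 GiB, the minimum value the spec allows
 * implementations to report for maxMemoryAllocationSize.
 */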
133 
134 /* Maximum number of performance counters */
135 #define V3D_MAX_PERFCNT 93
136 
137 struct v3dv_physical_device {
138    struct vk_physical_device vk;
139 
140    char *name;
141    int32_t render_fd;
142    int32_t display_fd;
143 
144    /* We need these because it is not clear how to detect
145     * valid devids in a portable way
146      */
147    bool has_primary;
148    bool has_render;
149 
150    dev_t primary_devid;
151    dev_t render_devid;
152 
153 #if using_v3d_simulator
154    uint32_t device_id;
155 #endif
156 
157    uint8_t driver_build_sha1[20];
158    uint8_t pipeline_cache_uuid[VK_UUID_SIZE];
159    uint8_t device_uuid[VK_UUID_SIZE];
160    uint8_t driver_uuid[VK_UUID_SIZE];
161 
162    struct vk_sync_type drm_syncobj_type;
163    struct vk_sync_timeline_type sync_timeline_type;
164    const struct vk_sync_type *sync_types[3];
165 
166    struct disk_cache *disk_cache;
167 
168    mtx_t mutex;
169 
170    struct wsi_device wsi_device;
171 
172    VkPhysicalDeviceMemoryProperties memory;
173 
174    struct v3d_device_info devinfo;
175 
176    struct v3d_simulator_file *sim_file;
177 
178    const struct v3d_compiler *compiler;
179    uint32_t next_program_id;
180 
181    alignas(8) uint64_t heap_used;
182 
183    /* This array holds all our 'struct v3dv_bo' allocations. We use this
184     * so we can add a refcount to our BOs and check if a particular BO
185     * was already allocated in this device using its GEM handle. This is
186     * necessary to properly manage BO imports, because the kernel doesn't
187     * refcount the underlying BO memory.
188     *
189     * Specifically, when self-importing (i.e. importing a BO into the same
190     * device that created it), the kernel will give us the same BO handle
191     * for both BOs and we must only free it once when  both references are
192     * freed. Otherwise, if we are not self-importing, we get two different BO
193     * handles, and we want to free each one individually.
194     *
195     * The BOs in this map all have a refcnt field holding their reference
196     * count, and only self-imported BOs will ever have a refcnt > 1.
197     */
198    struct util_sparse_array bo_map;
199 
200    struct {
201       bool merge_jobs;
202    } options;
203 
204    struct {
205       bool cpu_queue;
206       bool multisync;
207       bool perfmon;
208    } caps;
209 };
210 
211 static inline struct v3dv_bo *
212 v3dv_device_lookup_bo(struct v3dv_physical_device *device, uint32_t handle)
213 {
214    return (struct v3dv_bo *) util_sparse_array_get(&device->bo_map, handle);
215 }
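
/* Usage sketch (illustrative only; v3dv_bo_init_import is a hypothetical
 * helper, not a real driver entry point): on a dma-buf import, the GEM
 * handle can be looked up in bo_map to detect a self-import and reuse the
 * existing BO instead of creating a new one:
 *
 *    struct v3dv_bo *bo = v3dv_device_lookup_bo(pdevice, gem_handle);
 *    if (bo->refcnt > 0)
 *       p_atomic_inc(&bo->refcnt);     // self-import: same underlying BO
 *    else
 *       v3dv_bo_init_import(device, bo, gem_handle, size);
 */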
216 
217 VkResult v3dv_wsi_init(struct v3dv_physical_device *physical_device);
218 void v3dv_wsi_finish(struct v3dv_physical_device *physical_device);
219 struct v3dv_image *v3dv_wsi_get_image_from_swapchain(VkSwapchainKHR swapchain,
220                                                      uint32_t index);
221 
222 void v3dv_meta_clear_init(struct v3dv_device *device);
223 void v3dv_meta_clear_finish(struct v3dv_device *device);
224 
225 void v3dv_meta_blit_init(struct v3dv_device *device);
226 void v3dv_meta_blit_finish(struct v3dv_device *device);
227 
228 void v3dv_meta_texel_buffer_copy_init(struct v3dv_device *device);
229 void v3dv_meta_texel_buffer_copy_finish(struct v3dv_device *device);
230 
231 bool v3dv_meta_can_use_tlb(struct v3dv_image *image,
232                            uint8_t plane,
233                            uint8_t miplevel,
234                            const VkOffset3D *offset,
235                            const VkExtent3D *extent,
236                            VkFormat *compat_format);
237 
238 struct v3dv_instance {
239    struct vk_instance vk;
240 
241    bool pipeline_cache_enabled;
242    bool default_pipeline_cache_enabled;
243 };
244 
245 /* FIXME: In addition to tracking the last job submitted by GPU queue (cl, csd,
246  * tfu), we still need a syncobj to track the last overall job submitted
247  * (V3DV_QUEUE_ANY) for the case where multisync is not supported. Once we
248  * can assume multisync is always present, we can drop the legacy
249  * implementation together with this V3DV_QUEUE_ANY tracker.
250  */
251 enum v3dv_queue_type {
252    V3DV_QUEUE_CL = 0,
253    V3DV_QUEUE_CSD,
254    V3DV_QUEUE_TFU,
255    V3DV_QUEUE_CPU,
256    V3DV_QUEUE_ANY,
257    V3DV_QUEUE_COUNT,
258 };
259 
260 /* For each GPU queue, we use a syncobj to track the last job submitted. We
261  * use the `first` flag to mark when we are starting a new cmd buffer batch,
262  * so that the next job submitted to a given queue is known to be the first
263  * one in that batch.
264  */
265 struct v3dv_last_job_sync {
266    /* If the job is the first submitted to a GPU queue in a cmd buffer batch.
267     *
268     * We use V3DV_QUEUE_{CL,CSD,TFU} both with and without multisync.
269     */
270    bool first[V3DV_QUEUE_COUNT];
271    /* Array of syncobj to track the last job submitted to a GPU queue.
272     *
273     * With multisync we use V3DV_QUEUE_{CL,CSD,TFU} to track syncobjs for each
274     * queue, but without multisync we only track the last job submitted to any
275     * queue in V3DV_QUEUE_ANY.
276     */
277    uint32_t syncs[V3DV_QUEUE_COUNT];
278 };
279 
280 struct v3dv_queue {
281    struct vk_queue vk;
282 
283    struct v3dv_device *device;
284 
285    struct v3dv_last_job_sync last_job_syncs;
286 
287    struct v3dv_job *noop_job;
288 
289    /* The last active perfmon ID to prevent mixing of counter results when a
290     * job is submitted with a different perfmon id.
291     */
292    uint32_t last_perfmon_id;
293 };
294 
295 VkResult v3dv_queue_driver_submit(struct vk_queue *vk_queue,
296                                   struct vk_queue_submit *submit);
297 
298 #define V3DV_META_BLIT_CACHE_KEY_SIZE              (4 * sizeof(uint32_t))
299 #define V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE (3 * sizeof(uint32_t) + \
300                                                     sizeof(VkComponentMapping))
301 
302 struct v3dv_meta_color_clear_pipeline {
303    VkPipeline pipeline;
304    VkRenderPass pass;
305    bool cached;
306    uint64_t key;
307 };
308 
309 struct v3dv_meta_depth_clear_pipeline {
310    VkPipeline pipeline;
311    uint64_t key;
312 };
313 
314 struct v3dv_meta_blit_pipeline {
315    VkPipeline pipeline;
316    VkRenderPass pass;
317    VkRenderPass pass_no_load;
318    uint8_t key[V3DV_META_BLIT_CACHE_KEY_SIZE];
319 };
320 
321 struct v3dv_meta_texel_buffer_copy_pipeline {
322    VkPipeline pipeline;
323    VkRenderPass pass;
324    VkRenderPass pass_no_load;
325    uint8_t key[V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE];
326 };
327 
328 struct v3dv_pipeline_key {
329    uint8_t topology;
330    uint8_t logicop_func;
331    bool msaa;
332    bool sample_alpha_to_coverage;
333    bool sample_alpha_to_one;
334    uint8_t cbufs;
335    struct {
336       enum pipe_format format;
337       uint8_t swizzle[4];
338    } color_fmt[V3D_MAX_DRAW_BUFFERS];
339    uint8_t f32_color_rb;
340    uint32_t va_swap_rb_mask;
341    bool has_multiview;
342 };
343 
344 struct v3dv_pipeline_cache_stats {
345    uint32_t miss;
346    uint32_t hit;
347    uint32_t count;
348    uint32_t on_disk_hit;
349 };
350 
351 /* Equivalent to gl_shader_stage, but including the coordinate shaders
352  *
353  * FIXME: perhaps move to common
354  */
355 enum broadcom_shader_stage {
356    BROADCOM_SHADER_VERTEX,
357    BROADCOM_SHADER_VERTEX_BIN,
358    BROADCOM_SHADER_GEOMETRY,
359    BROADCOM_SHADER_GEOMETRY_BIN,
360    BROADCOM_SHADER_FRAGMENT,
361    BROADCOM_SHADER_COMPUTE,
362 };
363 
364 #define BROADCOM_SHADER_STAGES (BROADCOM_SHADER_COMPUTE + 1)
365 
366 /* Assumes that coordinate shaders will be custom-handled by the caller */
367 static inline enum broadcom_shader_stage
368 gl_shader_stage_to_broadcom(gl_shader_stage stage)
369 {
370    switch (stage) {
371    case MESA_SHADER_VERTEX:
372       return BROADCOM_SHADER_VERTEX;
373    case MESA_SHADER_GEOMETRY:
374       return BROADCOM_SHADER_GEOMETRY;
375    case MESA_SHADER_FRAGMENT:
376       return BROADCOM_SHADER_FRAGMENT;
377    case MESA_SHADER_COMPUTE:
378       return BROADCOM_SHADER_COMPUTE;
379    default:
380       unreachable("Unknown gl shader stage");
381    }
382 }
383 
384 static inline gl_shader_stage
385 broadcom_shader_stage_to_gl(enum broadcom_shader_stage stage)
386 {
387    switch (stage) {
388    case BROADCOM_SHADER_VERTEX:
389    case BROADCOM_SHADER_VERTEX_BIN:
390       return MESA_SHADER_VERTEX;
391    case BROADCOM_SHADER_GEOMETRY:
392    case BROADCOM_SHADER_GEOMETRY_BIN:
393       return MESA_SHADER_GEOMETRY;
394    case BROADCOM_SHADER_FRAGMENT:
395       return MESA_SHADER_FRAGMENT;
396    case BROADCOM_SHADER_COMPUTE:
397       return MESA_SHADER_COMPUTE;
398    default:
399       unreachable("Unknown broadcom shader stage");
400    }
401 }
402 
403 static inline bool
404 broadcom_shader_stage_is_binning(enum broadcom_shader_stage stage)
405 {
406    switch (stage) {
407    case BROADCOM_SHADER_VERTEX_BIN:
408    case BROADCOM_SHADER_GEOMETRY_BIN:
409       return true;
410    default:
411       return false;
412    }
413 }
414 
415 static inline bool
416 broadcom_shader_stage_is_render_with_binning(enum broadcom_shader_stage stage)
417 {
418    switch (stage) {
419    case BROADCOM_SHADER_VERTEX:
420    case BROADCOM_SHADER_GEOMETRY:
421       return true;
422    default:
423       return false;
424    }
425 }
426 
427 static inline enum broadcom_shader_stage
428 broadcom_binning_shader_stage_for_render_stage(enum broadcom_shader_stage stage)
429 {
430    switch (stage) {
431    case BROADCOM_SHADER_VERTEX:
432       return BROADCOM_SHADER_VERTEX_BIN;
433    case BROADCOM_SHADER_GEOMETRY:
434       return BROADCOM_SHADER_GEOMETRY_BIN;
435    default:
436       unreachable("Invalid shader stage");
437    }
438 }
439 
440 static inline const char *
441 broadcom_shader_stage_name(enum broadcom_shader_stage stage)
442 {
443    switch(stage) {
444    case BROADCOM_SHADER_VERTEX_BIN:
445       return "MESA_SHADER_VERTEX_BIN";
446    case BROADCOM_SHADER_GEOMETRY_BIN:
447       return "MESA_SHADER_GEOMETRY_BIN";
448    default:
449       return gl_shader_stage_name(broadcom_shader_stage_to_gl(stage));
450    }
451 }
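
/* For reference, a few illustrative values showing how the helpers above
 * compose (binning variants only exist for the geometry pipeline stages):
 *
 *    broadcom_binning_shader_stage_for_render_stage(BROADCOM_SHADER_VERTEX)
 *       == BROADCOM_SHADER_VERTEX_BIN
 *    broadcom_shader_stage_to_gl(BROADCOM_SHADER_VERTEX_BIN) == MESA_SHADER_VERTEX
 *    broadcom_shader_stage_is_binning(BROADCOM_SHADER_FRAGMENT) == false
 */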
452 
453 struct v3dv_pipeline_cache {
454    struct vk_object_base base;
455 
456    struct v3dv_device *device;
457    mtx_t mutex;
458 
459    struct hash_table *nir_cache;
460    struct v3dv_pipeline_cache_stats nir_stats;
461 
462    struct hash_table *cache;
463    struct v3dv_pipeline_cache_stats stats;
464 
465    /* For VK_EXT_pipeline_creation_cache_control. */
466    bool externally_synchronized;
467 };
468 
469 struct v3dv_device {
470    struct vk_device vk;
471 
472    struct v3dv_instance *instance;
473    struct v3dv_physical_device *pdevice;
474 
475    struct v3d_device_info devinfo;
476    struct v3dv_queue queue;
477 
478    /* Guards query->maybe_available and value for timestamps */
479    mtx_t query_mutex;
480 
481    /* Signaled whenever a query is ended */
482    cnd_t query_ended;
483 
484    /* Resources used for meta operations */
485    struct {
486       mtx_t mtx;
487       struct {
488          VkPipelineLayout p_layout;
489          struct hash_table *cache; /* v3dv_meta_color_clear_pipeline */
490       } color_clear;
491       struct {
492          VkPipelineLayout p_layout;
493          struct hash_table *cache; /* v3dv_meta_depth_clear_pipeline */
494       } depth_clear;
495       struct {
496          VkDescriptorSetLayout ds_layout;
497          VkPipelineLayout p_layout;
498          struct hash_table *cache[3]; /* v3dv_meta_blit_pipeline for 1d, 2d, 3d */
499       } blit;
500       struct {
501          VkDescriptorSetLayout ds_layout;
502          VkPipelineLayout p_layout;
503          struct hash_table *cache[3]; /* v3dv_meta_texel_buffer_copy_pipeline for 1d, 2d, 3d */
504       } texel_buffer_copy;
505    } meta;
506 
507    struct v3dv_bo_cache {
508       /** List of struct v3d_bo freed, by age. */
509       struct list_head time_list;
510       /** List of struct v3d_bo freed, per size, by age. */
511       struct list_head *size_list;
512       uint32_t size_list_size;
513 
514       mtx_t lock;
515 
516       uint32_t cache_size;
517       uint32_t cache_count;
518       uint32_t max_cache_size;
519    } bo_cache;
520 
521    uint32_t bo_size;
522    uint32_t bo_count;
523 
524    /* Event handling resources.
525     *
526     * Our implementation of events uses a BO to store event state (signaled vs
527     * reset) and dispatches compute shaders to handle GPU event functions
528     * (signal, reset, wait). This struct holds all the resources required
529     * by the implementation.
530     */
531    struct {
532       mtx_t lock;
533 
534       /* BO for the event states: signaled (1) or reset (0) */
535       struct v3dv_bo *bo;
536 
537       /* We pre-allocate all the events we can fit for the size of the BO we
538        * create to track their states, where each event has an index which is
539        * basically the offset of its state in that BO. We keep a free list with
540        * the pre-allocated events that are available.
541        */
542       uint32_t event_count;
543       struct v3dv_event *events;
544       struct list_head free_list;
545 
546       /* Vulkan resources to access the event BO from shaders. We have a
547        * pipeline that sets the state of an event and another that waits on
548        * a single event. Both pipelines require access to the event state BO,
549        * for which we need to allocate a single descriptor set.
550        */
551       VkBuffer buffer;
552       VkDeviceMemory mem;
553       VkDescriptorSetLayout descriptor_set_layout;
554       VkPipelineLayout pipeline_layout;
555       VkDescriptorPool descriptor_pool;
556       VkDescriptorSet descriptor_set;
557       VkPipeline set_event_pipeline;
558       VkPipeline wait_event_pipeline;
559    } events;
560 
561    /* Query handling resources.
562     *
563     * Our implementation of occlusion queries uses a BO per pool to keep track
564     * of the per-query availability state and dispatches compute shaders to
565     * handle GPU query functions that read and write that state. This struct
566     * holds Vulkan resources that can be shared across all query pools to
567     * implement this. This framework may be extended in the future to handle
568     * more query types.
569     */
570    struct {
571       VkDescriptorSetLayout buf_descriptor_set_layout;
572 
573       /* Set query availability */
574       VkPipelineLayout avail_pipeline_layout;
575       VkPipeline avail_pipeline;
576 
577       /* Reset query availability and clear occlusion counters */
578       VkPipelineLayout reset_occlusion_pipeline_layout;
579       VkPipeline reset_occlusion_pipeline;
580 
581       /* Copy query results */
582       VkPipelineLayout copy_pipeline_layout;
583       VkPipeline copy_pipeline[8];
584    } queries;
585 
586    struct v3dv_pipeline_cache default_pipeline_cache;
587 
588    /* GL_SHADER_STATE_RECORD needs to specify default attribute values. The
589     * following covers the most common case, that is, all attribute formats
590     * being float, allowing us to reuse the same BO for all
591     * pipelines matching this requirement. Pipelines that need integer
592     * attributes will create their own BO.
593     *
594     * Note that since v71 the default attribute values are not needed, so this
595     * can be NULL.
596     */
597    struct v3dv_bo *default_attribute_float;
598 
599    void *device_address_mem_ctx;
600    struct util_dynarray device_address_bo_list; /* Array of struct v3dv_bo * */
601 
602 #if DETECT_OS_ANDROID
603    struct u_gralloc *gralloc;
604 #endif
605 };
606 
607 struct v3dv_device_memory {
608    struct vk_device_memory vk;
609 
610    struct v3dv_bo *bo;
611    const VkMemoryType *type;
612    bool is_for_wsi;
613    bool is_for_device_address;
614 };
615 
616 #define V3D_OUTPUT_IMAGE_FORMAT_NO 255
617 #define TEXTURE_DATA_FORMAT_NO     255
618 
619 #define V3DV_MAX_PLANE_COUNT 3
620 struct v3dv_format_plane {
621    /* One of V3D42_OUTPUT_IMAGE_FORMAT_*, or OUTPUT_IMAGE_FORMAT_NO */
622    uint8_t rt_type;
623 
624    /* One of V3D42_TEXTURE_DATA_FORMAT_*. */
625    uint8_t tex_type;
626 
627    /* Swizzle to apply to the RGBA shader output for storing to the tile
628     * buffer, to the RGBA tile buffer to produce shader input (for
629     * blending), and for turning the rgba8888 texture sampler return
630     * value into shader rgba values.
631     */
632    uint8_t swizzle[4];
633 
634    /* Whether the return value is 16F/I/UI or 32F/I/UI. */
635    uint8_t return_size;
636 };
637 
638 struct v3dv_format {
639    /* A non-zero plane count implies the format is supported */
640    uint8_t plane_count;
641 
642    struct v3dv_format_plane planes[V3DV_MAX_PLANE_COUNT];
643 
644    /* If the format supports (linear) filtering when texturing. */
645    bool supports_filtering;
646 };
647 
648 /* Note that although VkImageAspectFlags would allow combining more than one
649  * PLANE bit, for all the use cases we implement that use VkImageAspectFlags,
650  * only one plane is allowed, like for example vkCmdCopyImage:
651  *
652  *   "If srcImage has a VkFormat with two planes then for each element of
653  *    pRegions, srcSubresource.aspectMask must be VK_IMAGE_ASPECT_PLANE_0_BIT
654  *    or VK_IMAGE_ASPECT_PLANE_1_BIT"
655  *
656  */
657 static uint8_t v3dv_plane_from_aspect(VkImageAspectFlags aspect)
658 {
659    switch (aspect) {
660    case VK_IMAGE_ASPECT_COLOR_BIT:
661    case VK_IMAGE_ASPECT_DEPTH_BIT:
662    case VK_IMAGE_ASPECT_STENCIL_BIT:
663    case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
664    case VK_IMAGE_ASPECT_PLANE_0_BIT:
665    case VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT:
666       return 0;
667    case VK_IMAGE_ASPECT_PLANE_1_BIT:
668    case VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT:
669       return 1;
670    case VK_IMAGE_ASPECT_PLANE_2_BIT:
671    case VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT:
672       return 2;
673    default:
674       unreachable("invalid image aspect");
675    }
676 }
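
/* Example (illustrative only): for a two-plane format such as
 * VK_FORMAT_G8_B8R8_2PLANE_420_UNORM, a copy region whose
 * srcSubresource.aspectMask is VK_IMAGE_ASPECT_PLANE_1_BIT maps to plane 1
 * (the interleaved BR plane), while single color, depth or stencil aspects
 * always map to plane 0.
 */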
677 
678 struct v3d_resource_slice {
679    uint32_t offset;
680    uint32_t stride;
681    uint32_t padded_height;
682    uint32_t width;
683    uint32_t height;
684    /* Size of a single pane of the slice.  For 3D textures, there will be
685     * a number of panes equal to the minified, power-of-two-aligned
686     * depth.
687     */
688    uint32_t size;
689    uint8_t ub_pad;
690    enum v3d_tiling_mode tiling;
691    uint32_t padded_height_of_output_image_in_uif_blocks;
692 };
693 
694 bool v3dv_format_swizzle_needs_rb_swap(const uint8_t *swizzle);
695 bool v3dv_format_swizzle_needs_reverse(const uint8_t *swizzle);
696 
697 struct v3dv_image {
698    struct vk_image vk;
699 
700    const struct v3dv_format *format;
701    bool tiled;
702 
703    uint8_t plane_count;
704 
705    /* If 0, this is a multi-plane image using disjoint memory, where each
706     * plane binds a different device memory. Otherwise, all the planes share
707     * the same device memory and this stores the total size of the image in
708     * bytes.
709     */
710    uint32_t non_disjoint_size;
711 
712    struct {
713       uint32_t cpp;
714 
715       struct v3d_resource_slice slices[V3D_MAX_MIP_LEVELS];
716       /* Total size of the plane in bytes. */
717       uint64_t size;
718       uint32_t cube_map_stride;
719 
720       /* If not using disjoint memory, mem and mem_offset are the same for all
721        * planes, in which case mem_offset is the offset of plane 0.
722        */
723       struct v3dv_device_memory *mem;
724       VkDeviceSize mem_offset;
725       uint32_t alignment;
726 
727       /* Pre-subsampled per plane width and height
728        */
729       uint32_t width;
730       uint32_t height;
731 
732       /* Even if we can get it from the parent image format, we keep the
733        * format here for convenience
734        */
735       VkFormat vk_format;
736    } planes[V3DV_MAX_PLANE_COUNT];
737 
738    /* Used only when sampling a linear texture (which V3D doesn't support).
739     * This holds a tiled copy of the image we can use for that purpose.
740     */
741    struct v3dv_image *shadow;
742 
743 #if DETECT_OS_ANDROID
744    /* Image is backed by VK_ANDROID_native_buffer. */
745    bool is_native_buffer_memory;
746    /* Image is backed by VK_ANDROID_external_memory_android_hardware_buffer */
747    bool is_ahb;
748    VkImageDrmFormatModifierExplicitCreateInfoEXT *android_explicit_layout;
749    VkSubresourceLayout *android_plane_layouts;
750 #endif
751 };
752 
753 VkResult
754 v3dv_image_init(struct v3dv_device *device,
755                 const VkImageCreateInfo *pCreateInfo,
756                 const VkAllocationCallbacks *pAllocator,
757                 struct v3dv_image *image);
758 
759 VkImageViewType v3dv_image_type_to_view_type(VkImageType type);
760 
761 static uint32_t
762 v3dv_image_aspect_to_plane(const struct v3dv_image *image,
763                            VkImageAspectFlagBits aspect)
764 {
765    assert(util_bitcount(aspect) == 1 && (aspect & image->vk.aspects));
766 
767    /* Because we always put image and view planes in aspect-bit-order, the
768     * plane index is the number of bits in the image aspect before aspect.
769     */
770    return util_bitcount(image->vk.aspects & (aspect - 1));
771 }
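
/* Worked example (illustrative only): for a combined depth/stencil image,
 * image->vk.aspects == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT),
 * so asking for the stencil aspect counts only the lower DEPTH bit:
 *
 *    util_bitcount(aspects & (VK_IMAGE_ASPECT_STENCIL_BIT - 1)) == 1
 *
 * i.e. depth data lives in plane 0 and stencil data in plane 1.
 */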
772 
773 /* Pre-generating packets needs to consider changes in packet sizes across hw
774  * versions. Keep things simple and allocate enough space for any supported
775  * version. We ensure the size is large enough through static asserts.
776  */
777 #define V3DV_TEXTURE_SHADER_STATE_LENGTH 32
778 #define V3DV_SAMPLER_STATE_LENGTH 24
779 #define V3DV_BLEND_CFG_LENGTH 5
780 #define V3DV_CFG_BITS_LENGTH 4
781 #define V3DV_GL_SHADER_STATE_RECORD_LENGTH 36
782 #define V3DV_VCM_CACHE_SIZE_LENGTH 2
783 #define V3DV_GL_SHADER_STATE_ATTRIBUTE_RECORD_LENGTH 16
784 #define V3DV_STENCIL_CFG_LENGTH 6
785 
786 struct v3dv_image_view {
787    struct vk_image_view vk;
788 
789    const struct v3dv_format *format;
790 
791    uint8_t view_swizzle[4];
792 
793    uint8_t plane_count;
794    struct {
795       uint8_t image_plane;
796 
797       bool swap_rb;
798       bool channel_reverse;
799       uint32_t internal_bpp;
800       uint32_t internal_type;
801       uint32_t offset;
802 
803       /* Precomputed swizzle (composed from the view swizzle and the format
804        * swizzle).
805        *
806        * This could also be included in the descriptor BO, but the shader state
807        * packet doesn't need it in a BO, so we can just avoid a memory copy.
808        */
809       uint8_t swizzle[4];
810 
811       /* Prepacked TEXTURE_SHADER_STATE. It will be copied to the descriptor info
812        * during UpdateDescriptorSets.
813        *
814        * Empirical tests show that cube arrays need a different shader state
815        * depending on whether they are used with a sampler or not, so for these
816        * we generate two states and select the one to use based on the descriptor
817        * type.
818        */
819       uint8_t texture_shader_state[2][V3DV_TEXTURE_SHADER_STATE_LENGTH];
820    } planes[V3DV_MAX_PLANE_COUNT];
821 
822    /* Used only when sampling a linear texture (which V3D doesn't support).
823     * This would represent a view over the tiled shadow image.
824     */
825    struct v3dv_image_view *shadow;
826 };
827 
828 VkResult v3dv_create_image_view(struct v3dv_device *device,
829                                 const VkImageViewCreateInfo *pCreateInfo,
830                                 VkImageView *pView);
831 
832 uint32_t v3dv_layer_offset(const struct v3dv_image *image, uint32_t level, uint32_t layer,
833                            uint8_t plane);
834 
835 struct v3dv_buffer {
836    struct vk_object_base base;
837 
838    VkDeviceSize size;
839    VkBufferUsageFlags usage;
840    uint32_t alignment;
841 
842    struct v3dv_device_memory *mem;
843    VkDeviceSize mem_offset;
844 };
845 
846 void
847 v3dv_buffer_init(struct v3dv_device *device,
848                  const VkBufferCreateInfo *pCreateInfo,
849                  struct v3dv_buffer *buffer,
850                  uint32_t alignment);
851 
852 void
853 v3dv_buffer_bind_memory(const VkBindBufferMemoryInfo *info);
854 
855 struct v3dv_buffer_view {
856    struct vk_object_base base;
857 
858    struct v3dv_buffer *buffer;
859 
860    VkFormat vk_format;
861    const struct v3dv_format *format;
862    uint32_t internal_bpp;
863    uint32_t internal_type;
864 
865    uint32_t offset;
866    uint32_t size;
867    uint32_t num_elements;
868 
869    /* Prepacked TEXTURE_SHADER_STATE. */
870    uint8_t texture_shader_state[V3DV_TEXTURE_SHADER_STATE_LENGTH];
871 };
872 
873 struct v3dv_subpass_attachment {
874    uint32_t attachment;
875    VkImageLayout layout;
876 };
877 
878 struct v3dv_subpass {
879    uint32_t input_count;
880    struct v3dv_subpass_attachment *input_attachments;
881 
882    uint32_t color_count;
883    struct v3dv_subpass_attachment *color_attachments;
884    struct v3dv_subpass_attachment *resolve_attachments;
885 
886    struct v3dv_subpass_attachment ds_attachment;
887    struct v3dv_subpass_attachment ds_resolve_attachment;
888    bool resolve_depth, resolve_stencil;
889 
890    /* If we need to emit the clear of the depth/stencil attachment using a
891     * draw call instead of using the TLB (GFXH-1461).
892     */
893    bool do_depth_clear_with_draw;
894    bool do_stencil_clear_with_draw;
895 
896    /* Multiview */
897    uint32_t view_mask;
898 };
899 
900 struct v3dv_render_pass_attachment {
901    VkAttachmentDescription2 desc;
902 
903    uint32_t first_subpass;
904    uint32_t last_subpass;
905 
906    /* When multiview is enabled, we no longer care about when a particular
907     * attachment is first or last used in a render pass, since not all views
908     * in the attachment will meet that criteria. Instead, we need to track
909     * each individual view (layer) in each attachment and emit our stores,
910     * loads and clears accordingly.
911     */
912    struct {
913       uint32_t first_subpass;
914       uint32_t last_subpass;
915    } views[MAX_MULTIVIEW_VIEW_COUNT];
916 
917    /* If this is a multisampled attachment that is going to be resolved,
918     * whether we may be able to use the TLB hardware resolve based on the
919     * attachment format.
920     */
921    bool try_tlb_resolve;
922 };
923 
924 struct v3dv_render_pass {
925    struct vk_object_base base;
926 
927    bool multiview_enabled;
928 
929    uint32_t attachment_count;
930    struct v3dv_render_pass_attachment *attachments;
931 
932    uint32_t subpass_count;
933    struct v3dv_subpass *subpasses;
934 
935    struct v3dv_subpass_attachment *subpass_attachments;
936 };
937 
938 struct v3dv_framebuffer {
939    struct vk_object_base base;
940 
941    uint32_t width;
942    uint32_t height;
943    uint32_t layers;
944 
945    /* Typically, edge tiles in the framebuffer have padding depending on the
946     * underlying tiling layout. One consequence of this is that when the
947     * framebuffer dimensions are not aligned to tile boundaries, tile stores
948     * would still write full tiles on the edges and write to the padded area.
949     * If the framebuffer is aliasing a smaller region of a larger image, then
950     * we need to be careful with this though, as we won't have padding on the
951     * edge tiles (which typically means that we need to load the tile buffer
952     * before we store).
953     */
954    bool has_edge_padding;
955 
956    uint32_t attachment_count;
957    uint32_t color_attachment_count;
958 
959    /* Notice that elements in 'attachments' will be NULL if the framebuffer
960     * was created imageless. The driver is expected to access attachment info
961     * from the command buffer state instead.
962     */
963    struct v3dv_image_view *attachments[0];
964 };
965 
966 struct v3dv_frame_tiling {
967    uint32_t width;
968    uint32_t height;
969    uint32_t layers;
970    uint32_t render_target_count;
971    uint32_t internal_bpp;
972    uint32_t total_color_bpp;
973    bool     msaa;
974    bool     double_buffer;
975    uint32_t tile_width;
976    uint32_t tile_height;
977    uint32_t draw_tiles_x;
978    uint32_t draw_tiles_y;
979    uint32_t supertile_width;
980    uint32_t supertile_height;
981    uint32_t frame_width_in_supertiles;
982    uint32_t frame_height_in_supertiles;
983 };
984 
985 bool v3dv_subpass_area_is_tile_aligned(struct v3dv_device *device,
986                                        const VkRect2D *area,
987                                        struct v3dv_framebuffer *fb,
988                                        struct v3dv_render_pass *pass,
989                                        uint32_t subpass_idx);
990 
991 /* Checks if we need to emit 2 initial tile clears for double buffer mode.
992  * This happens when we render at least 2 tiles, because in this mode each
993  * tile uses a different half of the tile buffer memory so we can have 2 tiles
994  * in flight (one being stored to memory and the next being rendered). In this
995  * scenario, if we emit a single initial tile clear we would only clear the
996  * first half of the tile buffer.
997  */
998 static inline bool
999 v3dv_do_double_initial_tile_clear(const struct v3dv_frame_tiling *tiling)
1000 {
1001    return tiling->double_buffer &&
1002           (tiling->draw_tiles_x > 1 || tiling->draw_tiles_y > 1 ||
1003            tiling->layers > 1);
1004 }
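
/* Example (illustrative only): a double-buffer job that draws a 2x1 grid of
 * tiles needs the two initial clears (one per half of the tile buffer),
 * while a single-tile, single-layer job does not:
 *
 *    struct v3dv_frame_tiling t = { .double_buffer = true, .layers = 1,
 *                                   .draw_tiles_x = 2, .draw_tiles_y = 1 };
 *    v3dv_do_double_initial_tile_clear(&t) == true
 */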
1005 
1006 enum v3dv_cmd_buffer_status {
1007    V3DV_CMD_BUFFER_STATUS_NEW           = 0,
1008    V3DV_CMD_BUFFER_STATUS_INITIALIZED   = 1,
1009    V3DV_CMD_BUFFER_STATUS_RECORDING     = 2,
1010    V3DV_CMD_BUFFER_STATUS_EXECUTABLE    = 3
1011 };
1012 
1013 union v3dv_clear_value {
1014    uint32_t color[4];
1015    struct {
1016       float z;
1017       uint8_t s;
1018    };
1019 };
1020 
1021 struct v3dv_cmd_buffer_attachment_state {
1022    /* The original clear value as provided by the Vulkan API */
1023    VkClearValue vk_clear_value;
1024 
1025    /* The hardware clear value */
1026    union v3dv_clear_value clear_value;
1027 
1028    /* The underlying image view (from the framebuffer or, if imageless
1029     * framebuffer is used, from VkRenderPassAttachmentBeginInfo).
1030     */
1031    struct v3dv_image_view *image_view;
1032 
1033    /* If this is a multisampled attachment with a resolve operation. */
1034    bool has_resolve;
1035 
1036    /* If this is a multisampled attachment with a resolve operation,
1037     * whether we can use the TLB for the resolve.
1038     */
1039    bool use_tlb_resolve;
1040 };
1041 
1042 struct v3dv_viewport_state {
1043    uint32_t count;
1044    VkViewport viewports[MAX_VIEWPORTS];
1045    float translate[MAX_VIEWPORTS][3];
1046    float scale[MAX_VIEWPORTS][3];
1047 };
1048 
1049 struct v3dv_scissor_state {
1050    uint32_t count;
1051    VkRect2D scissors[MAX_SCISSORS];
1052 };
1053 
1054 /* Mostly a v3dv mapping of VkDynamicState, used to track which state is
1055  * defined as dynamic.
1056  */
1057 enum v3dv_dynamic_state_bits {
1058    V3DV_DYNAMIC_VIEWPORT                  = 1 << 0,
1059    V3DV_DYNAMIC_SCISSOR                   = 1 << 1,
1060    V3DV_DYNAMIC_STENCIL_COMPARE_MASK      = 1 << 2,
1061    V3DV_DYNAMIC_STENCIL_WRITE_MASK        = 1 << 3,
1062    V3DV_DYNAMIC_STENCIL_REFERENCE         = 1 << 4,
1063    V3DV_DYNAMIC_BLEND_CONSTANTS           = 1 << 5,
1064    V3DV_DYNAMIC_DEPTH_BIAS                = 1 << 6,
1065    V3DV_DYNAMIC_LINE_WIDTH                = 1 << 7,
1066    V3DV_DYNAMIC_COLOR_WRITE_ENABLE        = 1 << 8,
1067    V3DV_DYNAMIC_DEPTH_BOUNDS              = 1 << 9,
1068    V3DV_DYNAMIC_ALL                       = (1 << 10) - 1,
1069 };
1070 
1071 /* Flags for dirty pipeline state.
1072  */
1073 enum v3dv_cmd_dirty_bits {
1074    V3DV_CMD_DIRTY_VIEWPORT                  = 1 << 0,
1075    V3DV_CMD_DIRTY_SCISSOR                   = 1 << 1,
1076    V3DV_CMD_DIRTY_STENCIL_COMPARE_MASK      = 1 << 2,
1077    V3DV_CMD_DIRTY_STENCIL_WRITE_MASK        = 1 << 3,
1078    V3DV_CMD_DIRTY_STENCIL_REFERENCE         = 1 << 4,
1079    V3DV_CMD_DIRTY_PIPELINE                  = 1 << 5,
1080    V3DV_CMD_DIRTY_COMPUTE_PIPELINE          = 1 << 6,
1081    V3DV_CMD_DIRTY_VERTEX_BUFFER             = 1 << 7,
1082    V3DV_CMD_DIRTY_INDEX_BUFFER              = 1 << 8,
1083    V3DV_CMD_DIRTY_DESCRIPTOR_SETS           = 1 << 9,
1084    V3DV_CMD_DIRTY_COMPUTE_DESCRIPTOR_SETS   = 1 << 10,
1085    V3DV_CMD_DIRTY_PUSH_CONSTANTS            = 1 << 11,
1086    V3DV_CMD_DIRTY_PUSH_CONSTANTS_UBO        = 1 << 12,
1087    V3DV_CMD_DIRTY_BLEND_CONSTANTS           = 1 << 13,
1088    V3DV_CMD_DIRTY_OCCLUSION_QUERY           = 1 << 14,
1089    V3DV_CMD_DIRTY_DEPTH_BIAS                = 1 << 15,
1090    V3DV_CMD_DIRTY_LINE_WIDTH                = 1 << 16,
1091    V3DV_CMD_DIRTY_VIEW_INDEX                = 1 << 17,
1092    V3DV_CMD_DIRTY_COLOR_WRITE_ENABLE        = 1 << 18,
1093    V3DV_CMD_DIRTY_DEPTH_BOUNDS              = 1 << 19,
1094    V3DV_CMD_DIRTY_DRAW_ID                   = 1 << 20,
1095 };
1096 
1097 struct v3dv_dynamic_state {
1098    /**
1099     * Bitmask of (1 << VK_DYNAMIC_STATE_*).
1100     * Defines the set of saved dynamic state.
1101     */
1102    uint32_t mask;
1103 
1104    struct v3dv_viewport_state viewport;
1105 
1106    struct v3dv_scissor_state scissor;
1107 
1108    struct {
1109       uint32_t front;
1110       uint32_t back;
1111    } stencil_compare_mask;
1112 
1113    struct {
1114       uint32_t front;
1115       uint32_t back;
1116    } stencil_write_mask;
1117 
1118    struct {
1119       uint32_t front;
1120       uint32_t back;
1121    } stencil_reference;
1122 
1123    float blend_constants[4];
1124 
1125    struct {
1126       float constant_factor;
1127       float depth_bias_clamp;
1128       float slope_factor;
1129    } depth_bias;
1130 
1131    struct {
1132       float                                     min;
1133       float                                     max;
1134    } depth_bounds;
1135 
1136    float line_width;
1137 
1138    uint32_t color_write_enable;
1139 };
1140 
1141 void v3dv_viewport_compute_xform(const VkViewport *viewport,
1142                                  float scale[3],
1143                                  float translate[3]);
1144 
1145 enum v3dv_ez_state {
1146    V3D_EZ_UNDECIDED = 0,
1147    V3D_EZ_GT_GE,
1148    V3D_EZ_LT_LE,
1149    V3D_EZ_DISABLED,
1150 };
1151 
1152 enum v3dv_job_type {
1153    V3DV_JOB_TYPE_GPU_CL = 0,
1154    V3DV_JOB_TYPE_GPU_CL_SECONDARY,
1155    V3DV_JOB_TYPE_GPU_TFU,
1156    V3DV_JOB_TYPE_GPU_CSD,
1157    V3DV_JOB_TYPE_CPU_RESET_QUERIES,
1158    V3DV_JOB_TYPE_CPU_END_QUERY,
1159    V3DV_JOB_TYPE_CPU_COPY_QUERY_RESULTS,
1160    V3DV_JOB_TYPE_CPU_CSD_INDIRECT,
1161    V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY,
1162 };
1163 
1164 struct v3dv_reset_query_cpu_job_info {
1165    struct v3dv_query_pool *pool;
1166    uint32_t first;
1167    uint32_t count;
1168 };
1169 
1170 struct v3dv_end_query_info {
1171    struct v3dv_query_pool *pool;
1172    uint32_t query;
1173 
1174    /* This is one unless multiview is used */
1175    uint32_t count;
1176 };
1177 
1178 struct v3dv_copy_query_results_cpu_job_info {
1179    struct v3dv_query_pool *pool;
1180    uint32_t first;
1181    uint32_t count;
1182    struct v3dv_buffer *dst;
1183    uint32_t offset;
1184    uint32_t stride;
1185    VkQueryResultFlags flags;
1186 };
1187 
1188 struct v3dv_submit_sync_info {
1189    /* List of syncs to wait before running a job */
1190    uint32_t wait_count;
1191    struct vk_sync_wait *waits;
1192 
1193    /* List of syncs to signal when all jobs complete */
1194    uint32_t signal_count;
1195    struct vk_sync_signal *signals;
1196 };
1197 
1198 struct v3dv_csd_indirect_cpu_job_info {
1199    struct v3dv_buffer *buffer;
1200    uint32_t offset;
1201    struct v3dv_job *csd_job;
1202    uint32_t wg_size;
1203    uint32_t *wg_uniform_offsets[3];
1204    bool needs_wg_uniform_rewrite;
1205 };
1206 
1207 struct v3dv_timestamp_query_cpu_job_info {
1208    struct v3dv_query_pool *pool;
1209    uint32_t query;
1210 
1211    /* This is one unless multiview is used */
1212    uint32_t count;
1213 };
1214 
1215 /* Number of perfmons required to handle all supported performance counters */
1216 #define V3DV_MAX_PERFMONS DIV_ROUND_UP(V3D_MAX_PERFCNT, \
1217                                        DRM_V3D_MAX_PERF_COUNTERS)
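
/* Illustrative arithmetic (assuming the kernel limit DRM_V3D_MAX_PERF_COUNTERS
 * is 32): DIV_ROUND_UP(93, 32) == 3, so up to three kernel perfmons are needed
 * to cover every supported counter in a single performance query.
 */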
1218 
1219 struct v3dv_perf_query {
1220    uint32_t kperfmon_ids[V3DV_MAX_PERFMONS];
1221 
1222    /* A DRM syncobj to wait on the GPU jobs for which we are collecting
1223     * performance data.
1224     */
1225    struct vk_sync *last_job_sync;
1226 };
1227 
1228 struct v3dv_job {
1229    struct list_head list_link;
1230 
1231    /* We only create job clones when executing secondary command buffers into
1232     * primaries. These clones don't make deep copies of the original object
1233     * so we want to flag them to avoid freeing resources they don't own.
1234     */
1235    bool is_clone;
1236 
1237    /* If the job executes on the transfer stage of the pipeline */
1238    bool is_transfer;
1239 
1240    /* VK_KHR_buffer_device_address allows shaders to use pointers that can
1241     * dereference memory in any buffer that has been flagged with
1242     * VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT. These buffers may not
1243     * be bound via descriptor sets, so we need to make sure that a job that
1244     * uses this functionality includes all these buffers in its kernel
1245     * submission.
1246     */
1247    bool uses_buffer_device_address;
1248 
1249    /* True if we have not identified anything that would be incompatible
1250     * with double-buffer (like MSAA) or that would make double-buffer mode
1251     * not efficient (like tile loads or not having any stores).
1252     */
1253    bool can_use_double_buffer;
1254 
1255    /* This structure keeps track of various scores to inform a heuristic
1256     * for double-buffer mode.
1257     */
1258    struct {
1259       /* Cost of geometry shading */
1260       uint32_t geom;
1261       /* Cost of shader rendering */
1262       uint32_t render;
1263    } double_buffer_score;
1264 
1265    /* We only need to allocate tile state for all layers if the binner
1266     * writes primitives to layers other than the first. This can only be
1267     * done using layered rendering (writing gl_Layer from a geometry shader),
1268     * so for other cases of multilayered framebuffers (typically with
1269     * meta copy/clear operations) that won't use layered rendering, we only
1270     * need one layer worth of tile state for the binner.
1271     */
1272    bool allocate_tile_state_for_all_layers;
1273 
1274    /* A pointer to the location of the TILE_BINNING_MODE_CFG packet so we can
1275     * rewrite it to enable double-buffer mode by the time we have enough info
1276     * about the job to make that decision.
1277     */
1278    struct v3dv_cl_out *bcl_tile_binning_mode_ptr;
1279 
1280    enum v3dv_job_type type;
1281 
1282    struct v3dv_device *device;
1283 
1284    struct v3dv_cmd_buffer *cmd_buffer;
1285 
1286    struct v3dv_cl bcl;
1287    struct v3dv_cl rcl;
1288    struct v3dv_cl indirect;
1289 
1290    /* Set of all BOs referenced by the job. This will be used for making
1291     * the list of BOs that the kernel will need to have paged in to
1292     * execute our job.
1293     */
1294    struct set *bos;
1295    uint32_t bo_count;
1296    uint64_t bo_handle_mask;
1297 
1298    struct v3dv_bo *tile_alloc;
1299    struct v3dv_bo *tile_state;
1300 
1301    bool tmu_dirty_rcl;
1302 
1303    uint32_t first_subpass;
1304 
1305    /* When the current subpass is split into multiple jobs, this flag is set
1306     * to true for any jobs after the first in the same subpass.
1307     */
1308    bool is_subpass_continue;
1309 
1310    /* If this job is the last job emitted for a subpass. */
1311    bool is_subpass_finish;
1312 
1313    struct v3dv_frame_tiling frame_tiling;
1314 
1315    enum v3dv_ez_state ez_state;
1316    enum v3dv_ez_state first_ez_state;
1317 
1318    /* If we have already decided if we need to disable Early Z/S completely
1319     * for this job.
1320     */
1321    bool decided_global_ez_enable;
1322 
1323    /* If the job emitted any draw calls with Early Z/S enabled */
1324    bool has_ez_draws;
1325 
1326    /* If this job has been configured to use early Z/S clear */
1327    bool early_zs_clear;
1328 
1329    /* Number of draw calls recorded into the job */
1330    uint32_t draw_count;
1331 
1332    /* A flag indicating whether we want to flush every draw separately. This
1333     * can be used for debugging, or for cases where special circumstances
1334     * require this behavior.
1335     */
1336    bool always_flush;
1337 
1338    /* A mask of V3DV_BARRIER_* indicating the source(s) of the barrier. We
1339     * can use this to select the hw queues where we need to serialize the job.
1340     */
1341    uint8_t serialize;
1342 
1343    /* If this is a CL job, whether we should sync before binning */
1344    bool needs_bcl_sync;
1345 
1346    /* Job specs for CPU jobs */
1347    union {
1348       struct v3dv_reset_query_cpu_job_info          query_reset;
1349       struct v3dv_end_query_info                    query_end;
1350       struct v3dv_copy_query_results_cpu_job_info   query_copy_results;
1351       struct v3dv_csd_indirect_cpu_job_info         csd_indirect;
1352       struct v3dv_timestamp_query_cpu_job_info      query_timestamp;
1353    } cpu;
1354 
1355    /* Job specs for TFU jobs */
1356    struct drm_v3d_submit_tfu tfu;
1357 
1358    /* Job specs for CSD jobs */
1359    struct {
1360       struct v3dv_bo *shared_memory;
1361       uint32_t wg_count[3];
1362       uint32_t wg_base[3];
1363       struct drm_v3d_submit_csd submit;
1364    } csd;
1365 
1366    /* Perfmons with last job sync for CSD and CL jobs */
1367    struct v3dv_perf_query *perf;
1368 };
1369 
1370 void v3dv_job_init(struct v3dv_job *job,
1371                    enum v3dv_job_type type,
1372                    struct v3dv_device *device,
1373                    struct v3dv_cmd_buffer *cmd_buffer,
1374                    int32_t subpass_idx);
1375 void v3dv_job_destroy(struct v3dv_job *job);
1376 
1377 void v3dv_job_add_bo(struct v3dv_job *job, struct v3dv_bo *bo);
1378 void v3dv_job_add_bo_unchecked(struct v3dv_job *job, struct v3dv_bo *bo);
1379 
1380 void v3dv_job_start_frame(struct v3dv_job *job,
1381                           uint32_t width,
1382                           uint32_t height,
1383                           uint32_t layers,
1384                           bool allocate_tile_state_for_all_layers,
1385                           bool allocate_tile_state_now,
1386                           uint32_t render_target_count,
1387                           uint8_t max_internal_bpp,
1388                           uint8_t total_color_bpp,
1389                           bool msaa);
1390 
1391 bool v3dv_job_type_is_gpu(struct v3dv_job *job);
1392 
1393 struct v3dv_job *
1394 v3dv_job_clone_in_cmd_buffer(struct v3dv_job *job,
1395                              struct v3dv_cmd_buffer *cmd_buffer);
1396 
1397 struct v3dv_job *v3dv_cmd_buffer_create_cpu_job(struct v3dv_device *device,
1398                                                 enum v3dv_job_type type,
1399                                                 struct v3dv_cmd_buffer *cmd_buffer,
1400                                                 uint32_t subpass_idx);
1401 
1402 void
1403 v3dv_cmd_buffer_ensure_array_state(struct v3dv_cmd_buffer *cmd_buffer,
1404                                    uint32_t slot_size,
1405                                    uint32_t used_count,
1406                                    uint32_t *alloc_count,
1407                                    void **ptr);
1408 
1409 void v3dv_cmd_buffer_emit_pre_draw(struct v3dv_cmd_buffer *cmd_buffer,
1410                                    bool indexed, bool indirect,
1411                                    uint32_t vertex_count);
1412 
1413 bool v3dv_job_allocate_tile_state(struct v3dv_job *job);
1414 
1415 /* FIXME: only used on v3dv_cmd_buffer and v3dvx_cmd_buffer, perhaps move to a
1416  * cmd_buffer specific header?
1417  */
1418 struct v3dv_draw_info {
1419    uint32_t vertex_count;
1420    uint32_t instance_count;
1421    uint32_t first_vertex;
1422    uint32_t first_instance;
1423 };
1424 
1425 struct v3dv_vertex_binding {
1426    struct v3dv_buffer *buffer;
1427    VkDeviceSize offset;
1428 };
1429 
1430 struct v3dv_descriptor_state {
1431    struct v3dv_descriptor_set *descriptor_sets[MAX_SETS];
1432    uint32_t valid;
1433    uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS];
1434 };
1435 
1436 struct v3dv_cmd_pipeline_state {
1437    struct v3dv_pipeline *pipeline;
1438 
1439    struct v3dv_descriptor_state descriptor_state;
1440 };
1441 
1442 enum {
1443    V3DV_BARRIER_GRAPHICS_BIT = (1 << 0),
1444    V3DV_BARRIER_COMPUTE_BIT  = (1 << 1),
1445    V3DV_BARRIER_TRANSFER_BIT = (1 << 2),
1446    V3DV_BARRIER_CPU_BIT      = (1 << 3),
1447 };
1448 #define V3DV_BARRIER_ALL (V3DV_BARRIER_GRAPHICS_BIT | \
1449                           V3DV_BARRIER_TRANSFER_BIT | \
1450                           V3DV_BARRIER_COMPUTE_BIT | \
1451                           V3DV_BARRIER_CPU_BIT)
1452 
1453 struct v3dv_barrier_state {
1454    /* Mask of V3DV_BARRIER_* indicating where we consume a barrier. */
1455    uint8_t dst_mask;
1456 
1457    /* For each possible consumer of a barrier, a mask of V3DV_BARRIER_*
1458     * indicating the sources of the dependency.
1459     */
1460    uint8_t src_mask_graphics;
1461    uint8_t src_mask_transfer;
1462    uint8_t src_mask_compute;
1463 
1464    /* For graphics barriers, access masks involved. Used to decide if we need
1465     * to execute a binning or render barrier.
1466     */
1467    VkAccessFlags2 bcl_buffer_access;
1468    VkAccessFlags2 bcl_image_access;
1469 };
1470 
1471 struct v3dv_cmd_buffer_state {
1472    struct v3dv_render_pass *pass;
1473    struct v3dv_framebuffer *framebuffer;
1474    VkRect2D render_area;
1475 
1476    /* Current job being recorded */
1477    struct v3dv_job *job;
1478 
1479    uint32_t subpass_idx;
1480 
1481    struct v3dv_cmd_pipeline_state gfx;
1482    struct v3dv_cmd_pipeline_state compute;
1483 
1484    struct v3dv_dynamic_state dynamic;
1485 
1486    uint32_t dirty;
1487    VkShaderStageFlagBits dirty_descriptor_stages;
1488    VkShaderStageFlagBits dirty_push_constants_stages;
1489 
1490    /* Current clip window. We use this to check whether we have an active
1491     * scissor, since in that case we can't use TLB clears and need to fallback
1492     * to drawing rects.
1493     */
1494    VkRect2D clip_window;
1495 
1496    /* Whether our render area is aligned to tile boundaries. If this is false
1497     * then we have tiles that are only partially covered by the render area,
1498     * and therefore, we need to be careful with our loads and stores so we don't
1499     * modify pixels for the tile area that is not covered by the render area.
1500     * This means, for example, that we can't use the TLB to clear, since that
1501     * always clears full tiles.
1502     */
1503    bool tile_aligned_render_area;
1504 
1505    /* FIXME: we have just one client-side BO for the push constants,
1506     * independently of the stageFlags in vkCmdPushConstants, and the
1507     * pipelineBindPoint in vkCmdBindPipeline. We could probably do more stage
1508     * tuning in the future if it makes sense.
1509     */
1510    uint32_t push_constants_size;
1511    uint32_t push_constants_data[MAX_PUSH_CONSTANTS_SIZE / 4];
1512 
1513    uint32_t attachment_alloc_count;
1514    struct v3dv_cmd_buffer_attachment_state *attachments;
1515 
1516    struct v3dv_vertex_binding vertex_bindings[MAX_VBS];
1517 
1518    struct {
1519       VkBuffer buffer;
1520       VkDeviceSize offset;
1521       uint8_t index_size;
1522    } index_buffer;
1523 
1524    /* Current uniforms */
1525    struct {
1526       struct v3dv_cl_reloc vs_bin;
1527       struct v3dv_cl_reloc vs;
1528       struct v3dv_cl_reloc gs_bin;
1529       struct v3dv_cl_reloc gs;
1530       struct v3dv_cl_reloc fs;
1531    } uniforms;
1532 
1533    /* Current view index for multiview rendering */
1534    uint32_t view_index;
1535 
1536    /* Current draw ID for multidraw */
1537    uint32_t draw_id;
1538 
1539    /* Used to flag OOM conditions during command buffer recording */
1540    bool oom;
1541 
1542    /* If we are currently recording job(s) for a transfer operation */
1543    bool is_transfer;
1544 
1545    /* Barrier state tracking */
1546    struct v3dv_barrier_state barrier;
1547 
1548    /* Secondary command buffer state */
1549    struct {
1550       bool occlusion_query_enable;
1551    } inheritance;
1552 
1553    /* Command buffer state saved during a meta operation */
1554    struct {
1555       uint32_t subpass_idx;
1556       VkRenderPass pass;
1557       VkFramebuffer framebuffer;
1558 
1559       uint32_t attachment_alloc_count;
1560       uint32_t attachment_count;
1561       struct v3dv_cmd_buffer_attachment_state *attachments;
1562 
1563       bool tile_aligned_render_area;
1564       VkRect2D render_area;
1565 
1566       struct v3dv_dynamic_state dynamic;
1567 
1568       struct v3dv_cmd_pipeline_state gfx;
1569       bool has_descriptor_state;
1570 
1571       uint32_t push_constants[MAX_PUSH_CONSTANTS_SIZE / 4];
1572       uint32_t push_constants_size;
1573    } meta;
1574 
1575    /* Command buffer state for queries */
1576    struct {
1577       /* A list of vkCmdQueryEnd commands recorded in the command buffer during
1578        * a render pass. We queue these here and then schedule the corresponding
1579        * CPU jobs for them at the time we finish the GPU job in which they have
1580        * been recorded.
1581        */
1582       struct {
1583          uint32_t used_count;
1584          uint32_t alloc_count;
1585          struct v3dv_end_query_info *states;
1586       } end;
1587 
1588       struct {
1589          /* This BO is not NULL if we have an active occlusion query, that is,
1590           * we have called vkCmdBeginQuery but not vkCmdEndQuery.
1591           */
1592          struct v3dv_bo *bo;
1593          uint32_t offset;
1594          /* When the driver emits draw calls to implement other operations in
1595           * the middle of a render pass (such as an attachment clear), we need
1596           * to pause occlusion query recording and resume it later so that
1597           * these draw calls don't register in occlusion counters. We use
1598           * this to store the BO reference in which we should resume occlusion
1599           * query counters after the driver is done emitting its draw calls.
1600            */
1601          struct v3dv_bo *paused_bo;
1602 
1603          /* This pointer is not NULL if we have an active performance query */
1604          struct v3dv_perf_query *perf;
1605       } active_query;
1606    } query;
1607 };
1608 
1609 void
1610 v3dv_cmd_buffer_state_get_viewport_z_xform(struct v3dv_cmd_buffer_state *state,
1611                                            uint32_t vp_idx,
1612                                            float *translate_z, float *scale_z);
1613 
1614 /* The following struct represents the info from a descriptor that we store on
1615  * the host memory. They are mostly links to other existing vulkan objects,
1616  * like the image_view in order to access to swizzle info, or the buffer used
1617  * for a UBO/SSBO, for example.
1618  *
1619  * FIXME: revisit if makes sense to just move everything that would be needed
1620  * from a descriptor to the bo.
1621  */
1622 struct v3dv_descriptor {
1623    VkDescriptorType type;
1624 
1625    union {
1626       struct {
1627          struct v3dv_image_view *image_view;
1628          struct v3dv_sampler *sampler;
1629       };
1630 
1631       struct {
1632          struct v3dv_buffer *buffer;
1633          size_t offset;
1634          size_t range;
1635       };
1636 
1637       struct v3dv_buffer_view *buffer_view;
1638    };
1639 };
1640 
1641 struct v3dv_query {
1642    /* Used by queries where we implement result copying on the CPU so we can
1643     * tell if the relevant jobs have been submitted for execution. Currently
1644     * these are all but occlusion queries.
1645     */
1646    bool maybe_available;
1647 
1648    union {
1649       /* Used by occlusion queries */
1650       struct {
1651          /* Offset of this query in the occlusion query counter BO */
1652          uint32_t offset;
1653       } occlusion;
1654 
1655       /* Used by timestamp queries */
1656       struct {
1657          /* Offset of this query in the timestamp BO for its value */
1658          uint32_t offset;
1659 
1660          /* Syncobj to signal timestamp query availability */
1661          struct vk_sync *sync;
1662       } timestamp;
1663 
1664       /* Used by performance queries */
1665       struct v3dv_perf_query perf;
1666    };
1667 };
1668 
1669 struct v3dv_query_pool {
1670    struct vk_object_base base;
1671 
1672    /* Per-pool Vulkan resources required to implement GPU-side query
1673     * functions (only occlusion queries for now).
1674     */
1675    struct {
1676       /* Buffer to access the BO with the occlusion query results and
1677        * availability info.
1678        */
1679       VkBuffer buf;
1680       VkDeviceMemory mem;
1681 
1682       /* Descriptor set for accessing the buffer from a pipeline. */
1683       VkDescriptorPool descriptor_pool;
1684       VkDescriptorSet descriptor_set;
1685    } meta;
1686 
1687    /* Only used with occlusion queries */
1688    struct {
1689       /* BO with the occlusion counters and query availability */
1690       struct v3dv_bo *bo;
1691       /* Offset of the availability info in the BO */
1692       uint32_t avail_offset;
1693    } occlusion;
1694 
1695    /* Only used with timestamp queries */
1696    struct {
1697       /* BO with the query timestamp values */
1698       struct v3dv_bo *bo;
1699    } timestamp;
1700 
1701    /* Only used with performance queries */
1702    struct {
1703       uint32_t ncounters;
1704       uint8_t counters[V3D_MAX_PERFCNT];
1705 
1706       /* V3D has a limit on the number of counters we can track in a
1707        * single performance monitor, so if too many counters are requested
1708        * we need to create multiple monitors to record all of them. This
1709        * field represents the number of monitors required for the number
1710        * of counters requested.
1711        */
1712       uint8_t nperfmons;
1713    } perfmon;
1714 
1715    VkQueryType query_type;
1716    uint32_t query_count;
1717    struct v3dv_query *queries;
1718 };
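/* A sketch of the relationship between ncounters and nperfmons described
 * above, assuming the per-monitor limit comes from the kernel UAPI
 * (DRM_V3D_MAX_PERF_COUNTERS in drm-uapi/v3d_drm.h); the helper name is
 * illustrative, not part of the driver.
 *
 *    static inline uint8_t
 *    example_perfmons_needed(uint32_t ncounters)
 *    {
 *       return (uint8_t) DIV_ROUND_UP(ncounters, DRM_V3D_MAX_PERF_COUNTERS);
 *    }
 *
 * For example, 40 requested counters with a 32-counter limit would need
 * 2 performance monitors.
 */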
1719 
1720 VkResult
1721 v3dv_query_allocate_resources(struct v3dv_device *device);
1722 
1723 void
1724 v3dv_query_free_resources(struct v3dv_device *device);
1725 
1726 VkResult v3dv_get_query_pool_results_cpu(struct v3dv_device *device,
1727                                          struct v3dv_query_pool *pool,
1728                                          uint32_t first,
1729                                          uint32_t count,
1730                                          void *data,
1731                                          VkDeviceSize stride,
1732                                          VkQueryResultFlags flags);
1733 
1734 void v3dv_reset_query_pool_cpu(struct v3dv_device *device,
1735                                struct v3dv_query_pool *query_pool,
1736                                uint32_t first,
1737                                uint32_t last);
1738 
1739 void v3dv_cmd_buffer_emit_set_query_availability(struct v3dv_cmd_buffer *cmd_buffer,
1740                                                  struct v3dv_query_pool *pool,
1741                                                  uint32_t query, uint32_t count,
1742                                                  uint8_t availability);
1743 
1744 typedef void (*v3dv_cmd_buffer_private_obj_destroy_cb)(VkDevice device,
1745                                                        uint64_t pobj,
1746                                                        VkAllocationCallbacks *alloc);
1747 struct v3dv_cmd_buffer_private_obj {
1748    struct list_head list_link;
1749    uint64_t obj;
1750    v3dv_cmd_buffer_private_obj_destroy_cb destroy_cb;
1751 };
1752 
1753 extern const struct vk_command_buffer_ops v3dv_cmd_buffer_ops;
1754 
1755 struct v3dv_cmd_buffer {
1756    struct vk_command_buffer vk;
1757 
1758    struct v3dv_device *device;
1759 
1760    VkCommandBufferUsageFlags usage_flags;
1761 
1762    enum v3dv_cmd_buffer_status status;
1763 
1764    struct v3dv_cmd_buffer_state state;
1765 
1766    /* Buffer where we upload push constant data to resolve indirect indexing */
1767    struct v3dv_cl_reloc push_constants_resource;
1768 
1769    /* Collection of Vulkan objects created internally by the driver (typically
1770     * during recording of meta operations) that are part of the command buffer
1771     * and should be destroyed with it.
1772     */
1773    struct list_head private_objs; /* v3dv_cmd_buffer_private_obj */
1774 
1775    /* Per-command buffer resources for meta operations. */
1776    struct {
1777       struct {
1778          /* The current descriptor pool for blit sources */
1779          VkDescriptorPool dspool;
1780       } blit;
1781       struct {
1782          /* The current descriptor pool for texel buffer copy sources */
1783          VkDescriptorPool dspool;
1784       } texel_buffer_copy;
1785       struct {
1786          /* The current descriptor pool for the copy query results output buffer */
1787          VkDescriptorPool dspool;
1788       } query;
1789    } meta;
1790 
1791    /* List of jobs in the command buffer. For primary command buffers it
1792     * represents the jobs we want to submit to the GPU. For secondary command
1793     * buffers it represents jobs that will be merged into a primary command
1794     * buffer via vkCmdExecuteCommands.
1795     */
1796    struct list_head jobs;
1797 };
1798 
1799 struct v3dv_job *v3dv_cmd_buffer_start_job(struct v3dv_cmd_buffer *cmd_buffer,
1800                                            int32_t subpass_idx,
1801                                            enum v3dv_job_type type);
1802 void v3dv_cmd_buffer_finish_job(struct v3dv_cmd_buffer *cmd_buffer);
1803 
1804 struct v3dv_job *v3dv_cmd_buffer_subpass_start(struct v3dv_cmd_buffer *cmd_buffer,
1805                                                uint32_t subpass_idx);
1806 struct v3dv_job *v3dv_cmd_buffer_subpass_resume(struct v3dv_cmd_buffer *cmd_buffer,
1807                                                 uint32_t subpass_idx);
1808 
1809 void v3dv_cmd_buffer_subpass_finish(struct v3dv_cmd_buffer *cmd_buffer);
1810 
1811 void v3dv_cmd_buffer_meta_state_push(struct v3dv_cmd_buffer *cmd_buffer,
1812                                      bool push_descriptor_state);
1813 void v3dv_cmd_buffer_meta_state_pop(struct v3dv_cmd_buffer *cmd_buffer,
1814                                     bool needs_subpass_resume);
1815 
1816 void v3dv_cmd_buffer_begin_query(struct v3dv_cmd_buffer *cmd_buffer,
1817                                  struct v3dv_query_pool *pool,
1818                                  uint32_t query,
1819                                  VkQueryControlFlags flags);
1820 
1821 void v3dv_cmd_buffer_pause_occlusion_query(struct v3dv_cmd_buffer *cmd_buffer);
1822 void v3dv_cmd_buffer_resume_occlusion_query(struct v3dv_cmd_buffer *cmd_buffer);
1823 
1824 void v3dv_cmd_buffer_end_query(struct v3dv_cmd_buffer *cmd_buffer,
1825                                struct v3dv_query_pool *pool,
1826                                uint32_t query);
1827 
1828 void v3dv_cmd_buffer_copy_query_results(struct v3dv_cmd_buffer *cmd_buffer,
1829                                         struct v3dv_query_pool *pool,
1830                                         uint32_t first,
1831                                         uint32_t count,
1832                                         struct v3dv_buffer *dst,
1833                                         uint32_t offset,
1834                                         uint32_t stride,
1835                                         VkQueryResultFlags flags);
1836 
1837 void v3dv_cmd_buffer_add_tfu_job(struct v3dv_cmd_buffer *cmd_buffer,
1838                                  struct drm_v3d_submit_tfu *tfu);
1839 
1840 void v3dv_cmd_buffer_rewrite_indirect_csd_job(struct v3dv_device *device,
1841                                               struct v3dv_csd_indirect_cpu_job_info *info,
1842                                               const uint32_t *wg_counts);
1843 
1844 void v3dv_cmd_buffer_add_private_obj(struct v3dv_cmd_buffer *cmd_buffer,
1845                                      uint64_t obj,
1846                                      v3dv_cmd_buffer_private_obj_destroy_cb destroy_cb);
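/* Example (sketch) of tying a driver-internal Vulkan object to the command
 * buffer with v3dv_cmd_buffer_add_private_obj(); the wrapper callback and
 * the buffer creation shown here are illustrative only.
 *
 *    static void
 *    destroy_example_buffer(VkDevice device, uint64_t obj,
 *                           VkAllocationCallbacks *alloc)
 *    {
 *       vkDestroyBuffer(device, (VkBuffer) (uintptr_t) obj, alloc);
 *    }
 *
 *    VkBuffer buf;
 *    vkCreateBuffer(v3dv_device_to_handle(cmd_buffer->device), &info, NULL, &buf);
 *    v3dv_cmd_buffer_add_private_obj(cmd_buffer, (uintptr_t) buf,
 *                                    destroy_example_buffer);
 */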
1847 
1848 void v3dv_cmd_buffer_merge_barrier_state(struct v3dv_barrier_state *dst,
1849                                          struct v3dv_barrier_state *src);
1850 
1851 void v3dv_cmd_buffer_consume_bcl_sync(struct v3dv_cmd_buffer *cmd_buffer,
1852                                       struct v3dv_job *job);
1853 
1854 bool v3dv_cmd_buffer_check_needs_load(const struct v3dv_cmd_buffer_state *state,
1855                                       VkImageAspectFlags aspect,
1856                                       uint32_t first_subpass_idx,
1857                                       VkAttachmentLoadOp load_op,
1858                                       uint32_t last_subpass_idx,
1859                                       VkAttachmentStoreOp store_op);
1860 
1861 bool v3dv_cmd_buffer_check_needs_store(const struct v3dv_cmd_buffer_state *state,
1862                                        VkImageAspectFlags aspect,
1863                                        uint32_t last_subpass_idx,
1864                                        VkAttachmentStoreOp store_op);
1865 
1866 void v3dv_cmd_buffer_emit_pipeline_barrier(struct v3dv_cmd_buffer *cmd_buffer,
1867                                            const VkDependencyInfo *info);
1868 
1869 bool v3dv_cmd_buffer_copy_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
1870                                     struct v3dv_image *dst,
1871                                     struct v3dv_image *src,
1872                                     const VkImageCopy2 *region);
1873 
1874 struct v3dv_event {
1875    struct vk_object_base base;
1876 
1877    /* Link in the device list of pre-allocated free events */
1878    struct list_head link;
1879 
1880    /* Each event gets a different index, which we use to compute the offset
1881     * in the BO we use to track their state (signaled vs reset).
1882     */
1883    uint32_t index;
1884 };
1885 
1886 VkResult
1887 v3dv_event_allocate_resources(struct v3dv_device *device);
1888 
1889 void
1890 v3dv_event_free_resources(struct v3dv_device *device);
1891 
1892 struct v3dv_shader_variant {
1893    enum broadcom_shader_stage stage;
1894 
1895    union {
1896       struct v3d_prog_data *base;
1897       struct v3d_vs_prog_data *vs;
1898       struct v3d_gs_prog_data *gs;
1899       struct v3d_fs_prog_data *fs;
1900       struct v3d_compute_prog_data *cs;
1901    } prog_data;
1902 
1903    /* We explicitly save the prog_data_size as it makes it easier to
1904     * serialize.
1905     */
1906    uint32_t prog_data_size;
1907 
1908    /* The assembly for this variant will be uploaded to a BO shared with all
1909     * other shader stages in that pipeline. This is the offset in that BO.
1910     */
1911    uint32_t assembly_offset;
1912 
1913    /* Note: don't assume qpu_insts is always NULL or non-NULL. In general we
1914     * will try to free it as soon as we upload it to the shared bo while we
1915     * compile the different stages. But we can decide to keep it around based
1916     * on some pipeline creation flags, like
1917     * VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT.
1918     */
1919    uint64_t *qpu_insts;
1920    uint32_t qpu_insts_size;
1921 };
1922 
1923 /*
1924  * Per-stage info, useful so shader_module_compile_to_nir and other methods
1925  * don't need so many parameters.
1926  *
1927  * FIXME: for the coordinate shader and the vertex shader, module, entrypoint,
1928  * spec_info and nir are the same. There is also info that is only relevant to
1929  * some stages, but it seemed too much hassle to create a new struct just to
1930  * handle that. Revisit if this kind of info starts to grow.
1931  */
1932 struct v3dv_pipeline_stage {
1933    struct v3dv_pipeline *pipeline;
1934 
1935    enum broadcom_shader_stage stage;
1936 
1937    const struct vk_shader_module *module;
1938    const char *entrypoint;
1939    const VkSpecializationInfo *spec_info;
1940 
1941    nir_shader *nir;
1942 
1943    /* The following is the combined hash of module+entrypoint+spec_info+nir */
1944    unsigned char shader_sha1[20];
1945 
1946    /** An ID for this program, so you can track it in shader-db output. */
1947    uint32_t program_id;
1948 
1949    VkPipelineCreationFeedback feedback;
1950 
1951    struct vk_pipeline_robustness_state robustness;
1952 };
1953 
1954 /* We are using the descriptor pool entry for two things:
1955  * * Track the allocated sets, so we can properly free them if needed.
1956  * * Track the suballocated pool bo regions, so if a descriptor set is
1957  *   freed, the gap can be reallocated later.
1958  *
1959  * These only make sense if the pool was not created with the flag
1960  * VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT.
1961  */
1962 struct v3dv_descriptor_pool_entry
1963 {
1964    struct v3dv_descriptor_set *set;
1965    /* Offset and size of the subregion allocated for this entry from the
1966     * pool->bo
1967     */
1968    uint32_t offset;
1969    uint32_t size;
1970 };
1971 
1972 struct v3dv_descriptor_pool {
1973    struct vk_object_base base;
1974 
1975    /* A list with all descriptor sets allocated from the pool. */
1976    struct list_head set_list;
1977 
1978    /* If this descriptor pool has been allocated by the driver for internal
1979     * use, typically to implement meta operations.
1980     */
1981    bool is_driver_internal;
1982 
1983    struct v3dv_bo *bo;
1984    /* Current offset into the descriptor bo. 0 means that we haven't used it
1985     * for any descriptor yet. If the descriptor bo is NULL, the current offset
1986     * is meaningless.
1987     */
1988    uint32_t current_offset;
1989 
1990    /* If VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT is not set, the
1991     * descriptor sets are allocated as a whole from pool memory managed by the
1992     * following pointers. If it is set, these pointers are not used and
1993     * descriptor sets are allocated/freed individually.
1994     */
1995    uint8_t *host_memory_base;
1996    uint8_t *host_memory_ptr;
1997    uint8_t *host_memory_end;
1998 
1999    uint32_t entry_count;
2000    uint32_t max_entry_count;
2001    struct v3dv_descriptor_pool_entry entries[0];
2002 };
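/* A minimal sketch of the linear (bump) allocation scheme implied by
 * host_memory_base/ptr/end above when
 * VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT is not set. This is
 * illustrative only, not the driver's actual allocation path.
 *
 *    size_t size = sizeof(struct v3dv_descriptor_set) +
 *                  descriptor_count * sizeof(struct v3dv_descriptor);
 *    if (pool->host_memory_base) {
 *       if ((size_t)(pool->host_memory_end - pool->host_memory_ptr) < size)
 *          return vk_error(device, VK_ERROR_OUT_OF_POOL_MEMORY);
 *       set = (struct v3dv_descriptor_set *) pool->host_memory_ptr;
 *       pool->host_memory_ptr += size;
 *    }
 */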
2003 
2004 struct v3dv_descriptor_set {
2005    struct vk_object_base base;
2006 
2007    /* List link into the list of all sets allocated from the pool */
2008    struct list_head pool_link;
2009 
2010    struct v3dv_descriptor_pool *pool;
2011 
2012    struct v3dv_descriptor_set_layout *layout;
2013 
2014    /* Offset relative to the descriptor pool bo for this set */
2015    uint32_t base_offset;
2016 
2017    /* The descriptors below can be indexed (set/binding) using the set_layout
2018     */
2019    struct v3dv_descriptor descriptors[0];
2020 };
2021 
2022 struct v3dv_descriptor_set_binding_layout {
2023    VkDescriptorType type;
2024 
2025    /* Number of array elements in this binding */
2026    uint32_t array_size;
2027 
2028    /* Index into the flattened descriptor set */
2029    uint32_t descriptor_index;
2030 
2031    uint32_t dynamic_offset_count;
2032    uint32_t dynamic_offset_index;
2033 
2034    /* Offset into the descriptor set where this descriptor lives (final offset
2035     * on the descriptor bo needs to take into account set->base_offset)
2036     */
2037    uint32_t descriptor_offset;
2038 
2039    /* Offset in the v3dv_descriptor_set_layout of the immutable samplers, or 0
2040     * if there are no immutable samplers.
2041     */
2042    uint32_t immutable_samplers_offset;
2043 
2044    /* Descriptors for multiplanar combined image samplers are larger.
2045     * For mutable descriptors, this is always 1.
2046     */
2047    uint8_t plane_stride;
2048 };
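/* A sketch of how the offsets above combine to locate a descriptor in the
 * pool BO: the set's base_offset, plus the binding's descriptor_offset,
 * plus the array element. Here "descriptor_size" stands for the per-type
 * descriptor size and is an assumption for illustration, not a real symbol
 * in this driver.
 *
 *    uint32_t bo_offset = set->base_offset +
 *                         binding_layout->descriptor_offset +
 *                         array_index * binding_layout->plane_stride * descriptor_size;
 */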
2049 
2050 struct v3dv_descriptor_set_layout {
2051    struct vk_object_base base;
2052 
2053    VkDescriptorSetLayoutCreateFlags flags;
2054 
2055    /* Number of bindings in this descriptor set */
2056    uint32_t binding_count;
2057 
2058    /* Total bo size needed for this descriptor set
2059     */
2060    uint32_t bo_size;
2061 
2062    /* Shader stages affected by this descriptor set */
2063    uint16_t shader_stages;
2064 
2065    /* Number of descriptors in this descriptor set */
2066    uint32_t descriptor_count;
2067 
2068    /* Number of dynamic offsets used by this descriptor set */
2069    uint16_t dynamic_offset_count;
2070 
2071    /* Descriptor set layouts can be destroyed even if they are still being
2072     * used.
2073     */
2074    uint32_t ref_cnt;
2075 
2076    /* Bindings in this descriptor set */
2077    struct v3dv_descriptor_set_binding_layout binding[0];
2078 };
2079 
2080 void
2081 v3dv_descriptor_set_layout_destroy(struct v3dv_device *device,
2082                                    struct v3dv_descriptor_set_layout *set_layout);
2083 
2084 static inline void
2085 v3dv_descriptor_set_layout_ref(struct v3dv_descriptor_set_layout *set_layout)
2086 {
2087    assert(set_layout && set_layout->ref_cnt >= 1);
2088    p_atomic_inc(&set_layout->ref_cnt);
2089 }
2090 
2091 static inline void
2092 v3dv_descriptor_set_layout_unref(struct v3dv_device *device,
2093                                  struct v3dv_descriptor_set_layout *set_layout)
2094 {
2095    assert(set_layout && set_layout->ref_cnt >= 1);
2096    if (p_atomic_dec_zero(&set_layout->ref_cnt))
2097       v3dv_descriptor_set_layout_destroy(device, set_layout);
2098 }
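/* Example (sketch) of the ref-counting pattern above: any object that keeps
 * a pointer to a set layout takes a reference and drops it when that pointer
 * goes away, so the layout outlives vkDestroyDescriptorSetLayout if needed.
 *
 *    v3dv_descriptor_set_layout_ref(layout);
 *    set->layout = layout;
 *    ...
 *    v3dv_descriptor_set_layout_unref(device, set->layout);
 */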
2099 
2100 struct v3dv_pipeline_layout {
2101    struct vk_object_base base;
2102 
2103    struct {
2104       struct v3dv_descriptor_set_layout *layout;
2105       uint32_t dynamic_offset_start;
2106    } set[MAX_SETS];
2107 
2108    uint32_t num_sets;
2109 
2110    /* Shader stages that are declared to use descriptors from this layout */
2111    uint32_t shader_stages;
2112 
2113    uint32_t dynamic_offset_count;
2114    uint32_t push_constant_size;
2115 
2116    /* Pipeline layouts can be destroyed after creating pipelines since
2117     * maintenance4.
2118     */
2119    uint32_t ref_cnt;
2120 
2121    unsigned char sha1[20];
2122 };
2123 
2124 void
2125 v3dv_pipeline_layout_destroy(struct v3dv_device *device,
2126                              struct v3dv_pipeline_layout *layout,
2127                              const VkAllocationCallbacks *alloc);
2128 
2129 static inline void
2130 v3dv_pipeline_layout_ref(struct v3dv_pipeline_layout *layout)
2131 {
2132    assert(layout && layout->ref_cnt >= 1);
2133    p_atomic_inc(&layout->ref_cnt);
2134 }
2135 
2136 static inline void
2137 v3dv_pipeline_layout_unref(struct v3dv_device *device,
2138                            struct v3dv_pipeline_layout *layout,
2139                            const VkAllocationCallbacks *alloc)
2140 {
2141    assert(layout && layout->ref_cnt >= 1);
2142    if (p_atomic_dec_zero(&layout->ref_cnt))
2143       v3dv_pipeline_layout_destroy(device, layout, alloc);
2144 }
2145 
2146 /*
2147  * We are using descriptor maps for ubo/ssbo and texture/samplers, so we need
2148  * them to be big enough to hold the maximum value for all of them.
2149  *
2150  * FIXME: one alternative would be to allocate each map as big as needed for
2151  * its descriptor type. That would mean more individual allocations.
2152  */
2153 #define DESCRIPTOR_MAP_SIZE MAX3(V3D_MAX_TEXTURE_SAMPLERS,                         \
2154                                  MAX_UNIFORM_BUFFERS + MAX_INLINE_UNIFORM_BUFFERS, \
2155                                  MAX_STORAGE_BUFFERS)
2156 
2157 
2158 struct v3dv_descriptor_map {
2159    /* FIXME: avoid fixed size array/justify the size */
2160    unsigned num_desc; /* Number of descriptors  */
2161    int set[DESCRIPTOR_MAP_SIZE];
2162    int binding[DESCRIPTOR_MAP_SIZE];
2163    int array_index[DESCRIPTOR_MAP_SIZE];
2164    int array_size[DESCRIPTOR_MAP_SIZE];
2165    uint8_t plane[DESCRIPTOR_MAP_SIZE];
2166    bool used[DESCRIPTOR_MAP_SIZE];
2167 
2168    /* NOTE: the following is only used for samplers, but this is the easiest
2169     * place to put it.
2170     */
2171    uint8_t return_size[DESCRIPTOR_MAP_SIZE];
2172 };
2173 
2174 struct v3dv_sampler {
2175    struct vk_object_base base;
2176    struct vk_ycbcr_conversion *conversion;
2177 
2178    bool compare_enable;
2179    bool unnormalized_coordinates;
2180 
2181    /* Prepacked per-plane SAMPLER_STATE, referenced as part of the TMU
2182     * configuration. If needed, it will be copied to the descriptor info during
2183     * vkUpdateDescriptorSets.
2184     */
2185    uint8_t plane_count;
2186    uint8_t sampler_state[V3DV_SAMPLER_STATE_LENGTH];
2187 };
2188 
2189 /* We keep two special values for the sampler idx that represent the case
2190  * where a sampler is not needed/provided. The main use is that even if we
2191  * don't have a sampler, we still need to do the output unpacking (through
2192  * nir_lower_tex). The easiest way to do this is to add these special "no
2193  * sampler" entries to the sampler_map, and then use the proper unpacking for
2194  * that case.
2195  *
2196  * We have one for when we want a 16-bit output size, and another for when we
2197  * want a 32-bit output size. We use the info coming from the RelaxedPrecision
2198  * decoration to decide between the two.
2199  */
2200 #define V3DV_NO_SAMPLER_16BIT_IDX 0
2201 #define V3DV_NO_SAMPLER_32BIT_IDX 1
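/* A sketch of how the two indices above might be selected when no sampler is
 * provided: a 16-bit return size (e.g. from RelaxedPrecision) picks the
 * 16-bit entry, otherwise the 32-bit one. The return_size variable is an
 * assumption for illustration.
 *
 *    uint32_t sampler_idx = (return_size == 16) ?
 *       V3DV_NO_SAMPLER_16BIT_IDX : V3DV_NO_SAMPLER_32BIT_IDX;
 */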
2202 
2203 struct v3dv_descriptor_maps {
2204    struct v3dv_descriptor_map ubo_map;
2205    struct v3dv_descriptor_map ssbo_map;
2206    struct v3dv_descriptor_map sampler_map;
2207    struct v3dv_descriptor_map texture_map;
2208 };
2209 
2210 /* The structure represents data shared between different objects, like the
2211  * pipeline and the pipeline cache, so we ref count it to know when it should
2212  * be freed.
2213  */
2214 struct v3dv_pipeline_shared_data {
2215    uint32_t ref_cnt;
2216 
2217    unsigned char sha1_key[20];
2218 
2219    struct v3dv_descriptor_maps *maps[BROADCOM_SHADER_STAGES];
2220    struct v3dv_shader_variant *variants[BROADCOM_SHADER_STAGES];
2221 
2222    struct v3dv_bo *assembly_bo;
2223 };
2224 
2225 struct v3dv_pipeline_executable_data {
2226    enum broadcom_shader_stage stage;
2227    char *nir_str;
2228    char *qpu_str;
2229 };
2230 
2231 struct v3dv_pipeline {
2232    struct vk_object_base base;
2233 
2234    struct v3dv_device *device;
2235 
2236    VkShaderStageFlags active_stages;
2237    VkPipelineCreateFlags flags;
2238 
2239    struct v3dv_render_pass *pass;
2240    struct v3dv_subpass *subpass;
2241 
2242    struct v3dv_pipeline_stage *stages[BROADCOM_SHADER_STAGES];
2243 
2244    /* Flags for whether optional pipeline stages are present, for convenience */
2245    bool has_gs;
2246 
2247    /* Whether any stage in this pipeline uses VK_KHR_buffer_device_address */
2248    bool uses_buffer_device_address;
2249 
2250    /* Spilling memory requirements */
2251    struct {
2252       struct v3dv_bo *bo;
2253       uint32_t size_per_thread;
2254    } spill;
2255 
2256    struct v3dv_dynamic_state dynamic_state;
2257 
2258    struct v3dv_pipeline_layout *layout;
2259 
2260    /* Whether this pipeline enables depth writes */
2261    bool z_updates_enable;
2262 
2263    enum v3dv_ez_state ez_state;
2264 
2265    /* If ez_state is V3D_EZ_DISABLED, whether the reason for disabling it is
2266     * that the pipeline selects an incompatible depth test function.
2267     */
2268    bool incompatible_ez_test;
2269 
2270    bool msaa;
2271    bool sample_rate_shading;
2272    uint32_t sample_mask;
2273 
2274    bool primitive_restart;
2275    bool negative_one_to_one;
2276 
2277    /* Indexed by binding, so vb[binding].stride is the stride of the vertex
2278     * array with that binding.
2279     */
2280    struct v3dv_pipeline_vertex_binding {
2281       uint32_t stride;
2282       uint32_t instance_divisor;
2283    } vb[MAX_VBS];
2284    uint32_t vb_count;
2285 
2286    /* Note that a lot of info from VkVertexInputAttributeDescription is
2287     * already prepacked, so here we only store the fields that need rechecking
2288     * later. The array must be indexed by driver location, since that is the
2289     * order in which we need to emit the attributes.
2290     */
2291    struct v3dv_pipeline_vertex_attrib {
2292       uint32_t binding;
2293       uint32_t offset;
2294       VkFormat vk_format;
2295    } va[MAX_VERTEX_ATTRIBS];
2296    uint32_t va_count;
2297 
2298    enum mesa_prim topology;
2299 
2300    struct v3dv_pipeline_shared_data *shared_data;
2301 
2302    /* This is the combined sha1 of the stages, the layout and the pipeline key. */
2303    unsigned char sha1[20];
2304 
2305    /* In general we can reuse v3dv_device->default_attribute_float, so note
2306     * that the following can be NULL. In 7.x this is not used, so it will
2307     * always be NULL.
2308     *
2309     * FIXME: the content of this BO will be small, so it could be improved to
2310     * be uploaded to a common BO. But since in most cases it will be NULL, it
2311     * is not a priority.
2312     */
2313    struct v3dv_bo *default_attribute_values;
2314 
2315    struct vpm_config vpm_cfg;
2316    struct vpm_config vpm_cfg_bin;
2317 
2318    /* If the pipeline should emit any of the stencil configuration packets */
2319    bool emit_stencil_cfg[2];
2320 
2321    /* Blend state */
2322    struct {
2323       /* Per-RT bit mask with blend enables */
2324       uint8_t enables;
2325       /* Per-RT prepacked blend config packets */
2326       uint8_t cfg[V3D_MAX_DRAW_BUFFERS][V3DV_BLEND_CFG_LENGTH];
2327       /* Flag indicating whether the blend factors in use require
2328        * color constants.
2329        */
2330       bool needs_color_constants;
2331       /* Mask with enabled color channels for each RT (4 bits per RT) */
2332       uint32_t color_write_masks;
2333    } blend;
2334 
2335    /* Depth bias */
2336    struct {
2337       bool enabled;
2338       bool is_z16;
2339    } depth_bias;
2340 
2341    /* Depth bounds */
2342    bool depth_bounds_test_enabled;
2343 
2344    struct {
2345       void *mem_ctx;
2346       struct util_dynarray data; /* Array of v3dv_pipeline_executable_data */
2347    } executables;
2348 
2349    /* Packets prepacked during pipeline creation
2350     */
2351    uint8_t cfg_bits[V3DV_CFG_BITS_LENGTH];
2352    uint8_t shader_state_record[V3DV_GL_SHADER_STATE_RECORD_LENGTH];
2353    uint8_t vcm_cache_size[V3DV_VCM_CACHE_SIZE_LENGTH];
2354    uint8_t vertex_attrs[V3DV_GL_SHADER_STATE_ATTRIBUTE_RECORD_LENGTH *
2355                         MAX_VERTEX_ATTRIBS];
2356    uint8_t stencil_cfg[2][V3DV_STENCIL_CFG_LENGTH];
2357 };
2358 
2359 static inline bool
2360 v3dv_texture_shader_state_has_rb_swap_reverse_bits(const struct v3dv_device *device)
2361 {
2362    return device->devinfo.ver > 71 ||
2363           (device->devinfo.ver == 71 && device->devinfo.rev >= 5);
2364 }
2365 
2366 static inline VkPipelineBindPoint
2367 v3dv_pipeline_get_binding_point(struct v3dv_pipeline *pipeline)
2368 {
2369    assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT ||
2370           !(pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT));
2371    return pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT ?
2372       VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS;
2373 }
2374 
2375 static inline struct v3dv_descriptor_state*
2376 v3dv_cmd_buffer_get_descriptor_state(struct v3dv_cmd_buffer *cmd_buffer,
2377                                      struct v3dv_pipeline *pipeline)
2378 {
2379    if (v3dv_pipeline_get_binding_point(pipeline) == VK_PIPELINE_BIND_POINT_COMPUTE)
2380       return &cmd_buffer->state.compute.descriptor_state;
2381    else
2382       return &cmd_buffer->state.gfx.descriptor_state;
2383 }
2384 
2385 const nir_shader_compiler_options *v3dv_pipeline_get_nir_options(void);
2386 
2387 uint32_t v3dv_physical_device_vendor_id(struct v3dv_physical_device *dev);
2388 uint32_t v3dv_physical_device_device_id(struct v3dv_physical_device *dev);
2389 
2390 #define v3dv_debug_ignored_stype(sType) \
2391    mesa_logd("%s: ignored VkStructureType %u:%s\n\n", __func__, (sType), vk_StructureType_to_str(sType))
2392 
2393 const uint8_t *v3dv_get_format_swizzle(struct v3dv_device *device, VkFormat f,
2394                                        uint8_t plane);
2395 const struct v3dv_format *
2396 v3dv_get_compatible_tfu_format(struct v3dv_device *device,
2397                                uint32_t bpp, VkFormat *out_vk_format);
2398 bool v3dv_buffer_format_supports_features(struct v3dv_device *device,
2399                                           VkFormat vk_format,
2400                                           VkFormatFeatureFlags2 features);
2401 
2402 struct v3dv_cl_reloc v3dv_write_uniforms(struct v3dv_cmd_buffer *cmd_buffer,
2403                                          struct v3dv_pipeline *pipeline,
2404                                          struct v3dv_shader_variant *variant);
2405 
2406 struct v3dv_cl_reloc v3dv_write_uniforms_wg_offsets(struct v3dv_cmd_buffer *cmd_buffer,
2407                                                     struct v3dv_pipeline *pipeline,
2408                                                     struct v3dv_shader_variant *variant,
2409                                                     uint32_t **wg_count_offsets);
2410 
2411 struct v3dv_shader_variant *
2412 v3dv_get_shader_variant(struct v3dv_pipeline_stage *p_stage,
2413                         struct v3dv_pipeline_cache *cache,
2414                         struct v3d_key *key,
2415                         size_t key_size,
2416                         const VkAllocationCallbacks *pAllocator,
2417                         VkResult *out_vk_result);
2418 
2419 struct v3dv_shader_variant *
2420 v3dv_shader_variant_create(struct v3dv_device *device,
2421                            enum broadcom_shader_stage stage,
2422                            struct v3d_prog_data *prog_data,
2423                            uint32_t prog_data_size,
2424                            uint32_t assembly_offset,
2425                            uint64_t *qpu_insts,
2426                            uint32_t qpu_insts_size,
2427                            VkResult *out_vk_result);
2428 
2429 void
2430 v3dv_shader_variant_destroy(struct v3dv_device *device,
2431                             struct v3dv_shader_variant *variant);
2432 
2433 static inline void
2434 v3dv_pipeline_shared_data_ref(struct v3dv_pipeline_shared_data *shared_data)
2435 {
2436    assert(shared_data && shared_data->ref_cnt >= 1);
2437    p_atomic_inc(&shared_data->ref_cnt);
2438 }
2439 
2440 void
2441 v3dv_pipeline_shared_data_destroy(struct v3dv_device *device,
2442                                   struct v3dv_pipeline_shared_data *shared_data);
2443 
2444 static inline void
2445 v3dv_pipeline_shared_data_unref(struct v3dv_device *device,
2446                                 struct v3dv_pipeline_shared_data *shared_data)
2447 {
2448    assert(shared_data && shared_data->ref_cnt >= 1);
2449    if (p_atomic_dec_zero(&shared_data->ref_cnt))
2450       v3dv_pipeline_shared_data_destroy(device, shared_data);
2451 }
2452 
2453 struct v3dv_descriptor *
2454 v3dv_descriptor_map_get_descriptor(struct v3dv_descriptor_state *descriptor_state,
2455                                    struct v3dv_descriptor_map *map,
2456                                    struct v3dv_pipeline_layout *pipeline_layout,
2457                                    uint32_t index,
2458                                    uint32_t *dynamic_offset);
2459 
2460 struct v3dv_cl_reloc
2461 v3dv_descriptor_map_get_descriptor_bo(struct v3dv_device *device,
2462                                       struct v3dv_descriptor_state *descriptor_state,
2463                                       struct v3dv_descriptor_map *map,
2464                                       struct v3dv_pipeline_layout *pipeline_layout,
2465                                       uint32_t index,
2466                                       VkDescriptorType *out_type);
2467 
2468 const struct v3dv_sampler *
2469 v3dv_descriptor_map_get_sampler(struct v3dv_descriptor_state *descriptor_state,
2470                                 struct v3dv_descriptor_map *map,
2471                                 struct v3dv_pipeline_layout *pipeline_layout,
2472                                 uint32_t index);
2473 
2474 struct v3dv_cl_reloc
2475 v3dv_descriptor_map_get_sampler_state(struct v3dv_device *device,
2476                                       struct v3dv_descriptor_state *descriptor_state,
2477                                       struct v3dv_descriptor_map *map,
2478                                       struct v3dv_pipeline_layout *pipeline_layout,
2479                                       uint32_t index);
2480 
2481 struct v3dv_cl_reloc
2482 v3dv_descriptor_map_get_texture_shader_state(struct v3dv_device *device,
2483                                              struct v3dv_descriptor_state *descriptor_state,
2484                                              struct v3dv_descriptor_map *map,
2485                                              struct v3dv_pipeline_layout *pipeline_layout,
2486                                              uint32_t index);
2487 
2488 struct v3dv_bo*
2489 v3dv_descriptor_map_get_texture_bo(struct v3dv_descriptor_state *descriptor_state,
2490                                    struct v3dv_descriptor_map *map,
2491                                    struct v3dv_pipeline_layout *pipeline_layout,
2492                                    uint32_t index);
2493 
2494 static inline const struct v3dv_sampler *
2495 v3dv_immutable_samplers(const struct v3dv_descriptor_set_layout *set,
2496                         const struct v3dv_descriptor_set_binding_layout *binding)
2497 {
2498    assert(binding->immutable_samplers_offset);
2499    return (const struct v3dv_sampler *) ((const char *) set + binding->immutable_samplers_offset);
2500 }
2501 
2502 void v3dv_pipeline_cache_init(struct v3dv_pipeline_cache *cache,
2503                               struct v3dv_device *device,
2504                               VkPipelineCacheCreateFlags,
2505                               bool cache_enabled);
2506 
2507 void v3dv_pipeline_cache_finish(struct v3dv_pipeline_cache *cache);
2508 
2509 void v3dv_pipeline_cache_upload_nir(struct v3dv_pipeline *pipeline,
2510                                     struct v3dv_pipeline_cache *cache,
2511                                     nir_shader *nir,
2512                                     unsigned char sha1_key[20]);
2513 
2514 nir_shader* v3dv_pipeline_cache_search_for_nir(struct v3dv_pipeline *pipeline,
2515                                                struct v3dv_pipeline_cache *cache,
2516                                                const nir_shader_compiler_options *nir_options,
2517                                                unsigned char sha1_key[20]);
2518 
2519 struct v3dv_pipeline_shared_data *
2520 v3dv_pipeline_cache_search_for_pipeline(struct v3dv_pipeline_cache *cache,
2521                                         unsigned char sha1_key[20],
2522                                         bool *cache_hit);
2523 
2524 void
2525 v3dv_pipeline_cache_upload_pipeline(struct v3dv_pipeline *pipeline,
2526                                     struct v3dv_pipeline_cache *cache);
2527 
2528 VkResult
2529 v3dv_create_compute_pipeline_from_nir(struct v3dv_device *device,
2530                                       nir_shader *nir,
2531                                       VkPipelineLayout pipeline_layout,
2532                                       VkPipeline *pipeline);
2533 
2534 #define V3DV_FROM_HANDLE(__v3dv_type, __name, __handle)			\
2535    VK_FROM_HANDLE(__v3dv_type, __name, __handle)
2536 
2537 VK_DEFINE_HANDLE_CASTS(v3dv_cmd_buffer, vk.base, VkCommandBuffer,
2538                        VK_OBJECT_TYPE_COMMAND_BUFFER)
2539 VK_DEFINE_HANDLE_CASTS(v3dv_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)
2540 VK_DEFINE_HANDLE_CASTS(v3dv_instance, vk.base, VkInstance,
2541                        VK_OBJECT_TYPE_INSTANCE)
2542 VK_DEFINE_HANDLE_CASTS(v3dv_physical_device, vk.base, VkPhysicalDevice,
2543                        VK_OBJECT_TYPE_PHYSICAL_DEVICE)
2544 VK_DEFINE_HANDLE_CASTS(v3dv_queue, vk.base, VkQueue, VK_OBJECT_TYPE_QUEUE)
2545 
2546 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_buffer, base, VkBuffer,
2547                                VK_OBJECT_TYPE_BUFFER)
2548 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_buffer_view, base, VkBufferView,
2549                                VK_OBJECT_TYPE_BUFFER_VIEW)
2550 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_device_memory, vk.base, VkDeviceMemory,
2551                                VK_OBJECT_TYPE_DEVICE_MEMORY)
2552 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_pool, base, VkDescriptorPool,
2553                                VK_OBJECT_TYPE_DESCRIPTOR_POOL)
2554 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_set, base, VkDescriptorSet,
2555                                VK_OBJECT_TYPE_DESCRIPTOR_SET)
2556 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_set_layout, base,
2557                                VkDescriptorSetLayout,
2558                                VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT)
2559 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_event, base, VkEvent, VK_OBJECT_TYPE_EVENT)
2560 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_framebuffer, base, VkFramebuffer,
2561                                VK_OBJECT_TYPE_FRAMEBUFFER)
2562 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_image, vk.base, VkImage,
2563                                VK_OBJECT_TYPE_IMAGE)
2564 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_image_view, vk.base, VkImageView,
2565                                VK_OBJECT_TYPE_IMAGE_VIEW)
2566 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline, base, VkPipeline,
2567                                VK_OBJECT_TYPE_PIPELINE)
2568 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline_cache, base, VkPipelineCache,
2569                                VK_OBJECT_TYPE_PIPELINE_CACHE)
2570 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline_layout, base, VkPipelineLayout,
2571                                VK_OBJECT_TYPE_PIPELINE_LAYOUT)
2572 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_query_pool, base, VkQueryPool,
2573                                VK_OBJECT_TYPE_QUERY_POOL)
2574 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_render_pass, base, VkRenderPass,
2575                                VK_OBJECT_TYPE_RENDER_PASS)
2576 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_sampler, base, VkSampler,
2577                                VK_OBJECT_TYPE_SAMPLER)
2578 
2579 static inline int
2580 v3dv_ioctl(int fd, unsigned long request, void *arg)
2581 {
2582    if (using_v3d_simulator)
2583       return v3d_simulator_ioctl(fd, request, arg);
2584    else
2585       return drmIoctl(fd, request, arg);
2586 }
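/* Example (sketch): routing everything through v3dv_ioctl() lets the same
 * code run against both the real kernel driver and the simulator. The fd
 * field and the specific request used below are illustrative.
 *
 *    struct drm_v3d_wait_bo wait = {
 *       .handle = bo->handle,
 *       .timeout_ns = INT64_MAX,
 *    };
 *    int ret = v3dv_ioctl(device->pdevice->render_fd,
 *                         DRM_IOCTL_V3D_WAIT_BO, &wait);
 */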
2587 
2588 /* Flags OOM conditions in command buffer state.
2589  *
2590  * Note: no-op jobs don't have a command buffer reference.
2591  */
2592 static inline void
2593 v3dv_flag_oom(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_job *job)
2594 {
2595    if (cmd_buffer) {
2596       cmd_buffer->state.oom = true;
2597    } else {
2598       assert(job);
2599       if (job->cmd_buffer)
2600          job->cmd_buffer->state.oom = true;
2601    }
2602 }
2603 
2604 #define v3dv_return_if_oom(_cmd_buffer, _job) do {                  \
2605    const struct v3dv_cmd_buffer *__cmd_buffer = _cmd_buffer;        \
2606    if (__cmd_buffer && __cmd_buffer->state.oom)                     \
2607       return;                                                       \
2608    const struct v3dv_job *__job = _job;                             \
2609    if (__job && __job->cmd_buffer && __job->cmd_buffer->state.oom)  \
2610       return;                                                       \
2611 } while(0)
2612 
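/* Example (sketch) of the intended pattern: command recording paths flag OOM
 * with v3dv_flag_oom() when an allocation fails and bail out early on later
 * commands with v3dv_return_if_oom(), so the error is reported once at
 * vkEndCommandBuffer time rather than on every call. The BO allocation call
 * below is illustrative.
 *
 *    void
 *    example_cmd_record_something(struct v3dv_cmd_buffer *cmd_buffer)
 *    {
 *       v3dv_return_if_oom(cmd_buffer, NULL);
 *
 *       struct v3dv_bo *bo =
 *          v3dv_bo_alloc(cmd_buffer->device, 4096, "example", true);
 *       if (!bo) {
 *          v3dv_flag_oom(cmd_buffer, NULL);
 *          return;
 *       }
 *    }
 */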
2613 static inline uint32_t
2614 u64_hash(const void *key)
2615 {
2616    return _mesa_hash_data(key, sizeof(uint64_t));
2617 }
2618 
2619 static inline bool
2620 u64_compare(const void *key1, const void *key2)
2621 {
2622    return memcmp(key1, key2, sizeof(uint64_t)) == 0;
2623 }
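/* Example (sketch): these helpers let us key mesa hash tables by 64-bit
 * values such as Vulkan handles; the table name is illustrative.
 *
 *    struct hash_table *ht =
 *       _mesa_hash_table_create(NULL, u64_hash, u64_compare);
 *
 * Keys passed to _mesa_hash_table_insert() must then point to uint64_t
 * storage that outlives the entry.
 */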
2624 
2625 /* Helper to call hw ver specific functions */
2626 #define v3dv_X(device, thing) ({                      \
2627    __typeof(&v3d42_##thing) v3d_X_thing;              \
2628    switch (device->devinfo.ver) {                     \
2629    case 42:                                           \
2630       v3d_X_thing = &v3d42_##thing;                   \
2631       break;                                          \
2632    case 71:                                           \
2633       v3d_X_thing = &v3d71_##thing;                   \
2634       break;                                          \
2635    default:                                           \
2636       unreachable("Unsupported hardware generation"); \
2637    }                                                  \
2638    v3d_X_thing;                                       \
2639 })
2640 
2641 /* Helper to get hw-specific macro values */
2642 #define V3DV_X(device, thing) ({                                \
2643    __typeof(V3D42_##thing) V3D_X_THING;                         \
2644    switch (device->devinfo.ver) {                               \
2645    case 42:                                                     \
2646       V3D_X_THING = V3D42_##thing;                              \
2647       break;                                                    \
2648    case 71:                                                     \
2649       V3D_X_THING = V3D71_##thing;                              \
2650       break;                                                    \
2651    default:                                                     \
2652       unreachable("Unsupported hardware generation");           \
2653    }                                                            \
2654    V3D_X_THING;                                                 \
2655 })
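/* Example (sketch) of how the per-version dispatch macros above are used
 * from version-agnostic code; the function and packet names shown are
 * illustrative:
 *
 *    v3dv_X(device, job_emit_binning_flush)(job);
 *    uint32_t len = V3DV_X(device, TEXTURE_SHADER_STATE_length);
 *
 * v3dv_X() resolves to the v3d42_* or v3d71_* symbol at runtime based on
 * device->devinfo.ver, while V3DV_X() does the same for macro values.
 */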
2656 
2657 
2658 
2659 /* v3d_macros from common requires v3dX and V3DX definitions. Below we need to
2660  * define v3dX for each version supported, because when we compile code that
2661  * is not version-specific, all version-specific macros need to be already
2662  * defined.
2663  */
2664 #ifdef v3dX
2665 #  include "v3dvx_private.h"
2666 #else
2667 #  define v3dX(x) v3d42_##x
2668 #  include "v3dvx_private.h"
2669 #  undef v3dX
2670 
2671 #  define v3dX(x) v3d71_##x
2672 #  include "v3dvx_private.h"
2673 #  undef v3dX
2674 #endif
2675 
2676 VkResult
2677 v3dv_update_image_layout(struct v3dv_device *device,
2678                          struct v3dv_image *image,
2679                          uint64_t modifier,
2680                          bool disjoint,
2681                          const VkImageDrmFormatModifierExplicitCreateInfoEXT *explicit_mod_info);
2682 
2683 #if DETECT_OS_ANDROID
2684 VkResult
2685 v3dv_gralloc_to_drm_explicit_layout(struct u_gralloc *gralloc,
2686                                     struct u_gralloc_buffer_handle *in_hnd,
2687                                     VkImageDrmFormatModifierExplicitCreateInfoEXT *out,
2688                                     VkSubresourceLayout *out_layouts,
2689                                     int max_planes);
2690 
2691 VkResult
2692 v3dv_import_native_buffer_fd(VkDevice device_h,
2693                              int dma_buf,
2694                              const VkAllocationCallbacks *alloc,
2695                              VkImage image_h);
2696 #endif /* DETECT_OS_ANDROID */
2697 
2698 #endif /* V3DV_PRIVATE_H */
2699