1 /*
2  * Copyright © 2019 Raspberry Pi Ltd
3  *
4  * based in part on anv driver which is:
5  * Copyright © 2015 Intel Corporation
6  *
7  * based in part on radv driver which is:
8  * Copyright © 2016 Red Hat.
9  * Copyright © 2016 Bas Nieuwenhuizen
10  *
11  * Permission is hereby granted, free of charge, to any person obtaining a
12  * copy of this software and associated documentation files (the "Software"),
13  * to deal in the Software without restriction, including without limitation
14  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15  * and/or sell copies of the Software, and to permit persons to whom the
16  * Software is furnished to do so, subject to the following conditions:
17  *
18  * The above copyright notice and this permission notice (including the next
19  * paragraph) shall be included in all copies or substantial portions of the
20  * Software.
21  *
22  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
25  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
28  * IN THE SOFTWARE.
29  */
30 #ifndef V3DV_PRIVATE_H
31 #define V3DV_PRIVATE_H
32 
33 #include <stdio.h>
34 #include <string.h>
35 #include <vulkan/vulkan.h>
36 #include <vulkan/vk_icd.h>
37 #include <vk_enum_to_str.h>
38 
39 #include "vk_descriptor_update_template.h"
40 #include "vk_device.h"
41 #include "vk_device_memory.h"
42 #include "vk_format.h"
43 #include "vk_instance.h"
44 #include "vk_image.h"
45 #include "vk_log.h"
46 #include "vk_physical_device.h"
47 #include "vk_shader_module.h"
48 #include "vk_sync.h"
49 #include "vk_sync_timeline.h"
50 #include "vk_util.h"
51 #include "vk_ycbcr_conversion.h"
52 
53 #include "vk_command_buffer.h"
54 #include "vk_command_pool.h"
55 #include "vk_queue.h"
56 #include "vk_pipeline.h"
57 
58 #include <xf86drm.h>
59 
60 #ifdef HAVE_VALGRIND
61 #include <valgrind.h>
62 #include <memcheck.h>
63 #define VG(x) x
64 #else
65 #define VG(x) ((void)0)
66 #endif
67 
68 #include "util/detect_os.h"
69 
70 #include "v3dv_limits.h"
71 
72 #include "common/v3d_device_info.h"
73 #include "common/v3d_limits.h"
74 #include "common/v3d_tiling.h"
75 #include "common/v3d_util.h"
76 
77 #include "compiler/shader_enums.h"
78 #include "compiler/spirv/nir_spirv.h"
79 
80 #include "compiler/v3d_compiler.h"
81 
82 #include "vk_debug_report.h"
83 #include "util/set.h"
84 #include "util/hash_table.h"
85 #include "util/sparse_array.h"
86 #include "util/xmlconfig.h"
87 #include "util/u_atomic.h"
88 
89 #include "v3dv_entrypoints.h"
90 #include "v3dv_bo.h"
91 
92 #include "drm-uapi/v3d_drm.h"
93 
94 #include "vk_alloc.h"
95 #include "perfcntrs/v3d_perfcntrs.h"
96 #include "simulator/v3d_simulator.h"
97 
98 #include "v3dv_cl.h"
99 
100 #include "wsi_common.h"
101 
102 /* A non-fatal assert.  Useful for debugging. */
103 #if MESA_DEBUG
104 #define v3dv_assert(x) ({ \
105    if (unlikely(!(x))) \
106       mesa_loge("%s:%d ASSERT: %s", __FILE__, __LINE__, #x); \
107 })
108 #else
109 #define v3dv_assert(x)
110 #endif
111 
112 #define perf_debug(...) do {                       \
113    if (V3D_DBG(PERF))                            \
114       mesa_logi(__VA_ARGS__);                \
115 } while (0)
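/* Hedged usage sketch (not part of the original header): v3dv_assert() only
 * logs on debug builds and never aborts, while perf_debug() only logs when
 * the V3D perf debug option is enabled (V3D_DBG(PERF)). The condition and
 * message below are made up for illustration.
 */
static inline void
v3dv_example_check_offset(uint32_t offset)
{
   v3dv_assert(offset % 4 == 0);
   if (offset % 4096 != 0)
      perf_debug("offset %u is not page-aligned, may require a copy", offset);
}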
116 
117 struct v3dv_instance;
118 
119 struct v3d_simulator_file;
120 
121 /* Minimum required by the Vulkan 1.1 spec */
122 #define MAX_MEMORY_ALLOCATION_SIZE (1ull << 30)
123 
124 /* Maximum number of performance counters */
125 #define V3D_MAX_PERFCNT 93
126 
127 struct v3dv_physical_device {
128    struct vk_physical_device vk;
129 
130    char *name;
131    int32_t render_fd;
132    int32_t display_fd;
133 
134    /* We need these because it is not clear how to detect
135     * valid devids in a portable way
136     */
137    bool has_primary;
138    bool has_render;
139 
140    dev_t primary_devid;
141    dev_t render_devid;
142 
143    uint8_t driver_build_sha1[20];
144    uint8_t pipeline_cache_uuid[VK_UUID_SIZE];
145    uint8_t device_uuid[VK_UUID_SIZE];
146    uint8_t driver_uuid[VK_UUID_SIZE];
147 
148    struct vk_sync_type drm_syncobj_type;
149    struct vk_sync_timeline_type sync_timeline_type;
150    const struct vk_sync_type *sync_types[3];
151 
152    struct disk_cache *disk_cache;
153 
154    mtx_t mutex;
155 
156    struct wsi_device wsi_device;
157 
158    VkPhysicalDeviceMemoryProperties memory;
159 
160    struct v3d_device_info devinfo;
161    struct v3d_perfcntrs *perfcntr;
162 
163 #if USE_V3D_SIMULATOR
164    struct v3d_simulator_file *sim_file;
165 #endif
166 
167    const struct v3d_compiler *compiler;
168    uint32_t next_program_id;
169 
170    alignas(8) uint64_t heap_used;
171 
172    /* This array holds all our 'struct v3dv_bo' allocations. We use this
173     * so we can add a refcount to our BOs and check if a particular BO
174     * was already allocated in this device using its GEM handle. This is
175     * necessary to properly manage BO imports, because the kernel doesn't
176     * refcount the underlying BO memory.
177     *
178     * Specifically, when self-importing (i.e. importing a BO into the same
179     * device that created it), the kernel will give us the same BO handle
180     * for both BOs and we must only free it once when both references are
181     * freed. Otherwise, if we are not self-importing, we get two different BO
182     * handles, and we want to free each one individually.
183     *
184     * The BOs in this map all have a refcnt field holding their reference
185     * count, and only self-imported BOs will ever have a refcnt > 1.
186     */
187    struct util_sparse_array bo_map;
188 
189    struct {
190       bool merge_jobs;
191    } options;
192 
193    struct {
194       bool cpu_queue;
195       bool multisync;
196       bool perfmon;
197    } caps;
198 };
199 
200 static inline struct v3dv_bo *
201 v3dv_device_lookup_bo(struct v3dv_physical_device *device, uint32_t handle)
202 {
203    return (struct v3dv_bo *) util_sparse_array_get(&device->bo_map, handle);
204 }
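/* Hedged sketch (not part of the original header) of how bo_map supports the
 * self-import case described above: entries returned by util_sparse_array_get()
 * are zero-filled for handles we have never seen, so a non-zero refcnt means
 * the handle is already tracked. The refcnt/handle fields are assumptions
 * about struct v3dv_bo (defined in v3dv_bo.h).
 */
static inline struct v3dv_bo *
v3dv_example_track_imported_bo(struct v3dv_physical_device *device,
                               uint32_t handle)
{
   struct v3dv_bo *bo = v3dv_device_lookup_bo(device, handle);

   if (p_atomic_read(&bo->refcnt) == 0) {
      /* First time we see this GEM handle: initialize the entry
       * (handle, size, map, ... elided in this sketch).
       */
   }

   /* Self-import or not, each reference bumps the refcount; the GEM handle
    * is only closed when the last reference is dropped.
    */
   p_atomic_inc(&bo->refcnt);
   return bo;
}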
205 
206 VkResult v3dv_wsi_init(struct v3dv_physical_device *physical_device);
207 void v3dv_wsi_finish(struct v3dv_physical_device *physical_device);
208 struct v3dv_image *v3dv_wsi_get_image_from_swapchain(VkSwapchainKHR swapchain,
209                                                      uint32_t index);
210 
211 void v3dv_meta_clear_init(struct v3dv_device *device);
212 void v3dv_meta_clear_finish(struct v3dv_device *device);
213 
214 void v3dv_meta_blit_init(struct v3dv_device *device);
215 void v3dv_meta_blit_finish(struct v3dv_device *device);
216 
217 void v3dv_meta_texel_buffer_copy_init(struct v3dv_device *device);
218 void v3dv_meta_texel_buffer_copy_finish(struct v3dv_device *device);
219 
220 bool v3dv_meta_can_use_tlb(struct v3dv_image *image,
221                            uint8_t plane,
222                            uint8_t miplevel,
223                            const VkOffset3D *offset,
224                            const VkExtent3D *extent,
225                            VkFormat *compat_format);
226 
227 struct v3dv_instance {
228    struct vk_instance vk;
229 
230    bool pipeline_cache_enabled;
231    bool default_pipeline_cache_enabled;
232    bool meta_cache_enabled;
233 };
234 
235 /* FIXME: In addition to tracking the last job submitted by GPU queue (cl, csd,
236  * tfu), we still need a syncobj to track the last overall job submitted
237  * (V3DV_QUEUE_ANY) for the case we don't support multisync. Someday we can
238  * start expecting multisync to be present and drop the legacy implementation
239  * together with this V3DV_QUEUE_ANY tracker.
240  */
241 enum v3dv_queue_type {
242    V3DV_QUEUE_CL = 0,
243    V3DV_QUEUE_CSD,
244    V3DV_QUEUE_TFU,
245    V3DV_QUEUE_CPU,
246    V3DV_QUEUE_ANY,
247    V3DV_QUEUE_COUNT,
248 };
249 
250 /* For each GPU queue, we use a syncobj to track the last job submitted. We
251  * set the `first` flag to indicate when we are starting a new cmd buffer
252  * batch, and therefore when a job submitted to a given queue will be the
253  * first in that batch.
254  */
255 struct v3dv_last_job_sync {
256    /* If the job is the first submitted to a GPU queue in a cmd buffer batch.
257     *
258     * We use V3DV_QUEUE_{CL,CSD,TFU} both with and without multisync.
259     */
260    bool first[V3DV_QUEUE_COUNT];
261    /* Array of syncobj to track the last job submitted to a GPU queue.
262     *
263     * With multisync we use V3DV_QUEUE_{CL,CSD,TFU} to track syncobjs for each
264     * queue, but without multisync we only track the last job submitted to any
265     * queue in V3DV_QUEUE_ANY.
266     */
267    uint32_t syncs[V3DV_QUEUE_COUNT];
268 };
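/* Hedged sketch (not part of the original header): how last_job_syncs is
 * indexed per the comments above. 'has_multisync' stands in for the device
 * capability flag (see v3dv_physical_device::caps.multisync).
 */
static inline uint32_t
v3dv_example_last_job_sync_index(enum v3dv_queue_type queue_type,
                                 bool has_multisync)
{
   /* With multisync each GPU queue tracks its own syncobj; without it we
    * only track the last job submitted to any queue.
    */
   return has_multisync ? (uint32_t)queue_type : (uint32_t)V3DV_QUEUE_ANY;
}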
269 
270 struct v3dv_queue {
271    struct vk_queue vk;
272 
273    struct v3dv_device *device;
274 
275    struct v3dv_last_job_sync last_job_syncs;
276 
277    struct v3dv_job *noop_job;
278 
279    /* The last active perfmon ID to prevent mixing of counter results when a
280     * job is submitted with a different perfmon id.
281     */
282    uint32_t last_perfmon_id;
283 };
284 
285 VkResult v3dv_queue_driver_submit(struct vk_queue *vk_queue,
286                                   struct vk_queue_submit *submit);
287 
288 #define V3DV_META_BLIT_CACHE_KEY_SIZE              (4 * sizeof(uint32_t))
289 #define V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE (3 * sizeof(uint32_t) + \
290                                                     sizeof(VkComponentMapping))
291 
292 struct v3dv_meta_color_clear_pipeline {
293    VkPipeline pipeline;
294    VkRenderPass pass;
295    bool cached;
296    uint64_t key;
297 };
298 
299 struct v3dv_meta_depth_clear_pipeline {
300    VkPipeline pipeline;
301    uint64_t key;
302 };
303 
304 struct v3dv_meta_blit_pipeline {
305    VkPipeline pipeline;
306    VkRenderPass pass;
307    VkRenderPass pass_no_load;
308    uint8_t key[V3DV_META_BLIT_CACHE_KEY_SIZE];
309 };
310 
311 struct v3dv_meta_texel_buffer_copy_pipeline {
312    VkPipeline pipeline;
313    VkRenderPass pass;
314    VkRenderPass pass_no_load;
315    uint8_t key[V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE];
316 };
317 
318 struct v3dv_pipeline_key {
319    uint8_t topology;
320    uint8_t logicop_func;
321    bool msaa;
322    bool sample_alpha_to_coverage;
323    bool sample_alpha_to_one;
324    uint8_t cbufs;
325    struct {
326       enum pipe_format format;
327       uint8_t swizzle[4];
328    } color_fmt[V3D_MAX_DRAW_BUFFERS];
329    uint8_t f32_color_rb;
330    uint32_t va_swap_rb_mask;
331    bool has_multiview;
332    bool line_smooth;
333 };
334 
335 struct v3dv_pipeline_cache_stats {
336    uint32_t miss;
337    uint32_t hit;
338    uint32_t count;
339    uint32_t on_disk_hit;
340 };
341 
342 /* Equivalent to gl_shader_stage, but including the coordinate shaders
343  *
344  * FIXME: perhaps move to common
345  */
346 enum broadcom_shader_stage {
347    BROADCOM_SHADER_VERTEX,
348    BROADCOM_SHADER_VERTEX_BIN,
349    BROADCOM_SHADER_GEOMETRY,
350    BROADCOM_SHADER_GEOMETRY_BIN,
351    BROADCOM_SHADER_FRAGMENT,
352    BROADCOM_SHADER_COMPUTE,
353 };
354 
355 #define BROADCOM_SHADER_STAGES (BROADCOM_SHADER_COMPUTE + 1)
356 
357 /* Assumes that coordinate shaders will be custom-handled by the caller */
358 static inline enum broadcom_shader_stage
359 gl_shader_stage_to_broadcom(gl_shader_stage stage)
360 {
361    switch (stage) {
362    case MESA_SHADER_VERTEX:
363       return BROADCOM_SHADER_VERTEX;
364    case MESA_SHADER_GEOMETRY:
365       return BROADCOM_SHADER_GEOMETRY;
366    case MESA_SHADER_FRAGMENT:
367       return BROADCOM_SHADER_FRAGMENT;
368    case MESA_SHADER_COMPUTE:
369       return BROADCOM_SHADER_COMPUTE;
370    default:
371       unreachable("Unknown gl shader stage");
372    }
373 }
374 
375 static inline gl_shader_stage
376 broadcom_shader_stage_to_gl(enum broadcom_shader_stage stage)
377 {
378    switch (stage) {
379    case BROADCOM_SHADER_VERTEX:
380    case BROADCOM_SHADER_VERTEX_BIN:
381       return MESA_SHADER_VERTEX;
382    case BROADCOM_SHADER_GEOMETRY:
383    case BROADCOM_SHADER_GEOMETRY_BIN:
384       return MESA_SHADER_GEOMETRY;
385    case BROADCOM_SHADER_FRAGMENT:
386       return MESA_SHADER_FRAGMENT;
387    case BROADCOM_SHADER_COMPUTE:
388       return MESA_SHADER_COMPUTE;
389    default:
390       unreachable("Unknown broadcom shader stage");
391    }
392 }
393 
394 static inline bool
395 broadcom_shader_stage_is_binning(enum broadcom_shader_stage stage)
396 {
397    switch (stage) {
398    case BROADCOM_SHADER_VERTEX_BIN:
399    case BROADCOM_SHADER_GEOMETRY_BIN:
400       return true;
401    default:
402       return false;
403    }
404 }
405 
406 static inline bool
407 broadcom_shader_stage_is_render_with_binning(enum broadcom_shader_stage stage)
408 {
409    switch (stage) {
410    case BROADCOM_SHADER_VERTEX:
411    case BROADCOM_SHADER_GEOMETRY:
412       return true;
413    default:
414       return false;
415    }
416 }
417 
418 static inline enum broadcom_shader_stage
419 broadcom_binning_shader_stage_for_render_stage(enum broadcom_shader_stage stage)
420 {
421    switch (stage) {
422    case BROADCOM_SHADER_VERTEX:
423       return BROADCOM_SHADER_VERTEX_BIN;
424    case BROADCOM_SHADER_GEOMETRY:
425       return BROADCOM_SHADER_GEOMETRY_BIN;
426    default:
427       unreachable("Invalid shader stage");
428    }
429 }
430 
431 static inline const char *
432 broadcom_shader_stage_name(enum broadcom_shader_stage stage)
433 {
434    switch (stage) {
435    case BROADCOM_SHADER_VERTEX_BIN:
436       return "MESA_SHADER_VERTEX_BIN";
437    case BROADCOM_SHADER_GEOMETRY_BIN:
438       return "MESA_SHADER_GEOMETRY_BIN";
439    default:
440       return gl_shader_stage_name(broadcom_shader_stage_to_gl(stage));
441    }
442 }
443 
444 struct v3dv_pipeline_cache {
445    struct vk_object_base base;
446 
447    struct v3dv_device *device;
448    mtx_t mutex;
449 
450    struct hash_table *nir_cache;
451    struct v3dv_pipeline_cache_stats nir_stats;
452 
453    struct hash_table *cache;
454    struct v3dv_pipeline_cache_stats stats;
455 
456    /* For VK_EXT_pipeline_creation_cache_control. */
457    bool externally_synchronized;
458 };
459 
460 struct v3dv_device {
461    struct vk_device vk;
462 
463    struct v3dv_instance *instance;
464    struct v3dv_physical_device *pdevice;
465 
466    struct v3d_device_info devinfo;
467    struct v3dv_queue queue;
468 
469    /* Guards query->maybe_available and value for timestamps */
470    mtx_t query_mutex;
471 
472    /* Signaled whenever a query is ended */
473    cnd_t query_ended;
474 
475    /* Resources used for meta operations */
476    struct {
477       mtx_t mtx;
478       struct {
479          VkPipelineLayout p_layout;
480          struct hash_table *cache; /* v3dv_meta_color_clear_pipeline */
481       } color_clear;
482       struct {
483          VkPipelineLayout p_layout;
484          struct hash_table *cache; /* v3dv_meta_depth_clear_pipeline */
485       } depth_clear;
486       struct {
487          VkDescriptorSetLayout ds_layout;
488          VkPipelineLayout p_layout;
489          struct hash_table *cache[3]; /* v3dv_meta_blit_pipeline for 1d, 2d, 3d */
490       } blit;
491       struct {
492          VkDescriptorSetLayout ds_layout;
493          VkPipelineLayout p_layout;
494          struct hash_table *cache[3]; /* v3dv_meta_texel_buffer_copy_pipeline for 1d, 2d, 3d */
495       } texel_buffer_copy;
496    } meta;
497 
498    struct v3dv_bo_cache {
499       /** List of struct v3dv_bo freed, by age. */
500       struct list_head time_list;
501       /** List of struct v3dv_bo freed, per size, by age. */
502       struct list_head *size_list;
503       uint32_t size_list_size;
504 
505       mtx_t lock;
506 
507       uint32_t cache_size;
508       uint32_t cache_count;
509       uint32_t max_cache_size;
510    } bo_cache;
511 
512    uint32_t bo_size;
513    uint32_t bo_count;
514 
515    /* Event handling resources.
516     *
517     * Our implementation of events uses a BO to store event state (signaled vs
518     * reset) and dispatches compute shaders to handle GPU event functions
519     * (signal, reset, wait). This struct holds all the resources required
520     * by the implementation.
521     */
522    struct {
523       mtx_t lock;
524 
525       /* BO for the event states: signaled (1) or reset (0) */
526       struct v3dv_bo *bo;
527 
528       /* We pre-allocate all the events we can fit for the size of the BO we
529        * create to track their states, where each event has an index which is
530        * basically the offset of its state in that BO. We keep a free list with
531        * the pre-allocated events that are available.
532        */
533       uint32_t event_count;
534       struct v3dv_event *events;
535       struct list_head free_list;
536 
537       /* Vulkan resources to access the event BO from shaders. We have a
538        * pipeline that sets the state of an event and another that waits on
539        * a single event. Both pipelines require access to the event state BO,
540        * for which we need to allocate a single descriptor set.
541        */
542       VkBuffer buffer;
543       VkDeviceMemory mem;
544       VkDescriptorSetLayout descriptor_set_layout;
545       VkPipelineLayout pipeline_layout;
546       VkDescriptorPool descriptor_pool;
547       VkDescriptorSet descriptor_set;
548       VkPipeline set_event_pipeline;
549       VkPipeline wait_event_pipeline;
550    } events;
551 
552    /* Query handling resources.
553     *
554     * Our implementation of occlusion queries uses a BO per pool to keep track
555     * of the per-query availability state and dispatches compute shaders to
556     * handle GPU query functions that read and write that state. This struct
557     * holds Vulkan resources that can be shared across all query pools to
558     * implement this. This framework may be extended in the future to handle
559     * more query types.
560     */
561    struct {
562       VkDescriptorSetLayout buf_descriptor_set_layout;
563 
564       /* Set query availability */
565       VkPipelineLayout avail_pipeline_layout;
566       VkPipeline avail_pipeline;
567 
568       /* Reset query availability and clear occlusion counters */
569       VkPipelineLayout reset_occlusion_pipeline_layout;
570       VkPipeline reset_occlusion_pipeline;
571 
572       /* Copy query results */
573       VkPipelineLayout copy_pipeline_layout;
574       VkPipeline copy_pipeline[8];
575    } queries;
576 
577    struct v3dv_pipeline_cache default_pipeline_cache;
578 
579    /* GL_SHADER_STATE_RECORD needs to specify default attribute values. The
580     * following covers the most common case, that is, all attribute formats
581     * being float, allowing us to reuse the same BO for all
582     * pipelines matching this requirement. Pipelines that need integer
583     * attributes will create their own BO.
584     *
585     * Note that since v71 the default attribute values are not needed, so this
586     * can be NULL.
587     */
588    struct v3dv_bo *default_attribute_float;
589 
590    void *device_address_mem_ctx;
591    struct util_dynarray device_address_bo_list; /* Array of struct v3dv_bo * */
592 };
593 
594 struct v3dv_device_memory {
595    struct vk_device_memory vk;
596 
597    struct v3dv_bo *bo;
598    const VkMemoryType *type;
599    bool is_for_wsi;
600    bool is_for_device_address;
601 };
602 
603 #define V3D_OUTPUT_IMAGE_FORMAT_NO 255
604 #define TEXTURE_DATA_FORMAT_NO     255
605 
606 #define V3DV_MAX_PLANE_COUNT 3
607 struct v3dv_format_plane {
608    /* One of V3D42_OUTPUT_IMAGE_FORMAT_*, or V3D_OUTPUT_IMAGE_FORMAT_NO */
609    uint8_t rt_type;
610 
611    /* One of V3D42_TEXTURE_DATA_FORMAT_*. */
612    uint8_t tex_type;
613 
614    /* Swizzle to apply to the RGBA shader output for storing to the tile
615     * buffer, to the RGBA tile buffer to produce shader input (for
616     * blending), and for turning the rgba8888 texture sampler return
617     * value into shader rgba values.
618     */
619    uint8_t swizzle[4];
620 
621    /* Whether the return value is 16F/I/UI or 32F/I/UI. */
622    uint8_t return_size;
623 };
624 
625 struct v3dv_format {
626    /* A non-zero plane count implies the format is supported */
627    uint8_t plane_count;
628 
629    struct v3dv_format_plane planes[V3DV_MAX_PLANE_COUNT];
630 
631    /* If the format supports (linear) filtering when texturing. */
632    bool supports_filtering;
633 };
634 
635 /* Note that although VkImageAspectFlags would allow combining more than one
636  * PLANE bit, for all the use cases we implement that take VkImageAspectFlags
637  * only one plane is allowed, as for example in vkCmdCopyImage:
638  *
639  *   "If srcImage has a VkFormat with two planes then for each element of
640  *    pRegions, srcSubresource.aspectMask must be VK_IMAGE_ASPECT_PLANE_0_BIT
641  *    or VK_IMAGE_ASPECT_PLANE_1_BIT"
642  *
643  */
644 static uint8_t v3dv_plane_from_aspect(VkImageAspectFlags aspect)
645 {
646    switch (aspect) {
647    case VK_IMAGE_ASPECT_COLOR_BIT:
648    case VK_IMAGE_ASPECT_DEPTH_BIT:
649    case VK_IMAGE_ASPECT_STENCIL_BIT:
650    case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
651    case VK_IMAGE_ASPECT_PLANE_0_BIT:
652    case VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT:
653       return 0;
654    case VK_IMAGE_ASPECT_PLANE_1_BIT:
655    case VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT:
656       return 1;
657    case VK_IMAGE_ASPECT_PLANE_2_BIT:
658    case VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT:
659       return 2;
660    default:
661       unreachable("invalid image aspect");
662    }
663 }
664 
665 struct v3d_resource_slice {
666    uint32_t offset;
667    uint32_t stride;
668    uint32_t padded_height;
669    uint32_t width;
670    uint32_t height;
671    /* Size of a single pane of the slice.  For 3D textures, there will be
672     * a number of panes equal to the minified, power-of-two-aligned
673     * depth.
674     */
675    uint32_t size;
676    uint8_t ub_pad;
677    enum v3d_tiling_mode tiling;
678    uint32_t padded_height_of_output_image_in_uif_blocks;
679 };
680 
681 bool v3dv_format_swizzle_needs_rb_swap(const uint8_t *swizzle);
682 bool v3dv_format_swizzle_needs_reverse(const uint8_t *swizzle);
683 
684 struct v3dv_image {
685    struct vk_image vk;
686 
687    const struct v3dv_format *format;
688    bool tiled;
689 
690    uint8_t plane_count;
691 
692    /* If 0, this is a multi-plane image using disjoint memory, where each
693     * plane binds a different device memory. Otherwise, all the planes share
694     * the same device memory and this stores the total size of the image in
695     * bytes.
696     */
697    uint32_t non_disjoint_size;
698 
699    struct {
700       uint32_t cpp;
701 
702       struct v3d_resource_slice slices[V3D_MAX_MIP_LEVELS];
703       /* Total size of the plane in bytes. */
704       uint64_t size;
705       uint32_t cube_map_stride;
706 
707       /* If not using disjoint memory, mem and mem_offset are the same for all
708        * planes, in which case mem_offset is the offset of plane 0.
709        */
710       struct v3dv_device_memory *mem;
711       VkDeviceSize mem_offset;
712       uint32_t alignment;
713 
714       /* Pre-subsampled per plane width and height
715        */
716       uint32_t width;
717       uint32_t height;
718 
719       /* Even if we can get it from the parent image format, we keep the
720        * format here for convenience
721        */
722       VkFormat vk_format;
723    } planes[V3DV_MAX_PLANE_COUNT];
724 
725    /* Used only when sampling a linear texture (which V3D doesn't support).
726     * This holds a tiled copy of the image we can use for that purpose.
727     */
728    struct v3dv_image *shadow;
729 };
730 
731 VkResult
732 v3dv_image_init(struct v3dv_device *device,
733                 const VkImageCreateInfo *pCreateInfo,
734                 const VkAllocationCallbacks *pAllocator,
735                 struct v3dv_image *image);
736 
737 VkImageViewType v3dv_image_type_to_view_type(VkImageType type);
738 
739 static uint32_t
740 v3dv_image_aspect_to_plane(const struct v3dv_image *image,
741                            VkImageAspectFlagBits aspect)
742 {
743    assert(util_bitcount(aspect) == 1 && (aspect & image->vk.aspects));
744 
745    /* Because we always put image and view planes in aspect-bit-order, the
746     * plane index is the number of bits in the image aspect before aspect.
747     */
748    return util_bitcount(image->vk.aspects & (aspect - 1));
749 }
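/* Worked example for the bit-counting above (added for illustration): for a
 * 3-plane image, vk.aspects = PLANE_0 | PLANE_1 | PLANE_2. Looking up
 * aspect = VK_IMAGE_ASPECT_PLANE_1_BIT masks the aspects with (aspect - 1),
 * which keeps only the PLANE_0 bit, so util_bitcount() returns 1: plane 1.
 */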
750 
751 /* Pre-generating packets needs to consider changes in packet sizes across hw
752  * versions. Keep things simple and allocate enough space for any supported
753  * version. We ensure the size is large enough through static asserts.
754  */
755 #define V3DV_TEXTURE_SHADER_STATE_LENGTH 32
756 #define V3DV_SAMPLER_STATE_LENGTH 24
757 #define V3DV_BLEND_CFG_LENGTH 5
758 #define V3DV_CFG_BITS_LENGTH 4
759 #define V3DV_GL_SHADER_STATE_RECORD_LENGTH 36
760 #define V3DV_VCM_CACHE_SIZE_LENGTH 2
761 #define V3DV_GL_SHADER_STATE_ATTRIBUTE_RECORD_LENGTH 16
762 #define V3DV_STENCIL_CFG_LENGTH 6
763 
764 struct v3dv_image_view {
765    struct vk_image_view vk;
766 
767    const struct v3dv_format *format;
768 
769    uint8_t view_swizzle[4];
770 
771    uint8_t plane_count;
772    struct {
773       uint8_t image_plane;
774 
775       bool swap_rb;
776       bool channel_reverse;
777       uint32_t internal_bpp;
778       uint32_t internal_type;
779       uint32_t offset;
780 
781       /* Precomputed swizzle (composed from the view swizzle and the format
782        * swizzle).
783        *
784        * This could also be included in the descriptor BO, but the shader state
785        * packet doesn't need it in a BO, so we can just avoid a memory copy.
786        */
787       uint8_t swizzle[4];
788 
789       /* Prepacked TEXTURE_SHADER_STATE. It will be copied to the descriptor info
790        * during UpdateDescriptorSets.
791        *
792        * Empirical tests show that cube arrays need a different shader state
793        * depending on whether they are used with a sampler or not, so for these
794        * we generate two states and select the one to use based on the descriptor
795        * type.
796        */
797       uint8_t texture_shader_state[2][V3DV_TEXTURE_SHADER_STATE_LENGTH];
798    } planes[V3DV_MAX_PLANE_COUNT];
799 
800    /* Used only when sampling a linear texture (which V3D doesn't support).
801     * This would represent a view over the tiled shadow image.
802     */
803    struct v3dv_image_view *shadow;
804 };
805 
806 VkResult v3dv_create_image_view(struct v3dv_device *device,
807                                 const VkImageViewCreateInfo *pCreateInfo,
808                                 VkImageView *pView);
809 
810 uint32_t v3dv_layer_offset(const struct v3dv_image *image, uint32_t level, uint32_t layer,
811                            uint8_t plane);
812 
813 struct v3dv_buffer {
814    struct vk_object_base base;
815 
816    VkDeviceSize size;
817    VkBufferUsageFlagBits2KHR usage;
818    uint32_t alignment;
819 
820    struct v3dv_device_memory *mem;
821    VkDeviceSize mem_offset;
822 };
823 
824 void
825 v3dv_buffer_init(struct v3dv_device *device,
826                  const VkBufferCreateInfo *pCreateInfo,
827                  struct v3dv_buffer *buffer,
828                  uint32_t alignment);
829 
830 void
831 v3dv_buffer_bind_memory(const VkBindBufferMemoryInfo *info);
832 
833 struct v3dv_buffer_view {
834    struct vk_object_base base;
835 
836    struct v3dv_buffer *buffer;
837 
838    VkFormat vk_format;
839    const struct v3dv_format *format;
840    uint32_t internal_bpp;
841    uint32_t internal_type;
842 
843    uint32_t offset;
844    uint32_t size;
845    uint32_t num_elements;
846 
847    /* Prepacked TEXTURE_SHADER_STATE. */
848    uint8_t texture_shader_state[V3DV_TEXTURE_SHADER_STATE_LENGTH];
849 };
850 
851 struct v3dv_subpass_attachment {
852    uint32_t attachment;
853    VkImageLayout layout;
854 };
855 
856 struct v3dv_subpass {
857    uint32_t input_count;
858    struct v3dv_subpass_attachment *input_attachments;
859 
860    uint32_t color_count;
861    struct v3dv_subpass_attachment *color_attachments;
862    struct v3dv_subpass_attachment *resolve_attachments;
863 
864    struct v3dv_subpass_attachment ds_attachment;
865    struct v3dv_subpass_attachment ds_resolve_attachment;
866    bool resolve_depth, resolve_stencil;
867 
868    /* If we need to emit the clear of the depth/stencil attachment using
869     * a draw call instead of using the TLB (GFXH-1461).
870     */
871    bool do_depth_clear_with_draw;
872    bool do_stencil_clear_with_draw;
873 
874    /* Multiview */
875    uint32_t view_mask;
876 };
877 
878 struct v3dv_render_pass_attachment {
879    VkAttachmentDescription2 desc;
880 
881    uint32_t first_subpass;
882    uint32_t last_subpass;
883 
884    /* When multiview is enabled, we no longer care about when a particular
885     * attachment is first or last used in a render pass, since not all views
886     * in the attachment will meet that criteria. Instead, we need to track
887     * each individual view (layer) in each attachment and emit our stores,
888     * loads and clears accordingly.
889     */
890    struct {
891       uint32_t first_subpass;
892       uint32_t last_subpass;
893    } views[MAX_MULTIVIEW_VIEW_COUNT];
894 
895    /* If this is a multisampled attachment that is going to be resolved,
896     * whether we may be able to use the TLB hardware resolve based on the
897     * attachment format.
898     */
899    bool try_tlb_resolve;
900 };
901 
902 struct v3dv_render_pass {
903    struct vk_object_base base;
904 
905    bool multiview_enabled;
906 
907    uint32_t attachment_count;
908    struct v3dv_render_pass_attachment *attachments;
909 
910    uint32_t subpass_count;
911    struct v3dv_subpass *subpasses;
912 
913    struct v3dv_subpass_attachment *subpass_attachments;
914 };
915 
916 struct v3dv_framebuffer {
917    struct vk_object_base base;
918 
919    uint32_t width;
920    uint32_t height;
921    uint32_t layers;
922 
923    /* Typically, edge tiles in the framebuffer have padding depending on the
924     * underlying tiling layout. One consequence of this is that when the
925     * framebuffer dimensions are not aligned to tile boundaries, tile stores
926     * would still write full tiles on the edges and write to the padded area.
927     * If the framebuffer is aliasing a smaller region of a larger image, then
928     * we need to be careful with this though, as we won't have padding on the
929     * edge tiles (which typically means that we need to load the tile buffer
930     * before we store).
931     */
932    bool has_edge_padding;
933 
934    uint32_t attachment_count;
935    uint32_t color_attachment_count;
936 
937    /* Notice that elements in 'attachments' will be NULL if the framebuffer
938     * was created imageless. The driver is expected to access attachment info
939     * from the command buffer state instead.
940     */
941    struct v3dv_image_view *attachments[0];
942 };
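/* Hedged sketch (not part of the original header): because 'attachments' is a
 * flexible array member, a framebuffer allocation must be sized for the
 * requested attachment count; the actual create path may differ.
 */
static inline size_t
v3dv_example_framebuffer_alloc_size(uint32_t attachment_count)
{
   return sizeof(struct v3dv_framebuffer) +
          attachment_count * sizeof(struct v3dv_image_view *);
}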
943 
944 struct v3dv_frame_tiling {
945    uint32_t width;
946    uint32_t height;
947    uint32_t layers;
948    uint32_t render_target_count;
949    uint32_t internal_bpp;
950    uint32_t total_color_bpp;
951    bool     msaa;
952    bool     double_buffer;
953    uint32_t tile_width;
954    uint32_t tile_height;
955    uint32_t draw_tiles_x;
956    uint32_t draw_tiles_y;
957    uint32_t supertile_width;
958    uint32_t supertile_height;
959    uint32_t frame_width_in_supertiles;
960    uint32_t frame_height_in_supertiles;
961 };
962 
963 bool v3dv_subpass_area_is_tile_aligned(struct v3dv_device *device,
964                                        const VkRect2D *area,
965                                        struct v3dv_framebuffer *fb,
966                                        struct v3dv_render_pass *pass,
967                                        uint32_t subpass_idx);
968 
969 /* Checks if we need to emit 2 initial tile clears for double buffer mode.
970  * This happens when we render at least 2 tiles, because in this mode each
971  * tile uses a different half of the tile buffer memory so we can have 2 tiles
972  * in flight (one being stored to memory and the next being rendered). In this
973  * scenario, if we emit a single initial tile clear we would only clear the
974  * first half of the tile buffer.
975  */
976 static inline bool
977 v3dv_do_double_initial_tile_clear(const struct v3dv_frame_tiling *tiling)
978 {
979    return tiling->double_buffer &&
980           (tiling->draw_tiles_x > 1 || tiling->draw_tiles_y > 1 ||
981            tiling->layers > 1);
982 }
983 
984 enum v3dv_cmd_buffer_status {
985    V3DV_CMD_BUFFER_STATUS_NEW           = 0,
986    V3DV_CMD_BUFFER_STATUS_INITIALIZED   = 1,
987    V3DV_CMD_BUFFER_STATUS_RECORDING     = 2,
988    V3DV_CMD_BUFFER_STATUS_EXECUTABLE    = 3
989 };
990 
991 union v3dv_clear_value {
992    uint32_t color[4];
993    struct {
994       float z;
995       uint8_t s;
996    };
997 };
998 
999 struct v3dv_cmd_buffer_attachment_state {
1000    /* The original clear value as provided by the Vulkan API */
1001    VkClearValue vk_clear_value;
1002 
1003    /* The hardware clear value */
1004    union v3dv_clear_value clear_value;
1005 
1006    /* The underlying image view (from the framebuffer or, if imageless
1007     * framebuffer is used, from VkRenderPassAttachmentBeginInfo.
1008     */
1009    struct v3dv_image_view *image_view;
1010 
1011    /* If this is a multisampled attachment with a resolve operation. */
1012    bool has_resolve;
1013 
1014    /* If this is a multisampled attachment with a resolve operation,
1015     * whether we can use the TLB for the resolve.
1016     */
1017    bool use_tlb_resolve;
1018 };
1019 
1020 /* Cached values derived from Vulkan viewport/count */
1021 struct v3dv_viewport_state {
1022    float translate[MAX_VIEWPORTS][3];
1023    float scale[MAX_VIEWPORTS][3];
1024 };
1025 
1026 /* Flags for custom dirty state that could lead to packet emission.
1027  *
1028  * Note *custom*: for all the dynamic state tracking coming from the Vulkan
1029  * API we use the Mesa runtime framework and its predefined flags
1030  * (MESA_VK_DYNAMIC_XXX).
1031  *
1032  * Here we define additional flags used to track dirty state.
1033  */
1034 enum v3dv_cmd_dirty_bits {
1035    V3DV_CMD_DIRTY_PIPELINE                  = 1 << 0,
1036    V3DV_CMD_DIRTY_COMPUTE_PIPELINE          = 1 << 1,
1037    V3DV_CMD_DIRTY_VERTEX_BUFFER             = 1 << 2,
1038    V3DV_CMD_DIRTY_INDEX_BUFFER              = 1 << 3,
1039    V3DV_CMD_DIRTY_DESCRIPTOR_SETS           = 1 << 4,
1040    V3DV_CMD_DIRTY_COMPUTE_DESCRIPTOR_SETS   = 1 << 5,
1041    V3DV_CMD_DIRTY_PUSH_CONSTANTS            = 1 << 6,
1042    V3DV_CMD_DIRTY_PUSH_CONSTANTS_UBO        = 1 << 7,
1043    V3DV_CMD_DIRTY_OCCLUSION_QUERY           = 1 << 8,
1044    V3DV_CMD_DIRTY_VIEW_INDEX                = 1 << 9,
1045    V3DV_CMD_DIRTY_DRAW_ID                   = 1 << 10,
1046    V3DV_CMD_DIRTY_ALL                       = (1 << 11) - 1,
1047 };
1048 
1049 struct v3dv_dynamic_state {
1050    /* FIXME: we keep some viewport info cached (translate, scale) because we
1051     * use it in more than one place. But note that translate_z and scale_z
1052     * are also used in several places, and we recompute them based on
1053     * scissor/viewport info every time. So perhaps we could do the same with
1054     * the x and y components.
1055     */
1056    struct v3dv_viewport_state viewport;
1057 
1058    /* We cache color_write_enable because the Vulkan runtime keeps an 8-bit
1059     * bitset with one bit per attachment, but to combine it with the
1060     * color write masks it is easier to cache a 32-bit bitset with 4 bits per
1061     * attachment.
1062     */
1063    uint32_t color_write_enable;
1064 };
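/* Hedged sketch (not part of the original header): expanding the runtime's
 * 8-bit per-attachment color write enable into the 4-bits-per-attachment
 * layout cached above, so it can be combined directly with the per-channel
 * color write masks.
 */
static inline uint32_t
v3dv_example_expand_color_write_enable(uint8_t enable_bits)
{
   uint32_t result = 0;
   for (uint32_t rt = 0; rt < 8; rt++) {
      if (enable_bits & (1u << rt))
         result |= 0xfu << (rt * 4);
   }
   return result;
}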
1065 
1066 void v3dv_viewport_compute_xform(const VkViewport *viewport,
1067                                  float scale[3],
1068                                  float translate[3]);
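/* Hedged sketch (not the driver's actual implementation): the standard Vulkan
 * viewport transform that v3dv_viewport_compute_xform() is expected to
 * produce, mapping NDC coordinates to framebuffer coordinates.
 */
static inline void
v3dv_example_viewport_xform(const VkViewport *viewport,
                            float scale[3], float translate[3])
{
   scale[0] = viewport->width * 0.5f;
   scale[1] = viewport->height * 0.5f;
   scale[2] = viewport->maxDepth - viewport->minDepth;
   translate[0] = viewport->x + viewport->width * 0.5f;
   translate[1] = viewport->y + viewport->height * 0.5f;
   translate[2] = viewport->minDepth;
}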
1069 
1070 enum v3dv_ez_state {
1071    V3D_EZ_UNDECIDED = 0,
1072    V3D_EZ_GT_GE,
1073    V3D_EZ_LT_LE,
1074    V3D_EZ_DISABLED,
1075 };
1076 
1077 enum v3dv_job_type {
1078    V3DV_JOB_TYPE_GPU_CL = 0,
1079    V3DV_JOB_TYPE_GPU_CL_INCOMPLETE,
1080    V3DV_JOB_TYPE_GPU_TFU,
1081    V3DV_JOB_TYPE_GPU_CSD,
1082    V3DV_JOB_TYPE_CPU_RESET_QUERIES,
1083    V3DV_JOB_TYPE_CPU_END_QUERY,
1084    V3DV_JOB_TYPE_CPU_COPY_QUERY_RESULTS,
1085    V3DV_JOB_TYPE_CPU_CSD_INDIRECT,
1086    V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY,
1087 };
1088 
1089 struct v3dv_reset_query_cpu_job_info {
1090    struct v3dv_query_pool *pool;
1091    uint32_t first;
1092    uint32_t count;
1093 };
1094 
1095 struct v3dv_end_query_info {
1096    struct v3dv_query_pool *pool;
1097    uint32_t query;
1098 
1099    /* This is one unless multiview is used */
1100    uint32_t count;
1101 };
1102 
1103 struct v3dv_copy_query_results_cpu_job_info {
1104    struct v3dv_query_pool *pool;
1105    uint32_t first;
1106    uint32_t count;
1107    struct v3dv_buffer *dst;
1108    uint32_t offset;
1109    uint32_t stride;
1110    VkQueryResultFlags flags;
1111 };
1112 
1113 struct v3dv_submit_sync_info {
1114    /* List of syncs to wait before running a job */
1115    uint32_t wait_count;
1116    struct vk_sync_wait *waits;
1117 
1118    /* List of syncs to signal when all jobs complete */
1119    uint32_t signal_count;
1120    struct vk_sync_signal *signals;
1121 };
1122 
1123 struct v3dv_csd_indirect_cpu_job_info {
1124    struct v3dv_buffer *buffer;
1125    uint32_t offset;
1126    struct v3dv_job *csd_job;
1127    uint32_t wg_size;
1128    uint32_t *wg_uniform_offsets[3];
1129    bool needs_wg_uniform_rewrite;
1130 };
1131 
1132 struct v3dv_timestamp_query_cpu_job_info {
1133    struct v3dv_query_pool *pool;
1134    uint32_t query;
1135 
1136    /* This is one unless multiview is used */
1137    uint32_t count;
1138 };
1139 
1140 /* Number of perfmons required to handle all supported performance counters */
1141 #define V3DV_MAX_PERFMONS DIV_ROUND_UP(V3D_MAX_PERFCNT, \
1142                                        DRM_V3D_MAX_PERF_COUNTERS)
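/* Worked example (added for illustration): assuming the kernel UAPI limit
 * DRM_V3D_MAX_PERF_COUNTERS is 32, this gives
 * V3DV_MAX_PERFMONS = DIV_ROUND_UP(93, 32) = 3 kernel perfmons per query.
 */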
1143 
1144 struct v3dv_perf_query {
1145    uint32_t kperfmon_ids[V3DV_MAX_PERFMONS];
1146 
1147    /* A DRM syncobj to wait on the GPU jobs for which we are collecting
1148     * performance data.
1149     */
1150    struct vk_sync *last_job_sync;
1151 };
1152 
1153 struct v3dv_job {
1154    struct list_head list_link;
1155 
1156    /* We only create job clones when executing secondary command buffers into
1157     * primaries. These clones don't make deep copies of the original object
1158     * so we want to flag them to avoid freeing resources they don't own.
1159     */
1160    bool is_clone;
1161 
1162    /* If this is a cloned job, whether it owns its own BCL resource. This
1163     * happens when we suspend jobs in command buffers recorded with the
1164     * VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT flag.
1165     */
1166    bool clone_owns_bcl;
1167 
1168    /* VK_KHR_dynamic_rendering */
1169    bool suspending;
1170    bool resuming;
1171    struct v3dv_cl_out *suspend_branch_inst_ptr;
1172    uint32_t suspended_bcl_end;
1173 
1174    /* If the job executes on the transfer stage of the pipeline */
1175    bool is_transfer;
1176 
1177    /* VK_KHR_buffer_device_address allows shaders to use pointers that can
1178     * dereference memory in any buffer that has been flagged with
1179     * VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT. These buffers may not
1180     * be bound via descriptor sets, so we need to make sure that a job that
1181     * uses this functionality includes all these buffers in its kernel
1182     * submission.
1183     */
1184    bool uses_buffer_device_address;
1185 
1186    /* True if we have not identified anything that would be incompatible
1187     * with double-buffer (like MSAA) or that would make double-buffer mode
1188     * not efficient (like tile loads or not having any stores).
1189     */
1190    bool can_use_double_buffer;
1191 
1192    /* This structure keeps track of various scores to inform a heuristic
1193     * for double-buffer mode.
1194     */
1195    struct v3d_double_buffer_score double_buffer_score;
1196 
1197    /* We only need to allocate tile state for all layers if the binner
1198     * writes primitives to layers other than the first. This can only be
1199     * done using layered rendering (writing gl_Layer from a geometry shader),
1200     * so for other cases of multilayered framebuffers (typically with
1201     * meta copy/clear operations) that won't use layered rendering, we only
1202     * need one layer's worth of tile state for the binner.
1203     */
1204    bool allocate_tile_state_for_all_layers;
1205 
1206    /* A pointer to the location of the TILE_BINNING_MODE_CFG packet so we can
1207     * rewrite it to enable double-buffer mode by the time we have enough info
1208     * about the job to make that decision.
1209     */
1210    struct v3dv_cl_out *bcl_tile_binning_mode_ptr;
1211 
1212    enum v3dv_job_type type;
1213 
1214    struct v3dv_device *device;
1215 
1216    struct v3dv_cmd_buffer *cmd_buffer;
1217 
1218    struct v3dv_cl bcl;
1219    struct v3dv_cl rcl;
1220    struct v3dv_cl indirect;
1221 
1222    /* Set of all BOs referenced by the job. This will be used for making
1223     * the list of BOs that the kernel will need to have paged in to
1224     * execute our job.
1225     */
1226    struct set *bos;
1227    uint32_t bo_count;
1228    uint64_t bo_handle_mask;
1229 
1230    struct v3dv_bo *tile_alloc;
1231    struct v3dv_bo *tile_state;
1232 
1233    bool tmu_dirty_rcl;
1234 
1235    uint32_t first_subpass;
1236 
1237    /* When the current subpass is split into multiple jobs, this flag is set
1238     * to true for any jobs after the first in the same subpass.
1239     */
1240    bool is_subpass_continue;
1241 
1242    /* If this job is the last job emitted for a subpass. */
1243    bool is_subpass_finish;
1244 
1245    struct v3dv_frame_tiling frame_tiling;
1246 
1247    enum v3dv_ez_state ez_state;
1248    enum v3dv_ez_state first_ez_state;
1249 
1250    /* If we have already decided if we need to disable Early Z/S completely
1251     * for this job.
1252     */
1253    bool decided_global_ez_enable;
1254 
1255    /* If the job emitted any draw calls with Early Z/S enabled */
1256    bool has_ez_draws;
1257 
1258    /* If this job has been configured to use early Z/S clear */
1259    bool early_zs_clear;
1260 
1261    /* Number of draw calls recorded into the job */
1262    uint32_t draw_count;
1263 
1264    /* A flag indicating whether we want to flush every draw separately. This
1265     * can be used for debugging, or for cases where special circumstances
1266     * require this behavior.
1267     */
1268    bool always_flush;
1269 
1270    /* A mask of V3DV_BARRIER_* indicating the source(s) of the barrier. We
1271     * can use this to select the hw queues where we need to serialize the job.
1272     */
1273    uint8_t serialize;
1274 
1275    /* If this is a CL job, whether we should sync before binning */
1276    bool needs_bcl_sync;
1277 
1278    /* If we have emitted a (default) point size packet in this job */
1279    bool emitted_default_point_size;
1280 
1281    /* Job specs for CPU jobs */
1282    union {
1283       struct v3dv_reset_query_cpu_job_info          query_reset;
1284       struct v3dv_end_query_info                    query_end;
1285       struct v3dv_copy_query_results_cpu_job_info   query_copy_results;
1286       struct v3dv_csd_indirect_cpu_job_info         csd_indirect;
1287       struct v3dv_timestamp_query_cpu_job_info      query_timestamp;
1288    } cpu;
1289 
1290    /* Job specs for TFU jobs */
1291    struct drm_v3d_submit_tfu tfu;
1292 
1293    /* Job specs for CSD jobs */
1294    struct {
1295       struct v3dv_bo *shared_memory;
1296       uint32_t wg_count[3];
1297       uint32_t wg_base[3];
1298       struct drm_v3d_submit_csd submit;
1299    } csd;
1300 
1301    /* Perfmons with last job sync for CSD and CL jobs */
1302    struct v3dv_perf_query *perf;
1303 };
1304 
1305 void v3dv_job_init(struct v3dv_job *job,
1306                    enum v3dv_job_type type,
1307                    struct v3dv_device *device,
1308                    struct v3dv_cmd_buffer *cmd_buffer,
1309                    int32_t subpass_idx);
1310 void v3dv_job_destroy(struct v3dv_job *job);
1311 
1312 void v3dv_job_add_bo(struct v3dv_job *job, struct v3dv_bo *bo);
1313 void v3dv_job_add_bo_unchecked(struct v3dv_job *job, struct v3dv_bo *bo);
1314 
1315 void v3dv_job_start_frame(struct v3dv_job *job,
1316                           uint32_t width,
1317                           uint32_t height,
1318                           uint32_t layers,
1319                           bool allocate_tile_state_for_all_layers,
1320                           bool allocate_tile_state_now,
1321                           uint32_t render_target_count,
1322                           uint8_t max_internal_bpp,
1323                           uint8_t total_color_bpp,
1324                           bool msaa);
1325 
1326 bool v3dv_job_type_is_gpu(struct v3dv_job *job);
1327 
1328 struct v3dv_job *
1329 v3dv_job_clone(struct v3dv_job *job, bool skip_bcl);
1330 
1331 struct v3dv_job *
1332 v3dv_job_clone_in_cmd_buffer(struct v3dv_job *job,
1333                              struct v3dv_cmd_buffer *cmd_buffer);
1334 
1335 struct v3dv_job *v3dv_cmd_buffer_create_cpu_job(struct v3dv_device *device,
1336                                                 enum v3dv_job_type type,
1337                                                 struct v3dv_cmd_buffer *cmd_buffer,
1338                                                 uint32_t subpass_idx);
1339 
1340 void
1341 v3dv_cmd_buffer_ensure_array_state(struct v3dv_cmd_buffer *cmd_buffer,
1342                                    uint32_t slot_size,
1343                                    uint32_t used_count,
1344                                    uint32_t *alloc_count,
1345                                    void **ptr);
1346 
1347 void v3dv_cmd_buffer_emit_pre_draw(struct v3dv_cmd_buffer *cmd_buffer,
1348                                    bool indexed, bool indirect,
1349                                    uint32_t vertex_count);
1350 
1351 bool v3dv_job_allocate_tile_state(struct v3dv_job *job);
1352 
1353 void
1354 v3dv_setup_dynamic_framebuffer(struct v3dv_cmd_buffer *cmd_buffer,
1355                                const VkRenderingInfoKHR *pRenderingInfo);
1356 
1357 void
1358 v3dv_destroy_dynamic_framebuffer(struct v3dv_cmd_buffer *cmd_buffer);
1359 
1360 void
1361 v3dv_setup_dynamic_render_pass(struct v3dv_cmd_buffer *cmd_buffer,
1362                                const VkRenderingInfoKHR *pRenderingInfo);
1363 
1364 void
1365 v3dv_setup_dynamic_render_pass_inheritance(struct v3dv_cmd_buffer *cmd_buffer,
1366                                            const VkCommandBufferInheritanceRenderingInfo *info);
1367 
1368 /* FIXME: only used on v3dv_cmd_buffer and v3dvx_cmd_buffer, perhaps move to a
1369  * cmd_buffer specific header?
1370  */
1371 struct v3dv_draw_info {
1372    uint32_t vertex_count;
1373    uint32_t instance_count;
1374    uint32_t first_vertex;
1375    uint32_t first_instance;
1376 };
1377 
1378 struct v3dv_vertex_binding {
1379    struct v3dv_buffer *buffer;
1380    VkDeviceSize offset;
1381    VkDeviceSize size;
1382 };
1383 
1384 struct v3dv_descriptor_state {
1385    struct v3dv_descriptor_set *descriptor_sets[MAX_SETS];
1386    uint32_t valid;
1387    uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS];
1388 };
1389 
1390 struct v3dv_cmd_pipeline_state {
1391    struct v3dv_pipeline *pipeline;
1392 
1393    struct v3dv_descriptor_state descriptor_state;
1394 };
1395 
1396 enum {
1397    V3DV_BARRIER_GRAPHICS_BIT = (1 << 0),
1398    V3DV_BARRIER_COMPUTE_BIT  = (1 << 1),
1399    V3DV_BARRIER_TRANSFER_BIT = (1 << 2),
1400    V3DV_BARRIER_CPU_BIT      = (1 << 3),
1401 };
1402 #define V3DV_BARRIER_ALL (V3DV_BARRIER_GRAPHICS_BIT | \
1403                           V3DV_BARRIER_TRANSFER_BIT | \
1404                           V3DV_BARRIER_COMPUTE_BIT | \
1405                           V3DV_BARRIER_CPU_BIT)
1406 
1407 struct v3dv_barrier_state {
1408    /* Mask of V3DV_BARRIER_* indicating where we consume a barrier. */
1409    uint8_t dst_mask;
1410 
1411    /* For each possible consumer of a barrier, a mask of V3DV_BARRIER_*
1412     * indicating the sources of the dependency.
1413     */
1414    uint8_t src_mask_graphics;
1415    uint8_t src_mask_transfer;
1416    uint8_t src_mask_compute;
1417 
1418    /* For graphics barriers, access masks involved. Used to decide if we need
1419     * to execute a binning or render barrier.
1420     */
1421    VkAccessFlags2 bcl_buffer_access;
1422    VkAccessFlags2 bcl_image_access;
1423 };
1424 
1425 struct v3dv_cmd_buffer_state {
1426    struct v3dv_render_pass *pass;
1427    struct v3dv_framebuffer *framebuffer;
1428 
1429    /* VK_KHR_dynamic_rendering */
1430    struct v3dv_render_pass dynamic_pass;
1431    struct v3dv_subpass dynamic_subpass;
1432    struct v3dv_render_pass_attachment dynamic_attachments[18 /* (8 color + D/S) x 2 (for resolves) */];
1433    struct v3dv_subpass_attachment dynamic_subpass_attachments[18];
1434    struct v3dv_framebuffer *dynamic_framebuffer;
1435 
1436    VkRect2D render_area;
1437 
1438    /* Current job being recorded */
1439    struct v3dv_job *job;
1440 
1441    uint32_t subpass_idx;
1442 
1443    struct v3dv_cmd_pipeline_state gfx;
1444    struct v3dv_cmd_pipeline_state compute;
1445 
1446    /* For most state tracking we rely on vk_dynamic_graphics_state, but we
1447     * maintain a custom structure for some state-related data that we want to
1448     * cache.
1449     */
1450    struct v3dv_dynamic_state dynamic;
1451 
1452    /* This dirty mask is for v3dv_cmd_dirty_bits (FIXME: perhaps we should be
1453     * more explicit about it). For dirty flags coming from Vulkan dynamic state,
1454     * use the vk_dynamic_graphics_state handled by the vk_cmd_buffer instead.
1455     */
1456    uint32_t dirty;
1457    VkShaderStageFlagBits dirty_descriptor_stages;
1458    VkShaderStageFlagBits dirty_push_constants_stages;
1459 
1460    /* Current clip window. We use this to check whether we have an active
1461     * scissor, since in that case we can't use TLB clears and need to fallback
1462     * to drawing rects.
1463     */
1464    VkRect2D clip_window;
1465 
1466    /* Whether our render area is aligned to tile boundaries. If this is false
1467     * then we have tiles that are only partially covered by the render area,
1468     * and therefore, we need to be careful with our loads and stores so we don't
1469     * modify pixels for the tile area that is not covered by the render area.
1470     * This means, for example, that we can't use the TLB to clear, since that
1471     * always clears full tiles.
1472     */
1473    bool tile_aligned_render_area;
1474 
1475    /* FIXME: we have just one client-side BO for the push constants,
1476     * independently of the stageFlags in vkCmdPushConstants, and the
1477     * pipelineBindPoint in vkCmdBindPipeline. We could probably do more stage
1478     * tuning in the future if it makes sense.
1479     */
1480    uint32_t push_constants_size;
1481    uint32_t push_constants_data[MAX_PUSH_CONSTANTS_SIZE / 4];
1482 
1483    uint32_t attachment_alloc_count;
1484    struct v3dv_cmd_buffer_attachment_state *attachments;
1485 
1486    struct v3dv_vertex_binding vertex_bindings[MAX_VBS];
1487 
1488    struct {
1489       VkBuffer buffer;
1490       VkDeviceSize offset;
1491       VkDeviceSize size;
1492       uint8_t index_size;
1493    } index_buffer;
1494 
1495    /* Current uniforms */
1496    struct {
1497       struct v3dv_cl_reloc vs_bin;
1498       struct v3dv_cl_reloc vs;
1499       struct v3dv_cl_reloc gs_bin;
1500       struct v3dv_cl_reloc gs;
1501       struct v3dv_cl_reloc fs;
1502    } uniforms;
1503 
1504    /* Current view index for multiview rendering */
1505    uint32_t view_index;
1506 
1507    /* Current draw ID for multidraw */
1508    uint32_t draw_id;
1509 
1510    /* Used to flag OOM conditions during command buffer recording */
1511    bool oom;
1512 
1513    /* If we are currently recording job(s) for a transfer operation */
1514    bool is_transfer;
1515 
1516    /* VK_KHR_dynamic_rendering */
1517    bool suspending;
1518    bool resuming;
1519 
1520    /* Barrier state tracking */
1521    struct v3dv_barrier_state barrier;
1522 
1523    /* Secondary command buffer state */
1524    struct {
1525       bool occlusion_query_enable;
1526    } inheritance;
1527 
1528    /* Command buffer state saved during a meta operation */
1529    struct {
1530       uint32_t subpass_idx;
1531       VkRenderPass pass;
1532       VkFramebuffer framebuffer;
1533 
1534       uint32_t attachment_alloc_count;
1535       uint32_t attachment_count;
1536       struct v3dv_cmd_buffer_attachment_state *attachments;
1537 
1538       bool tile_aligned_render_area;
1539       VkRect2D render_area;
1540 
1541       struct vk_dynamic_graphics_state dynamic_graphics_state;
1542       struct v3dv_dynamic_state dynamic;
1543 
1544       struct v3dv_cmd_pipeline_state gfx;
1545       bool has_descriptor_state;
1546 
1547       uint32_t push_constants[MAX_PUSH_CONSTANTS_SIZE / 4];
1548       uint32_t push_constants_size;
1549    } meta;
1550 
1551    /* Command buffer state for queries */
1552    struct {
1553       /* A list of vkCmdEndQuery commands recorded in the command buffer during
1554        * a render pass. We queue these here and then schedule the corresponding
1555        * CPU jobs for them at the time we finish the GPU job in which they have
1556        * been recorded.
1557        */
1558       struct {
1559          uint32_t used_count;
1560          uint32_t alloc_count;
1561          struct v3dv_end_query_info *states;
1562       } end;
1563 
1564       struct {
1565          /* This BO is not NULL if we have an active occlusion query, that is,
1566           * we have called vkCmdBeginQuery but not vkCmdEndQuery.
1567           */
1568          struct v3dv_bo *bo;
1569          uint32_t offset;
1570          /* When the driver emits draw calls to implement other operations in
1571           * the middle of a render pass (such as an attachment clear), we need
1572           * to pause occlusion query recording and resume it later so that
1573           * these draw calls don't register in occlusion counters. We use
1574           * this to store the BO reference in which we should resume occlusion
1575           * query counters after the driver is done emitting its draw calls.
1576           */
1577          struct v3dv_bo *paused_bo;
1578 
1579          /* This pointer is not NULL if we have an active performance query */
1580          struct v3dv_perf_query *perf;
1581       } active_query;
1582    } query;
1583 
1584    /* This is dynamic state since VK_EXT_extended_dynamic_state. */
1585    bool z_updates_enable;
1586 
1587    /* ez_state can be dynamic since VK_EXT_extended_dynamic_state so we need
1588     * to keep track of it in the cmd_buffer state
1589     */
1590    enum v3dv_ez_state ez_state;
1591 
1592    /* incompatible_ez_test can be dynamic since VK_EXT_extended_dynamic_state
1593     * so we need to keep track of it in the cmd_buffer state
1594     */
1595    bool incompatible_ez_test;
1596 };
1597 
1598 void
1599 v3dv_cmd_buffer_state_get_viewport_z_xform(struct v3dv_cmd_buffer *cmd_buffer,
1600                                            uint32_t vp_idx,
1601                                            float *translate_z, float *scale_z);
1602 
1603 /* The following struct represents the info from a descriptor that we store in
1604  * host memory. These are mostly links to other existing Vulkan objects, like
1605  * the image_view used to access swizzle info, or the buffer used for a
1606  * UBO/SSBO, for example.
1607  *
1608  * FIXME: revisit whether it makes sense to just move everything that would be
1609  * needed from a descriptor to the bo.
1610  */
1611 struct v3dv_descriptor {
1612    VkDescriptorType type;
1613 
1614    union {
1615       struct {
1616          struct v3dv_image_view *image_view;
1617          struct v3dv_sampler *sampler;
1618       };
1619 
1620       struct {
1621          struct v3dv_buffer *buffer;
1622          size_t offset;
1623          size_t range;
1624       };
1625 
1626       struct v3dv_buffer_view *buffer_view;
1627    };
1628 };
1629 
1630 struct v3dv_query {
1631    /* Used by queries where we implement result copying on the CPU, so we can
1632     * tell if the relevant jobs have been submitted for execution. Currently
1633     * these are all but occlusion queries.
1634     */
1635    bool maybe_available;
1636 
1637    union {
1638       /* Used by occlusion queries */
1639       struct {
1640          /* Offset of this query in the occlusion query counter BO */
1641          uint32_t offset;
1642       } occlusion;
1643 
1644       /* Used by timestamp queries */
1645       struct {
1646          /* Offset of this query in the timestamp BO for its value */
1647          uint32_t offset;
1648 
1649          /* Syncobj to signal timestamp query availability */
1650          struct vk_sync *sync;
1651       } timestamp;
1652 
1653       /* Used by performance queries */
1654       struct v3dv_perf_query perf;
1655    };
1656 };
1657 
1658 struct v3dv_query_pool {
1659    struct vk_object_base base;
1660 
1661    /* Per-pool Vulkan resources required to implement GPU-side query
1662     * functions (only occlusion queries for now).
1663     */
1664    struct {
1665       /* Buffer to access the BO with the occlusion query results and
1666        * availability info.
1667        */
1668       VkBuffer buf;
1669       VkDeviceMemory mem;
1670 
1671       /* Descriptor set for accessing the buffer from a pipeline. */
1672       VkDescriptorPool descriptor_pool;
1673       VkDescriptorSet descriptor_set;
1674    } meta;
1675 
1676    /* Only used with occlusion queries */
1677    struct {
1678       /* BO with the occlusion counters and query availability */
1679       struct v3dv_bo *bo;
1680       /* Offset of the availability info in the BO */
1681       uint32_t avail_offset;
1682    } occlusion;
1683 
1684    /* Only used with timestamp queries */
1685    struct {
1686       /* BO with the query timestamp values */
1687       struct v3dv_bo *bo;
1688    } timestamp;
1689 
1690    /* Only used with performance queries */
1691    struct {
1692       uint32_t ncounters;
1693       uint8_t counters[V3D_MAX_PERFCNT];
1694 
1695       /* V3D has a limit on the number of counters we can track in a
1696        * single performance monitor, so if too many counters are requested
1697        * we need to create multiple monitors to record all of them. This
1698        * field represents the number of monitors required for the number
1699        * of counters requested.
1700        */
1701       uint8_t nperfmons;
1702    } perfmon;
1703 
1704    VkQueryType query_type;
1705    uint32_t query_count;
1706    struct v3dv_query *queries;
1707 };
1708 
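/* Illustrative sketch only (names are hypothetical, not the driver's actual
 * code): if each kernel performance monitor can track at most
 * MAX_COUNTERS_PER_MONITOR counters, the number of monitors needed for a
 * pool follows from a simple round-up division:
 *
 *    nperfmons = DIV_ROUND_UP(ncounters, MAX_COUNTERS_PER_MONITOR);
 *
 * e.g. 40 requested counters with a 32-counter limit would need 2 monitors.
 */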
1709 VkResult
1710 v3dv_query_allocate_resources(struct v3dv_device *device);
1711 
1712 void
1713 v3dv_query_free_resources(struct v3dv_device *device);
1714 
1715 VkResult v3dv_get_query_pool_results_cpu(struct v3dv_device *device,
1716                                          struct v3dv_query_pool *pool,
1717                                          uint32_t first,
1718                                          uint32_t count,
1719                                          void *data,
1720                                          VkDeviceSize stride,
1721                                          VkQueryResultFlags flags);
1722 
1723 void v3dv_reset_query_pool_cpu(struct v3dv_device *device,
1724                                struct v3dv_query_pool *query_pool,
1725                                uint32_t first,
1726                                uint32_t last);
1727 
1728 void v3dv_cmd_buffer_emit_set_query_availability(struct v3dv_cmd_buffer *cmd_buffer,
1729                                                  struct v3dv_query_pool *pool,
1730                                                  uint32_t query, uint32_t count,
1731                                                  uint8_t availability);
1732 
1733 typedef void (*v3dv_cmd_buffer_private_obj_destroy_cb)(VkDevice device,
1734                                                        uint64_t pobj,
1735                                                        VkAllocationCallbacks *alloc);
1736 struct v3dv_cmd_buffer_private_obj {
1737    struct list_head list_link;
1738    uint64_t obj;
1739    v3dv_cmd_buffer_private_obj_destroy_cb destroy_cb;
1740 };
1741 
1742 extern const struct vk_command_buffer_ops v3dv_cmd_buffer_ops;
1743 
1744 struct v3dv_cmd_buffer {
1745    struct vk_command_buffer vk;
1746 
1747    struct v3dv_device *device;
1748 
1749    VkCommandBufferUsageFlags usage_flags;
1750 
1751    enum v3dv_cmd_buffer_status status;
1752 
1753    struct v3dv_cmd_buffer_state state;
1754 
1755    /* Buffer where we upload push constant data to resolve indirect indexing */
1756    struct v3dv_cl_reloc push_constants_resource;
1757 
1758    /* Collection of Vulkan objects created internally by the driver (typically
1759     * during recording of meta operations) that are part of the command buffer
1760     * and should be destroyed with it.
1761     */
1762    struct list_head private_objs; /* v3dv_cmd_buffer_private_obj */
1763 
1764    /* Per-command buffer resources for meta operations. */
1765    struct {
1766       struct {
1767          /* The current descriptor pool for blit sources */
1768          VkDescriptorPool dspool;
1769       } blit;
1770       struct {
1771          /* The current descriptor pool for texel buffer copy sources */
1772          VkDescriptorPool dspool;
1773       } texel_buffer_copy;
1774       struct {
1775          /* The current descriptor pool for the copy query results output buffer */
1776          VkDescriptorPool dspool;
1777       } query;
1778    } meta;
1779 
1780    /* List of jobs in the command buffer. For primary command buffers it
1781     * represents the jobs we want to submit to the GPU. For secondary command
1782     * buffers it represents jobs that will be merged into a primary command
1783     * buffer via vkCmdExecuteCommands.
1784     */
1785    struct list_head jobs;
1786 };
1787 
1788 struct v3dv_job *v3dv_cmd_buffer_start_job(struct v3dv_cmd_buffer *cmd_buffer,
1789                                            int32_t subpass_idx,
1790                                            enum v3dv_job_type type);
1791 void v3dv_cmd_buffer_finish_job(struct v3dv_cmd_buffer *cmd_buffer);
1792 
1793 struct v3dv_job *v3dv_cmd_buffer_subpass_start(struct v3dv_cmd_buffer *cmd_buffer,
1794                                                uint32_t subpass_idx);
1795 struct v3dv_job *v3dv_cmd_buffer_subpass_resume(struct v3dv_cmd_buffer *cmd_buffer,
1796                                                 uint32_t subpass_idx);
1797 
1798 void v3dv_cmd_buffer_subpass_finish(struct v3dv_cmd_buffer *cmd_buffer);
1799 
1800 void v3dv_cmd_buffer_meta_state_push(struct v3dv_cmd_buffer *cmd_buffer,
1801                                      bool push_descriptor_state);
1802 void v3dv_cmd_buffer_meta_state_pop(struct v3dv_cmd_buffer *cmd_buffer,
1803                                     bool needs_subpass_resume);
1804 
1805 void v3dv_cmd_buffer_begin_query(struct v3dv_cmd_buffer *cmd_buffer,
1806                                  struct v3dv_query_pool *pool,
1807                                  uint32_t query,
1808                                  VkQueryControlFlags flags);
1809 
1810 void v3dv_cmd_buffer_pause_occlusion_query(struct v3dv_cmd_buffer *cmd_buffer);
1811 void v3dv_cmd_buffer_resume_occlusion_query(struct v3dv_cmd_buffer *cmd_buffer);
1812 
1813 void v3dv_cmd_buffer_end_query(struct v3dv_cmd_buffer *cmd_buffer,
1814                                struct v3dv_query_pool *pool,
1815                                uint32_t query);
1816 
1817 void v3dv_cmd_buffer_copy_query_results(struct v3dv_cmd_buffer *cmd_buffer,
1818                                         struct v3dv_query_pool *pool,
1819                                         uint32_t first,
1820                                         uint32_t count,
1821                                         struct v3dv_buffer *dst,
1822                                         uint32_t offset,
1823                                         uint32_t stride,
1824                                         VkQueryResultFlags flags);
1825 
1826 void v3dv_cmd_buffer_add_tfu_job(struct v3dv_cmd_buffer *cmd_buffer,
1827                                  struct drm_v3d_submit_tfu *tfu);
1828 
1829 void v3dv_cmd_buffer_rewrite_indirect_csd_job(struct v3dv_device *device,
1830                                               struct v3dv_csd_indirect_cpu_job_info *info,
1831                                               const uint32_t *wg_counts);
1832 
1833 void v3dv_cmd_buffer_add_private_obj(struct v3dv_cmd_buffer *cmd_buffer,
1834                                      uint64_t obj,
1835                                      v3dv_cmd_buffer_private_obj_destroy_cb destroy_cb);
1836 
1837 void v3dv_cmd_buffer_merge_barrier_state(struct v3dv_barrier_state *dst,
1838                                          struct v3dv_barrier_state *src);
1839 
1840 void v3dv_cmd_buffer_consume_bcl_sync(struct v3dv_cmd_buffer *cmd_buffer,
1841                                       struct v3dv_job *job);
1842 
1843 bool v3dv_cmd_buffer_check_needs_load(const struct v3dv_cmd_buffer_state *state,
1844                                       VkImageAspectFlags aspect,
1845                                       uint32_t first_subpass_idx,
1846                                       VkAttachmentLoadOp load_op,
1847                                       uint32_t last_subpass_idx,
1848                                       VkAttachmentStoreOp store_op);
1849 
1850 bool v3dv_cmd_buffer_check_needs_store(const struct v3dv_cmd_buffer_state *state,
1851                                        VkImageAspectFlags aspect,
1852                                        uint32_t last_subpass_idx,
1853                                        VkAttachmentStoreOp store_op);
1854 
1855 void v3dv_cmd_buffer_emit_pipeline_barrier(struct v3dv_cmd_buffer *cmd_buffer,
1856                                            const VkDependencyInfo *info);
1857 
1858 bool v3dv_cmd_buffer_copy_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
1859                                     struct v3dv_image *dst,
1860                                     struct v3dv_image *src,
1861                                     const VkImageCopy2 *region);
1862 
1863 struct v3dv_event {
1864    struct vk_object_base base;
1865 
1866    /* Link in the device list of pre-allocated free events */
1867    struct list_head link;
1868 
1869    /* Each event gets a different index, which we use to compute the offset
1870     * in the BO we use to track their state (signaled vs reset).
1871     */
1872    uint32_t index;
1873 };
1874 
1875 VkResult
1876 v3dv_event_allocate_resources(struct v3dv_device *device);
1877 
1878 void
1879 v3dv_event_free_resources(struct v3dv_device *device);
1880 
1881 struct v3dv_shader_variant {
1882    enum broadcom_shader_stage stage;
1883 
1884    union {
1885       struct v3d_prog_data *base;
1886       struct v3d_vs_prog_data *vs;
1887       struct v3d_gs_prog_data *gs;
1888       struct v3d_fs_prog_data *fs;
1889       struct v3d_compute_prog_data *cs;
1890    } prog_data;
1891 
1892    /* We explicitly save the prog_data_size as it makes serialization
1893     * easier
1894     */
1895    uint32_t prog_data_size;
1896 
1897    /* The assembly for this variant will be uploaded to a BO shared with all
1898     * other shader stages in that pipeline. This is the offset in that BO.
1899     */
1900    uint32_t assembly_offset;
1901 
1902    /* Note: don't assume qpu_insts is always NULL or non-NULL. In general we
1903     * will try to free it as soon as we upload it to the shared bo while we
1904     * compile the different stages. But we may decide to keep it around based
1905     * on some pipeline creation flags, like
1906     * VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT.
1907     */
1908    uint64_t *qpu_insts;
1909    uint32_t qpu_insts_size;
1910 };
1911 
1912 /*
1913  * Per-stage info for each stage, useful so shader_module_compile_to_nir and
1914  * other methods don't have so many parameters.
1915  *
1916  * FIXME: for the case of the coordinate shader and the vertex shader, module,
1917  * entrypoint, spec_info and nir are the same. There is also info only
1918  * relevant to some stages. But it seemed too much of a hassle to create a new
1919  * struct only to handle that. Revisit if this kind of info starts to grow.
1920  */
1921 struct v3dv_pipeline_stage {
1922    struct v3dv_pipeline *pipeline;
1923 
1924    enum broadcom_shader_stage stage;
1925 
1926    const struct vk_shader_module *module;
1927    const char *entrypoint;
1928    const VkSpecializationInfo *spec_info;
1929    const VkShaderModuleCreateInfo *module_info;
1930 
1931    nir_shader *nir;
1932 
1933    /* The following is the combined hash of module+entrypoint+spec_info+nir */
1934    unsigned char shader_sha1[20];
1935 
1936    /** A name for this program, so you can track it in shader-db output. */
1937    uint32_t program_id;
1938 
1939    VkPipelineCreationFeedback feedback;
1940 
1941    struct vk_pipeline_robustness_state robustness;
1942 };
1943 
1944 /* We are using the descriptor pool entry for two things:
1945  * * Track the allocated sets, so we can properly free them if needed.
1946  * * Track the suballocated pool bo regions, so if some descriptor set is
1947  *   freed, the gap can be reallocated later.
1948  *
1949  * These only make sense if the pool was not created with the flag
1950  * VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT.
1951  */
1952 struct v3dv_descriptor_pool_entry
1953 {
1954    struct v3dv_descriptor_set *set;
1955    /* Offset and size of the subregion allocated for this entry from the
1956     * pool->bo
1957     */
1958    uint32_t offset;
1959    uint32_t size;
1960 };
1961 
1962 struct v3dv_descriptor_pool {
1963    struct vk_object_base base;
1964 
1965    /* A list with all descriptor sets allocated from the pool. */
1966    struct list_head set_list;
1967 
1968    /* If this descriptor pool has been allocated by the driver for internal
1969     * use, typically to implement meta operations.
1970     */
1971    bool is_driver_internal;
1972 
1973    struct v3dv_bo *bo;
1974    /* Current offset into the descriptor bo. 0 means that we have not used it
1975     * for any descriptor yet. If the descriptor bo is NULL, the current offset
1976     * is meaningless.
1977     */
1978    uint32_t current_offset;
1979 
1980    /* If VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT is not set, the
1981     * descriptor sets are suballocated linearly from pool memory through the
1982     * following pointers (see the sketch after this struct). If it is set,
1983     * these pointers are not used and sets are allocated/freed individually.
1984     */
1985    uint8_t *host_memory_base;
1986    uint8_t *host_memory_ptr;
1987    uint8_t *host_memory_end;
1988 
1989    uint32_t entry_count;
1990    uint32_t max_entry_count;
1991    struct v3dv_descriptor_pool_entry entries[0];
1992 };
1993 
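/* Minimal sketch, under stated assumptions, of how linear suballocation from
 * the pool host memory could look when
 * VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT is not set; "set_size"
 * and the error handling are illustrative, not the driver's actual path:
 *
 *    if (pool->host_memory_base) {
 *       if (pool->host_memory_ptr + set_size > pool->host_memory_end)
 *          return VK_ERROR_OUT_OF_POOL_MEMORY;
 *       set = (struct v3dv_descriptor_set *) pool->host_memory_ptr;
 *       pool->host_memory_ptr += set_size;
 *    }
 */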
1994 struct v3dv_descriptor_set {
1995    struct vk_object_base base;
1996 
1997    /* List link into the list of all sets allocated from the pool */
1998    struct list_head pool_link;
1999 
2000    struct v3dv_descriptor_pool *pool;
2001 
2002    struct v3dv_descriptor_set_layout *layout;
2003 
2004    /* Offset relative to the descriptor pool bo for this set */
2005    uint32_t base_offset;
2006 
2007    /* The descriptors below can be indexed (set/binding) using the set_layout
2008     */
2009    struct v3dv_descriptor descriptors[0];
2010 };
2011 
2012 struct v3dv_descriptor_set_binding_layout {
2013    VkDescriptorType type;
2014 
2015    /* Number of array elements in this binding */
2016    uint32_t array_size;
2017 
2018    /* Index into the flattened descriptor set */
2019    uint32_t descriptor_index;
2020 
2021    uint32_t dynamic_offset_count;
2022    uint32_t dynamic_offset_index;
2023 
2024    /* Offset into the descriptor set where this descriptor lives (the final
2025     * offset on the descriptor bo needs to take into account set->base_offset)
2026     */
2027    uint32_t descriptor_offset;
2028 
2029    /* Offset in the v3dv_descriptor_set_layout of the immutable samplers, or 0
2030     * if there are no immutable samplers.
2031     */
2032    uint32_t immutable_samplers_offset;
2033 
2034    /* Descriptors for multiplanar combined image samplers are larger.
2035     * For mutable descriptors, this is always 1.
2036     */
2037    uint8_t plane_stride;
2038 };
2039 
2040 struct v3dv_descriptor_set_layout {
2041    struct vk_object_base base;
2042 
2043    VkDescriptorSetLayoutCreateFlags flags;
2044 
2045    /* Number of bindings in this descriptor set */
2046    uint32_t binding_count;
2047 
2048    /* Total bo size needed for this descriptor set
2049     */
2050    uint32_t bo_size;
2051 
2052    /* Shader stages affected by this descriptor set */
2053    uint16_t shader_stages;
2054 
2055    /* Number of dynamic offsets used by this descriptor set */
2056    uint16_t dynamic_offset_count;
2057 
2058    /* Number of descriptors in this descriptor set */
2059    uint32_t descriptor_count;
2060 
2061    /* Descriptor set layouts can be destroyed even if they are still being
2062     * used.
2063     */
2064    uint32_t ref_cnt;
2065 
2066    /* Bindings in this descriptor set */
2067    struct v3dv_descriptor_set_binding_layout binding[0];
2068 };
2069 
2070 void
2071 v3dv_descriptor_set_layout_destroy(struct v3dv_device *device,
2072                                    struct v3dv_descriptor_set_layout *set_layout);
2073 
2074 static inline void
2075 v3dv_descriptor_set_layout_ref(struct v3dv_descriptor_set_layout *set_layout)
2076 {
2077    assert(set_layout && set_layout->ref_cnt >= 1);
2078    p_atomic_inc(&set_layout->ref_cnt);
2079 }
2080 
2081 static inline void
2082 v3dv_descriptor_set_layout_unref(struct v3dv_device *device,
2083                                  struct v3dv_descriptor_set_layout *set_layout)
2084 {
2085    assert(set_layout && set_layout->ref_cnt >= 1);
2086    if (p_atomic_dec_zero(&set_layout->ref_cnt))
2087       v3dv_descriptor_set_layout_destroy(device, set_layout);
2088 }
2089 
2090 struct v3dv_pipeline_layout {
2091    struct vk_object_base base;
2092 
2093    struct {
2094       struct v3dv_descriptor_set_layout *layout;
2095       uint32_t dynamic_offset_start;
2096    } set[MAX_SETS];
2097 
2098    uint32_t num_sets;
2099 
2100    /* Shader stages that are declared to use descriptors from this layout */
2101    uint32_t shader_stages;
2102 
2103    uint32_t dynamic_offset_count;
2104    uint32_t push_constant_size;
2105 
2106    /* Pipeline layouts can be destroyed after creating pipelines since
2107     * maintenance4.
2108     */
2109    uint32_t ref_cnt;
2110 
2111    unsigned char sha1[20];
2112 };
2113 
2114 void
2115 v3dv_pipeline_layout_destroy(struct v3dv_device *device,
2116                              struct v3dv_pipeline_layout *layout,
2117                              const VkAllocationCallbacks *alloc);
2118 
2119 static inline void
2120 v3dv_pipeline_layout_ref(struct v3dv_pipeline_layout *layout)
2121 {
2122    assert(layout && layout->ref_cnt >= 1);
2123    p_atomic_inc(&layout->ref_cnt);
2124 }
2125 
2126 static inline void
2127 v3dv_pipeline_layout_unref(struct v3dv_device *device,
2128                            struct v3dv_pipeline_layout *layout,
2129                            const VkAllocationCallbacks *alloc)
2130 {
2131    assert(layout && layout->ref_cnt >= 1);
2132    if (p_atomic_dec_zero(&layout->ref_cnt))
2133       v3dv_pipeline_layout_destroy(device, layout, alloc);
2134 }
2135 
2136 /*
2137  * We are using descriptor maps for ubo/ssbo and texture/samplers, so the map
2138  * needs to be big enough to cover the maximum count for all of them.
2139  *
2140  * FIXME: one alternative would be to allocate each map as big as needed for
2141  * its descriptor type. That would mean more individual allocations.
2142  */
2143 #define DESCRIPTOR_MAP_SIZE MAX3(V3D_MAX_TEXTURE_SAMPLERS,                         \
2144                                  MAX_UNIFORM_BUFFERS + MAX_INLINE_UNIFORM_BUFFERS, \
2145                                  MAX_STORAGE_BUFFERS)
2146 
2147 
2148 struct v3dv_descriptor_map {
2149    /* FIXME: avoid fixed size array/justify the size */
2150    unsigned num_desc; /* Number of descriptors  */
2151    int set[DESCRIPTOR_MAP_SIZE];
2152    int binding[DESCRIPTOR_MAP_SIZE];
2153    int array_index[DESCRIPTOR_MAP_SIZE];
2154    int array_size[DESCRIPTOR_MAP_SIZE];
2155    uint8_t plane[DESCRIPTOR_MAP_SIZE];
2156    bool used[DESCRIPTOR_MAP_SIZE];
2157 
2158    /* NOTE: the following is only used for samplers, but this is the easiest
2159     * place to put it.
2160     */
2161    uint8_t return_size[DESCRIPTOR_MAP_SIZE];
2162 };
2163 
2164 struct v3dv_sampler {
2165    struct vk_object_base base;
2166    struct vk_ycbcr_conversion *conversion;
2167 
2168    bool compare_enable;
2169    bool unnormalized_coordinates;
2170 
2171    /* Prepacked per-plane SAMPLER_STATE, referenced as part of the TMU
2172     * configuration. If needed, it will be copied to the descriptor info during
2173     * vkUpdateDescriptorSets.
2174     */
2175    uint8_t plane_count;
2176    uint8_t sampler_state[V3DV_SAMPLER_STATE_LENGTH];
2177 };
2178 
2179 /* We keep two special values for the sampler idx that represent the case
2180  * where a sampler is not needed/provided. The main use is that even if we
2181  * don't have a sampler, we still need to do the output unpacking (through
2182  * nir_lower_tex). The easiest way to do this is to add these special "no
2183  * sampler" entries to the sampler_map, and then use the proper unpacking for
2184  * that case.
2185  *
2186  * We have one for a 16-bit output size and another for a 32-bit output size,
2187  * and we use the RelaxedPrecision decoration to decide between the two (see
2188  * the sketch after these defines).
2189  */
2190 #define V3DV_NO_SAMPLER_16BIT_IDX 0
2191 #define V3DV_NO_SAMPLER_32BIT_IDX 1
2192 
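/* Illustrative only: selecting between the two special indices based on a
 * RelaxedPrecision-derived flag ("relaxed" is a hypothetical variable name):
 *
 *    uint32_t sampler_idx = relaxed ? V3DV_NO_SAMPLER_16BIT_IDX
 *                                   : V3DV_NO_SAMPLER_32BIT_IDX;
 *    uint8_t return_size = relaxed ? 16 : 32;
 */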
2193 struct v3dv_descriptor_maps {
2194    struct v3dv_descriptor_map ubo_map;
2195    struct v3dv_descriptor_map ssbo_map;
2196    struct v3dv_descriptor_map sampler_map;
2197    struct v3dv_descriptor_map texture_map;
2198 };
2199 
2200 /* This structure represents data shared between different objects, like the
2201  * pipeline and the pipeline cache, so we ref-count it to know when it should
2202  * be freed.
2203  */
2204 struct v3dv_pipeline_shared_data {
2205    uint32_t ref_cnt;
2206 
2207    unsigned char sha1_key[20];
2208 
2209    struct v3dv_descriptor_maps *maps[BROADCOM_SHADER_STAGES];
2210    struct v3dv_shader_variant *variants[BROADCOM_SHADER_STAGES];
2211 
2212    struct v3dv_bo *assembly_bo;
2213 };
2214 
2215 struct v3dv_pipeline_executable_data {
2216    enum broadcom_shader_stage stage;
2217    char *nir_str;
2218    char *qpu_str;
2219 };
2220 
2221 struct v3dv_pipeline {
2222    struct vk_object_base base;
2223 
2224    struct v3dv_device *device;
2225 
2226    VkShaderStageFlags active_stages;
2227    VkPipelineCreateFlagBits2KHR flags;
2228 
2229    struct v3dv_render_pass *pass;
2230    struct v3dv_subpass *subpass;
2231 
2232    struct v3dv_pipeline_stage *stages[BROADCOM_SHADER_STAGES];
2233 
2234    /* For VK_KHR_dynamic_rendering */
2235    struct vk_render_pass_state rendering_info;
2236 
2237    /* Flags for whether optional pipeline stages are present, for convenience */
2238    bool has_gs;
2239 
2240    /* Whether any stage in this pipeline uses VK_KHR_buffer_device_address */
2241    bool uses_buffer_device_address;
2242 
2243    /* Spilling memory requirements */
2244    struct {
2245       struct v3dv_bo *bo;
2246       uint32_t size_per_thread;
2247    } spill;
2248 
2249    struct vk_dynamic_graphics_state dynamic_graphics_state;
2250    struct v3dv_dynamic_state dynamic;
2251 
2252    struct v3dv_pipeline_layout *layout;
2253 
2254    enum v3dv_ez_state ez_state;
2255 
2256    /* If ez_state is V3D_EZ_DISABLED, whether the reason for disabling is that
2257     * the pipeline selects an incompatible depth test function.
2258     */
2259    bool incompatible_ez_test;
2260 
2261    bool rasterization_enabled;
2262    bool msaa;
2263    bool sample_rate_shading;
2264    uint32_t sample_mask;
2265 
2266    bool negative_one_to_one;
2267 
2268    /* Indexed by vertex binding. */
2269    struct v3dv_pipeline_vertex_binding {
2270       uint32_t instance_divisor;
2271    } vb[MAX_VBS];
2272    uint32_t vb_count;
2273 
2274    /* Note that a lot of info from VkVertexInputAttributeDescription is
2275     * already prepacked, so here we only store those that need to be rechecked
2276     * later. The array must be indexed by driver location, since that is the
2277     * order in which we need to emit the attributes.
2278     */
2279    struct v3dv_pipeline_vertex_attrib {
2280       uint32_t binding;
2281       uint32_t offset;
2282       VkFormat vk_format;
2283    } va[MAX_VERTEX_ATTRIBS];
2284    uint32_t va_count;
2285 
2286    enum mesa_prim topology;
2287 
2288    bool line_smooth;
2289 
2290    struct v3dv_pipeline_shared_data *shared_data;
2291 
2292    /* Combined sha1 of the stages, the layout and the pipeline key. */
2293    unsigned char sha1[20];
2294 
2295    /* In general we can reuse v3dv_device->default_attribute_float, so note
2296     * that the following can be NULL. In 7.x this is not used, so it will
2297     * always be NULL.
2298     *
2299     * FIXME: the content of this BO will be small, so it could be improved to
2300     * be uploaded to a common BO. But since in most cases it will be NULL, it
2301     * is not a priority.
2302     */
2303    struct v3dv_bo *default_attribute_values;
2304 
2305    struct vpm_config vpm_cfg;
2306    struct vpm_config vpm_cfg_bin;
2307 
2308    /* If the pipeline should emit any of the stencil configuration packets */
2309    bool emit_stencil_cfg[2];
2310 
2311    /* Blend state */
2312    struct {
2313       /* Per-RT bit mask with blend enables */
2314       uint8_t enables;
2315       /* Per-RT prepacked blend config packets */
2316       uint8_t cfg[V3D_MAX_DRAW_BUFFERS][V3DV_BLEND_CFG_LENGTH];
2317       /* Flag indicating whether the blend factors in use require
2318        * color constants.
2319        */
2320       bool needs_color_constants;
2321       /* Mask with enabled color channels for each RT (4 bits per RT) */
2322       uint32_t color_write_masks;
2323    } blend;
2324 
2325    struct {
2326       void *mem_ctx;
2327       struct util_dynarray data; /* Array of v3dv_pipeline_executable_data */
2328    } executables;
2329 
2330    /* Packets prepacked during pipeline creation
2331     */
2332    uint8_t cfg_bits[V3DV_CFG_BITS_LENGTH];
2333    uint8_t shader_state_record[V3DV_GL_SHADER_STATE_RECORD_LENGTH];
2334    uint8_t vcm_cache_size[V3DV_VCM_CACHE_SIZE_LENGTH];
2335    uint8_t vertex_attrs[V3DV_GL_SHADER_STATE_ATTRIBUTE_RECORD_LENGTH *
2336                         MAX_VERTEX_ATTRIBS];
2337    uint8_t stencil_cfg[2][V3DV_STENCIL_CFG_LENGTH];
2338 };
2339 
2340 static inline bool
2341 v3dv_texture_shader_state_has_rb_swap_reverse_bits(const struct v3dv_device *device)
2342 {
2343    return device->devinfo.ver > 71 ||
2344           (device->devinfo.ver == 71 && device->devinfo.rev >= 5);
2345 }
2346 
2347 static inline VkPipelineBindPoint
2348 v3dv_pipeline_get_binding_point(struct v3dv_pipeline *pipeline)
2349 {
2350    assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT ||
2351           !(pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT));
2352    return pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT ?
2353       VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS;
2354 }
2355 
2356 static inline struct v3dv_descriptor_state*
2357 v3dv_cmd_buffer_get_descriptor_state(struct v3dv_cmd_buffer *cmd_buffer,
2358                                      struct v3dv_pipeline *pipeline)
2359 {
2360    if (v3dv_pipeline_get_binding_point(pipeline) == VK_PIPELINE_BIND_POINT_COMPUTE)
2361       return &cmd_buffer->state.compute.descriptor_state;
2362    else
2363       return &cmd_buffer->state.gfx.descriptor_state;
2364 }
2365 
2366 const nir_shader_compiler_options *v3dv_pipeline_get_nir_options(const struct v3d_device_info *devinfo);
2367 
2368 uint32_t v3dv_physical_device_vendor_id(const struct v3dv_physical_device *dev);
2369 uint32_t v3dv_physical_device_device_id(const struct v3dv_physical_device *dev);
2370 
2371 const uint8_t *v3dv_get_format_swizzle(struct v3dv_device *device, VkFormat f,
2372                                        uint8_t plane);
2373 const struct v3dv_format *
2374 v3dv_get_compatible_tfu_format(struct v3dv_device *device,
2375                                uint32_t bpp, VkFormat *out_vk_format);
2376 bool v3dv_buffer_format_supports_features(struct v3dv_device *device,
2377                                           VkFormat vk_format,
2378                                           VkFormatFeatureFlags2 features);
2379 
2380 struct v3dv_cl_reloc v3dv_write_uniforms(struct v3dv_cmd_buffer *cmd_buffer,
2381                                          struct v3dv_pipeline *pipeline,
2382                                          struct v3dv_shader_variant *variant);
2383 
2384 struct v3dv_cl_reloc v3dv_write_uniforms_wg_offsets(struct v3dv_cmd_buffer *cmd_buffer,
2385                                                     struct v3dv_pipeline *pipeline,
2386                                                     struct v3dv_shader_variant *variant,
2387                                                     uint32_t **wg_count_offsets);
2388 
2389 struct v3dv_shader_variant *
2390 v3dv_get_shader_variant(struct v3dv_pipeline_stage *p_stage,
2391                         struct v3dv_pipeline_cache *cache,
2392                         struct v3d_key *key,
2393                         size_t key_size,
2394                         const VkAllocationCallbacks *pAllocator,
2395                         VkResult *out_vk_result);
2396 
2397 struct v3dv_shader_variant *
2398 v3dv_shader_variant_create(struct v3dv_device *device,
2399                            enum broadcom_shader_stage stage,
2400                            struct v3d_prog_data *prog_data,
2401                            uint32_t prog_data_size,
2402                            uint32_t assembly_offset,
2403                            uint64_t *qpu_insts,
2404                            uint32_t qpu_insts_size,
2405                            VkResult *out_vk_result);
2406 
2407 void
2408 v3dv_shader_variant_destroy(struct v3dv_device *device,
2409                             struct v3dv_shader_variant *variant);
2410 
2411 static inline void
2412 v3dv_pipeline_shared_data_ref(struct v3dv_pipeline_shared_data *shared_data)
2413 {
2414    assert(shared_data && shared_data->ref_cnt >= 1);
2415    p_atomic_inc(&shared_data->ref_cnt);
2416 }
2417 
2418 void
2419 v3dv_pipeline_shared_data_destroy(struct v3dv_device *device,
2420                                   struct v3dv_pipeline_shared_data *shared_data);
2421 
2422 static inline void
2423 v3dv_pipeline_shared_data_unref(struct v3dv_device *device,
2424                                 struct v3dv_pipeline_shared_data *shared_data)
2425 {
2426    assert(shared_data && shared_data->ref_cnt >= 1);
2427    if (p_atomic_dec_zero(&shared_data->ref_cnt))
2428       v3dv_pipeline_shared_data_destroy(device, shared_data);
2429 }
2430 
2431 struct v3dv_descriptor *
2432 v3dv_descriptor_map_get_descriptor(struct v3dv_descriptor_state *descriptor_state,
2433                                    struct v3dv_descriptor_map *map,
2434                                    struct v3dv_pipeline_layout *pipeline_layout,
2435                                    uint32_t index,
2436                                    uint32_t *dynamic_offset);
2437 
2438 struct v3dv_cl_reloc
2439 v3dv_descriptor_map_get_descriptor_bo(struct v3dv_device *device,
2440                                       struct v3dv_descriptor_state *descriptor_state,
2441                                       struct v3dv_descriptor_map *map,
2442                                       struct v3dv_pipeline_layout *pipeline_layout,
2443                                       uint32_t index,
2444                                       VkDescriptorType *out_type);
2445 
2446 const struct v3dv_sampler *
2447 v3dv_descriptor_map_get_sampler(struct v3dv_descriptor_state *descriptor_state,
2448                                 struct v3dv_descriptor_map *map,
2449                                 struct v3dv_pipeline_layout *pipeline_layout,
2450                                 uint32_t index);
2451 
2452 struct v3dv_cl_reloc
2453 v3dv_descriptor_map_get_sampler_state(struct v3dv_device *device,
2454                                       struct v3dv_descriptor_state *descriptor_state,
2455                                       struct v3dv_descriptor_map *map,
2456                                       struct v3dv_pipeline_layout *pipeline_layout,
2457                                       uint32_t index);
2458 
2459 struct v3dv_cl_reloc
2460 v3dv_descriptor_map_get_texture_shader_state(struct v3dv_device *device,
2461                                              struct v3dv_descriptor_state *descriptor_state,
2462                                              struct v3dv_descriptor_map *map,
2463                                              struct v3dv_pipeline_layout *pipeline_layout,
2464                                              uint32_t index);
2465 
2466 struct v3dv_bo*
2467 v3dv_descriptor_map_get_texture_bo(struct v3dv_descriptor_state *descriptor_state,
2468                                    struct v3dv_descriptor_map *map,
2469                                    struct v3dv_pipeline_layout *pipeline_layout,
2470                                    uint32_t index);
2471 
2472 static inline const struct v3dv_sampler *
2473 v3dv_immutable_samplers(const struct v3dv_descriptor_set_layout *set,
2474                         const struct v3dv_descriptor_set_binding_layout *binding)
2475 {
2476    assert(binding->immutable_samplers_offset);
2477    return (const struct v3dv_sampler *) ((const char *) set + binding->immutable_samplers_offset);
2478 }
2479 
2480 void v3dv_pipeline_cache_init(struct v3dv_pipeline_cache *cache,
2481                               struct v3dv_device *device,
2482                               VkPipelineCacheCreateFlags,
2483                               bool cache_enabled);
2484 
2485 void v3dv_pipeline_cache_finish(struct v3dv_pipeline_cache *cache);
2486 
2487 void v3dv_pipeline_cache_upload_nir(struct v3dv_pipeline *pipeline,
2488                                     struct v3dv_pipeline_cache *cache,
2489                                     nir_shader *nir,
2490                                     unsigned char sha1_key[20]);
2491 
2492 nir_shader* v3dv_pipeline_cache_search_for_nir(struct v3dv_pipeline *pipeline,
2493                                                struct v3dv_pipeline_cache *cache,
2494                                                const nir_shader_compiler_options *nir_options,
2495                                                unsigned char sha1_key[20]);
2496 
2497 struct v3dv_pipeline_shared_data *
2498 v3dv_pipeline_cache_search_for_pipeline(struct v3dv_pipeline_cache *cache,
2499                                         unsigned char sha1_key[20],
2500                                         bool *cache_hit);
2501 
2502 void
2503 v3dv_pipeline_cache_upload_pipeline(struct v3dv_pipeline *pipeline,
2504                                     struct v3dv_pipeline_cache *cache);
2505 
2506 VkResult
2507 v3dv_create_compute_pipeline_from_nir(struct v3dv_device *device,
2508                                       nir_shader *nir,
2509                                       VkPipelineLayout pipeline_layout,
2510                                       VkPipeline *pipeline);
2511 
2512 #define V3DV_FROM_HANDLE(__v3dv_type, __name, __handle)			\
2513    VK_FROM_HANDLE(__v3dv_type, __name, __handle)
2514 
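/* Typical usage at the top of an entrypoint (the handle name "_device" is
 * illustrative):
 *
 *    V3DV_FROM_HANDLE(v3dv_device, device, _device);
 *
 * which unwraps the VkDevice handle into a struct v3dv_device pointer named
 * "device" using the handle casts defined below.
 */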
2515 VK_DEFINE_HANDLE_CASTS(v3dv_cmd_buffer, vk.base, VkCommandBuffer,
2516                        VK_OBJECT_TYPE_COMMAND_BUFFER)
2517 VK_DEFINE_HANDLE_CASTS(v3dv_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)
2518 VK_DEFINE_HANDLE_CASTS(v3dv_instance, vk.base, VkInstance,
2519                        VK_OBJECT_TYPE_INSTANCE)
2520 VK_DEFINE_HANDLE_CASTS(v3dv_physical_device, vk.base, VkPhysicalDevice,
2521                        VK_OBJECT_TYPE_PHYSICAL_DEVICE)
2522 VK_DEFINE_HANDLE_CASTS(v3dv_queue, vk.base, VkQueue, VK_OBJECT_TYPE_QUEUE)
2523 
2524 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_buffer, base, VkBuffer,
2525                                VK_OBJECT_TYPE_BUFFER)
2526 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_buffer_view, base, VkBufferView,
2527                                VK_OBJECT_TYPE_BUFFER_VIEW)
2528 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_device_memory, vk.base, VkDeviceMemory,
2529                                VK_OBJECT_TYPE_DEVICE_MEMORY)
2530 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_pool, base, VkDescriptorPool,
2531                                VK_OBJECT_TYPE_DESCRIPTOR_POOL)
2532 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_set, base, VkDescriptorSet,
2533                                VK_OBJECT_TYPE_DESCRIPTOR_SET)
2534 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_set_layout, base,
2535                                VkDescriptorSetLayout,
2536                                VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT)
2537 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_event, base, VkEvent, VK_OBJECT_TYPE_EVENT)
2538 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_framebuffer, base, VkFramebuffer,
2539                                VK_OBJECT_TYPE_FRAMEBUFFER)
2540 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_image, vk.base, VkImage,
2541                                VK_OBJECT_TYPE_IMAGE)
2542 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_image_view, vk.base, VkImageView,
2543                                VK_OBJECT_TYPE_IMAGE_VIEW)
2544 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline, base, VkPipeline,
2545                                VK_OBJECT_TYPE_PIPELINE)
2546 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline_cache, base, VkPipelineCache,
2547                                VK_OBJECT_TYPE_PIPELINE_CACHE)
2548 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline_layout, base, VkPipelineLayout,
2549                                VK_OBJECT_TYPE_PIPELINE_LAYOUT)
2550 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_query_pool, base, VkQueryPool,
2551                                VK_OBJECT_TYPE_QUERY_POOL)
2552 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_render_pass, base, VkRenderPass,
2553                                VK_OBJECT_TYPE_RENDER_PASS)
2554 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_sampler, base, VkSampler,
2555                                VK_OBJECT_TYPE_SAMPLER)
2556 
2557 /* Flags OOM conditions in command buffer state.
2558  *
2559  * Note: no-op jobs don't have a command buffer reference.
2560  */
2561 static inline void
2562 v3dv_flag_oom(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_job *job)
2563 {
2564    if (cmd_buffer) {
2565       cmd_buffer->state.oom = true;
2566    } else {
2567       assert(job);
2568       if (job->cmd_buffer)
2569          job->cmd_buffer->state.oom = true;
2570    }
2571 }
2572 
2573 #define v3dv_return_if_oom(_cmd_buffer, _job) do {                  \
2574    const struct v3dv_cmd_buffer *__cmd_buffer = _cmd_buffer;        \
2575    if (__cmd_buffer && __cmd_buffer->state.oom)                     \
2576       return;                                                       \
2577    const struct v3dv_job *__job = _job;                             \
2578    if (__job && __job->cmd_buffer && __job->cmd_buffer->state.oom)  \
2579       return;                                                       \
2580 } while(0)
2581 
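/* Typical usage in a recording helper (sketch only; the function name is
 * illustrative): call v3dv_return_if_oom() on entry so that once
 * v3dv_flag_oom() has been set, further recording becomes a no-op instead of
 * dereferencing failed allocations:
 *
 *    static void
 *    emit_something(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_job *job)
 *    {
 *       v3dv_return_if_oom(cmd_buffer, job);
 *       ...
 *    }
 */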
2582 static inline uint32_t
2583 u64_hash(const void *key)
2584 {
2585    return _mesa_hash_data(key, sizeof(uint64_t));
2586 }
2587 
2588 static inline bool
2589 u64_compare(const void *key1, const void *key2)
2590 {
2591    return memcmp(key1, key2, sizeof(uint64_t)) == 0;
2592 }
2593 
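/* These helpers make it possible to key Mesa hash tables by 64-bit values
 * (e.g. Vulkan object handles). A hedged usage sketch with the util API:
 *
 *    struct hash_table *ht =
 *       _mesa_hash_table_create(NULL, u64_hash, u64_compare);
 *
 * where each key passed to insert/search must point to a uint64_t that
 * outlives its entry.
 */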
2594 /* v3d_macros from common requires v3dX and V3DX definitions. Below we need to
2595  * define v3dX for each supported version, because when we compile code that
2596  * is not version-specific, all version-specific macros need to already be
2597  * defined.
2598  */
2599 #ifdef v3dX
2600 #  include "v3dvx_private.h"
2601 #else
2602 #  define v3dX(x) v3d42_##x
2603 #  include "v3dvx_private.h"
2604 #  undef v3dX
2605 
2606 #  define v3dX(x) v3d71_##x
2607 #  include "v3dvx_private.h"
2608 #  undef v3dX
2609 #endif
2610 
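/* For example (with an illustrative function name), version-specific code
 * written as v3dX(emit_foo)(args) compiles to v3d42_emit_foo(args) or
 * v3d71_emit_foo(args) depending on which v3dX definition is in effect when
 * v3dvx_private.h is included.
 */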
2611 VkResult
2612 v3dv_update_image_layout(struct v3dv_device *device,
2613                          struct v3dv_image *image,
2614                          uint64_t modifier,
2615                          bool disjoint,
2616                          const VkImageDrmFormatModifierExplicitCreateInfoEXT *explicit_mod_info);
2617 
2618 float
2619 v3dv_get_aa_line_width(struct v3dv_pipeline *pipeline,
2620                        struct v3dv_cmd_buffer *buffer);
2621 
2622 
2623 void
2624 v3dv_compute_ez_state(struct vk_dynamic_graphics_state *dyn,
2625                       struct v3dv_pipeline *pipeline,
2626                       enum v3dv_ez_state *ez_state,
2627                       bool *incompatible_ez_test);
2628 
2629 uint32_t v3dv_pipeline_primitive(VkPrimitiveTopology vk_prim);
2630 
2631 #endif /* V3DV_PRIVATE_H */
2632