1 /*
2  * Copyright © 2019 Raspberry Pi Ltd
3  *
4  * based in part on anv driver which is:
5  * Copyright © 2015 Intel Corporation
6  *
7  * based in part on radv driver which is:
8  * Copyright © 2016 Red Hat.
9  * Copyright © 2016 Bas Nieuwenhuizen
10  *
11  * Permission is hereby granted, free of charge, to any person obtaining a
12  * copy of this software and associated documentation files (the "Software"),
13  * to deal in the Software without restriction, including without limitation
14  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15  * and/or sell copies of the Software, and to permit persons to whom the
16  * Software is furnished to do so, subject to the following conditions:
17  *
18  * The above copyright notice and this permission notice (including the next
19  * paragraph) shall be included in all copies or substantial portions of the
20  * Software.
21  *
22  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
25  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
28  * IN THE SOFTWARE.
29  */
30 #ifndef V3DV_PRIVATE_H
31 #define V3DV_PRIVATE_H
32 
33 #include <stdio.h>
34 #include <string.h>
35 #include <vulkan/vulkan.h>
36 #include <vulkan/vk_icd.h>
37 #include <vk_enum_to_str.h>
38 
39 #include "vk_device.h"
40 #include "vk_format.h"
41 #include "vk_instance.h"
42 #include "vk_image.h"
43 #include "vk_log.h"
44 #include "vk_physical_device.h"
45 #include "vk_shader_module.h"
46 #include "vk_sync.h"
47 #include "vk_sync_timeline.h"
48 #include "vk_util.h"
49 
50 #include "vk_command_buffer.h"
51 #include "vk_command_pool.h"
52 #include "vk_queue.h"
53 
54 #include <xf86drm.h>
55 
56 #ifdef HAVE_VALGRIND
57 #include <valgrind.h>
58 #include <memcheck.h>
59 #define VG(x) x
60 #else
61 #define VG(x) ((void)0)
62 #endif
63 
64 #include "v3dv_limits.h"
65 
66 #include "common/v3d_device_info.h"
67 #include "common/v3d_limits.h"
68 #include "common/v3d_tiling.h"
69 #include "common/v3d_util.h"
70 
71 #include "compiler/shader_enums.h"
72 #include "compiler/spirv/nir_spirv.h"
73 
74 #include "compiler/v3d_compiler.h"
75 
76 #include "vk_debug_report.h"
77 #include "util/set.h"
78 #include "util/hash_table.h"
79 #include "util/sparse_array.h"
80 #include "util/xmlconfig.h"
81 #include "u_atomic.h"
82 
83 #include "v3dv_entrypoints.h"
84 #include "v3dv_bo.h"
85 
86 #include "drm-uapi/v3d_drm.h"
87 
88 #include "vk_alloc.h"
89 #include "simulator/v3d_simulator.h"
90 
91 #include "v3dv_cl.h"
92 
93 #include "wsi_common.h"
94 
95 /* A non-fatal assert.  Useful for debugging. */
96 #ifdef DEBUG
97 #define v3dv_assert(x) ({ \
98    if (unlikely(!(x))) \
99       fprintf(stderr, "%s:%d ASSERT: %s\n", __FILE__, __LINE__, #x); \
100 })
101 #else
102 #define v3dv_assert(x)
103 #endif
104 
105 #define perf_debug(...) do {                       \
106    if (unlikely(V3D_DEBUG & V3D_DEBUG_PERF))       \
107       fprintf(stderr, __VA_ARGS__);                \
108 } while (0)
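
/* Example usage of the debug helpers above (illustrative only, not driver
 * code; `bo` and `reason` are hypothetical variables):
 *
 *    v3dv_assert(bo->map != NULL);
 *    perf_debug("Falling back to a blit shader: %s\n", reason);
 *
 * v3dv_assert() only logs on failure in DEBUG builds, and perf_debug()
 * output is gated behind the V3D_DEBUG_PERF flag.
 */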
109 
110 struct v3dv_instance;
111 
112 #ifdef USE_V3D_SIMULATOR
113 #define using_v3d_simulator true
114 #else
115 #define using_v3d_simulator false
116 #endif
117 
118 struct v3d_simulator_file;
119 
120 /* Minimum maxMemoryAllocationSize (1 GiB) required by the Vulkan 1.1 spec */
121 #define MAX_MEMORY_ALLOCATION_SIZE (1ull << 30)
122 
123 struct v3dv_physical_device {
124    struct vk_physical_device vk;
125 
126    char *name;
127    int32_t render_fd;
128    int32_t display_fd;
129    int32_t master_fd;
130 
131    /* We need these because it is not clear how to detect
132     * valid devids in a portable way.
133     */
134    bool has_primary;
135    bool has_render;
136 
137    dev_t primary_devid;
138    dev_t render_devid;
139 
140 #if using_v3d_simulator
141    uint32_t device_id;
142 #endif
143 
144    uint8_t driver_build_sha1[20];
145    uint8_t pipeline_cache_uuid[VK_UUID_SIZE];
146    uint8_t device_uuid[VK_UUID_SIZE];
147    uint8_t driver_uuid[VK_UUID_SIZE];
148 
149    struct vk_sync_type drm_syncobj_type;
150    struct vk_sync_timeline_type sync_timeline_type;
151    const struct vk_sync_type *sync_types[3];
152 
153    struct disk_cache *disk_cache;
154 
155    mtx_t mutex;
156 
157    struct wsi_device wsi_device;
158 
159    VkPhysicalDeviceMemoryProperties memory;
160 
161    struct v3d_device_info devinfo;
162 
163    struct v3d_simulator_file *sim_file;
164 
165    const struct v3d_compiler *compiler;
166    uint32_t next_program_id;
167 
168    /* This array holds all our 'struct v3dv_bo' allocations. We use this
169     * so we can add a refcount to our BOs and check if a particular BO
170     * was already allocated in this device using its GEM handle. This is
171     * necessary to properly manage BO imports, because the kernel doesn't
172     * refcount the underlying BO memory.
173     *
174     * Specifically, when self-importing (i.e. importing a BO into the same
175     * device that created it), the kernel will give us the same BO handle
176     * for both BOs and we must only free it once when  both references are
177     * freed. Otherwise, if we are not self-importing, we get two differnt BO
178     * handles, and we want to free each one individually.
179     *
180     * The BOs in this map all have a refcnt with the referece counter and
181     * only self-imported BOs will ever have a refcnt > 1.
182     */
183    struct util_sparse_array bo_map;
184 
185    struct {
186       bool merge_jobs;
187    } options;
188 
189    struct {
190       bool multisync;
191       bool perfmon;
192    } caps;
193 };
194 
195 VkResult v3dv_physical_device_acquire_display(struct v3dv_instance *instance,
196                                               struct v3dv_physical_device *pdevice,
197                                               VkIcdSurfaceBase *surface);
198 
199 static inline struct v3dv_bo *
200 v3dv_device_lookup_bo(struct v3dv_physical_device *device, uint32_t handle)
201 {
202    return (struct v3dv_bo *) util_sparse_array_get(&device->bo_map, handle);
203 }
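
/* Illustrative sketch of how bo_map supports self-imports (assumes the BO
 * struct's refcnt field and a hypothetical init helper; not driver code):
 *
 *    struct v3dv_bo *bo = v3dv_device_lookup_bo(pdevice, gem_handle);
 *    if (bo->refcnt > 0)
 *       p_atomic_inc(&bo->refcnt);            // self-import: reuse the entry
 *    else
 *       bo_init_from_import(bo, gem_handle);  // hypothetical helper
 */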
204 
205 VkResult v3dv_wsi_init(struct v3dv_physical_device *physical_device);
206 void v3dv_wsi_finish(struct v3dv_physical_device *physical_device);
207 struct v3dv_image *v3dv_wsi_get_image_from_swapchain(VkSwapchainKHR swapchain,
208                                                      uint32_t index);
209 
210 void v3dv_meta_clear_init(struct v3dv_device *device);
211 void v3dv_meta_clear_finish(struct v3dv_device *device);
212 
213 void v3dv_meta_blit_init(struct v3dv_device *device);
214 void v3dv_meta_blit_finish(struct v3dv_device *device);
215 
216 void v3dv_meta_texel_buffer_copy_init(struct v3dv_device *device);
217 void v3dv_meta_texel_buffer_copy_finish(struct v3dv_device *device);
218 
219 bool v3dv_meta_can_use_tlb(struct v3dv_image *image,
220                            const VkOffset3D *offset,
221                            VkFormat *compat_format);
222 
223 struct v3dv_instance {
224    struct vk_instance vk;
225 
226    int physicalDeviceCount;
227    struct v3dv_physical_device physicalDevice;
228 
229    bool pipeline_cache_enabled;
230    bool default_pipeline_cache_enabled;
231 };
232 
233 /* FIXME: In addition to tracking the last job submitted by GPU queue (cl, csd,
234  * tfu), we still need a syncobj to track the last overall job submitted
235  * (V3DV_QUEUE_ANY) for when we don't support multisync. Someday we can
236  * start expecting multisync to be present and drop the legacy implementation
237  * together with this V3DV_QUEUE_ANY tracker.
238  */
239 enum v3dv_queue_type {
240    V3DV_QUEUE_CL = 0,
241    V3DV_QUEUE_CSD,
242    V3DV_QUEUE_TFU,
243    V3DV_QUEUE_ANY,
244    V3DV_QUEUE_COUNT,
245 };
246 
247 /* For each GPU queue, we use a syncobj to track the last job submitted. We
248  * set the flag `first` to determine when we are starting a new cmd buffer
249  * batch and, therefore, whether a job submitted to a given queue will be
250  * the first in that batch.
251  */
252 struct v3dv_last_job_sync {
253    /* If the job is the first submitted to a GPU queue in a cmd buffer batch.
254     *
255     * We use V3DV_QUEUE_{CL,CSD,TFU} both with and without multisync.
256     */
257    bool first[V3DV_QUEUE_COUNT];
258    /* Array of syncobj to track the last job submitted to a GPU queue.
259     *
260     * With multisync we use V3DV_QUEUE_{CL,CSD,TFU} to track syncobjs for each
261     * queue, but without multisync we only track the last job submitted to any
262     * queue in V3DV_QUEUE_ANY.
263     */
264    uint32_t syncs[V3DV_QUEUE_COUNT];
265 };
266 
267 struct v3dv_queue {
268    struct vk_queue vk;
269 
270    struct v3dv_device *device;
271 
272    struct v3dv_last_job_sync last_job_syncs;
273 
274    struct v3dv_job *noop_job;
275 
276    /* The last active perfmon ID to prevent mixing of counter results when a
277     * job is submitted with a different perfmon id.
278     */
279    uint32_t last_perfmon_id;
280 };
281 
282 VkResult v3dv_queue_driver_submit(struct vk_queue *vk_queue,
283                                   struct vk_queue_submit *submit);
284 
285 #define V3DV_META_BLIT_CACHE_KEY_SIZE              (4 * sizeof(uint32_t))
286 #define V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE (3 * sizeof(uint32_t) + \
287                                                     sizeof(VkComponentMapping))
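
/* With 4-byte Vulkan enums, these work out to 16 bytes for the blit key
 * (4 x uint32_t) and 28 bytes for the texel buffer copy key (3 x uint32_t
 * plus the 16-byte VkComponentMapping).
 */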
288 
289 struct v3dv_meta_color_clear_pipeline {
290    VkPipeline pipeline;
291    VkRenderPass pass;
292    bool cached;
293    uint64_t key;
294 };
295 
296 struct v3dv_meta_depth_clear_pipeline {
297    VkPipeline pipeline;
298    uint64_t key;
299 };
300 
301 struct v3dv_meta_blit_pipeline {
302    VkPipeline pipeline;
303    VkRenderPass pass;
304    VkRenderPass pass_no_load;
305    uint8_t key[V3DV_META_BLIT_CACHE_KEY_SIZE];
306 };
307 
308 struct v3dv_meta_texel_buffer_copy_pipeline {
309    VkPipeline pipeline;
310    VkRenderPass pass;
311    VkRenderPass pass_no_load;
312    uint8_t key[V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE];
313 };
314 
315 struct v3dv_pipeline_key {
316    bool robust_buffer_access;
317    uint8_t topology;
318    uint8_t logicop_func;
319    bool msaa;
320    bool sample_coverage;
321    bool sample_alpha_to_coverage;
322    bool sample_alpha_to_one;
323    uint8_t cbufs;
324    struct {
325       enum pipe_format format;
326       uint8_t swizzle[4];
327    } color_fmt[V3D_MAX_DRAW_BUFFERS];
328    uint8_t f32_color_rb;
329    uint32_t va_swap_rb_mask;
330    bool has_multiview;
331 };
332 
333 struct v3dv_pipeline_cache_stats {
334    uint32_t miss;
335    uint32_t hit;
336    uint32_t count;
337    uint32_t on_disk_hit;
338 };
339 
340 /* Equivalent to gl_shader_stage, but including the coordinate shaders
341  *
342  * FIXME: perhaps move to common
343  */
344 enum broadcom_shader_stage {
345    BROADCOM_SHADER_VERTEX,
346    BROADCOM_SHADER_VERTEX_BIN,
347    BROADCOM_SHADER_GEOMETRY,
348    BROADCOM_SHADER_GEOMETRY_BIN,
349    BROADCOM_SHADER_FRAGMENT,
350    BROADCOM_SHADER_COMPUTE,
351 };
352 
353 #define BROADCOM_SHADER_STAGES (BROADCOM_SHADER_COMPUTE + 1)
354 
355 /* Assumes that coordinate shaders will be custom-handled by the caller */
356 static inline enum broadcom_shader_stage
357 gl_shader_stage_to_broadcom(gl_shader_stage stage)
358 {
359    switch (stage) {
360    case MESA_SHADER_VERTEX:
361       return BROADCOM_SHADER_VERTEX;
362    case MESA_SHADER_GEOMETRY:
363       return BROADCOM_SHADER_GEOMETRY;
364    case MESA_SHADER_FRAGMENT:
365       return BROADCOM_SHADER_FRAGMENT;
366    case MESA_SHADER_COMPUTE:
367       return BROADCOM_SHADER_COMPUTE;
368    default:
369       unreachable("Unknown gl shader stage");
370    }
371 }
372 
373 static inline gl_shader_stage
374 broadcom_shader_stage_to_gl(enum broadcom_shader_stage stage)
375 {
376    switch (stage) {
377    case BROADCOM_SHADER_VERTEX:
378    case BROADCOM_SHADER_VERTEX_BIN:
379       return MESA_SHADER_VERTEX;
380    case BROADCOM_SHADER_GEOMETRY:
381    case BROADCOM_SHADER_GEOMETRY_BIN:
382       return MESA_SHADER_GEOMETRY;
383    case BROADCOM_SHADER_FRAGMENT:
384       return MESA_SHADER_FRAGMENT;
385    case BROADCOM_SHADER_COMPUTE:
386       return MESA_SHADER_COMPUTE;
387    default:
388       unreachable("Unknown broadcom shader stage");
389    }
390 }
391 
392 static inline bool
393 broadcom_shader_stage_is_binning(enum broadcom_shader_stage stage)
394 {
395    switch (stage) {
396    case BROADCOM_SHADER_VERTEX_BIN:
397    case BROADCOM_SHADER_GEOMETRY_BIN:
398       return true;
399    default:
400       return false;
401    }
402 }
403 
404 static inline bool
405 broadcom_shader_stage_is_render_with_binning(enum broadcom_shader_stage stage)
406 {
407    switch (stage) {
408    case BROADCOM_SHADER_VERTEX:
409    case BROADCOM_SHADER_GEOMETRY:
410       return true;
411    default:
412       return false;
413    }
414 }
415 
416 static inline enum broadcom_shader_stage
417 broadcom_binning_shader_stage_for_render_stage(enum broadcom_shader_stage stage)
418 {
419    switch (stage) {
420    case BROADCOM_SHADER_VERTEX:
421       return BROADCOM_SHADER_VERTEX_BIN;
422    case BROADCOM_SHADER_GEOMETRY:
423       return BROADCOM_SHADER_GEOMETRY_BIN;
424    default:
425       unreachable("Invalid shader stage");
426    }
427 }
428 
429 static inline const char *
430 broadcom_shader_stage_name(enum broadcom_shader_stage stage)
431 {
432    switch (stage) {
433    case BROADCOM_SHADER_VERTEX_BIN:
434       return "MESA_SHADER_VERTEX_BIN";
435    case BROADCOM_SHADER_GEOMETRY_BIN:
436       return "MESA_SHADER_GEOMETRY_BIN";
437    default:
438       return gl_shader_stage_name(broadcom_shader_stage_to_gl(stage));
439    }
440 }
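
/* Example round trip through the stage mappings above (illustrative only):
 *
 *    enum broadcom_shader_stage s =
 *       gl_shader_stage_to_broadcom(MESA_SHADER_VERTEX);
 *    s = broadcom_binning_shader_stage_for_render_stage(s);
 *    assert(broadcom_shader_stage_is_binning(s));
 *    assert(broadcom_shader_stage_to_gl(s) == MESA_SHADER_VERTEX);
 *    assert(!strcmp(broadcom_shader_stage_name(s), "MESA_SHADER_VERTEX_BIN"));
 */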
441 
442 struct v3dv_pipeline_cache {
443    struct vk_object_base base;
444 
445    struct v3dv_device *device;
446    mtx_t mutex;
447 
448    struct hash_table *nir_cache;
449    struct v3dv_pipeline_cache_stats nir_stats;
450 
451    struct hash_table *cache;
452    struct v3dv_pipeline_cache_stats stats;
453 
454    /* For VK_EXT_pipeline_creation_cache_control. */
455    bool externally_synchronized;
456 };
457 
458 struct v3dv_device {
459    struct vk_device vk;
460 
461    struct v3dv_instance *instance;
462    struct v3dv_physical_device *pdevice;
463 
464    struct v3d_device_info devinfo;
465    struct v3dv_queue queue;
466 
467    /* Guards query->maybe_available and value for timestamps */
468    mtx_t query_mutex;
469 
470    /* Signaled whenever a query is ended */
471    cnd_t query_ended;
472 
473    /* Resources used for meta operations */
474    struct {
475       mtx_t mtx;
476       struct {
477          VkPipelineLayout p_layout;
478          struct hash_table *cache; /* v3dv_meta_color_clear_pipeline */
479       } color_clear;
480       struct {
481          VkPipelineLayout p_layout;
482          struct hash_table *cache; /* v3dv_meta_depth_clear_pipeline */
483       } depth_clear;
484       struct {
485          VkDescriptorSetLayout ds_layout;
486          VkPipelineLayout p_layout;
487          struct hash_table *cache[3]; /* v3dv_meta_blit_pipeline for 1d, 2d, 3d */
488       } blit;
489       struct {
490          VkDescriptorSetLayout ds_layout;
491          VkPipelineLayout p_layout;
492          struct hash_table *cache[3]; /* v3dv_meta_texel_buffer_copy_pipeline for 1d, 2d, 3d */
493       } texel_buffer_copy;
494    } meta;
495 
496    struct v3dv_bo_cache {
497       /** List of struct v3d_bo freed, by age. */
498       struct list_head time_list;
499       /** List of struct v3d_bo freed, per size, by age. */
500       struct list_head *size_list;
501       uint32_t size_list_size;
502 
503       mtx_t lock;
504 
505       uint32_t cache_size;
506       uint32_t cache_count;
507       uint32_t max_cache_size;
508    } bo_cache;
509 
510    uint32_t bo_size;
511    uint32_t bo_count;
512 
513    struct v3dv_pipeline_cache default_pipeline_cache;
514 
515    /* GL_SHADER_STATE_RECORD needs to specify default attribute values. The
516     * following covers the most common case, that is, all attribute formats
517     * being float, allowing us to reuse the same BO for all
518     * pipelines matching this requirement. Pipelines that need integer
519     * attributes will create their own BO.
520     */
521    struct v3dv_bo *default_attribute_float;
522    VkPhysicalDeviceFeatures features;
523 
524    void *device_address_mem_ctx;
525    struct util_dynarray device_address_bo_list; /* Array of struct v3dv_bo * */
526 
527 #ifdef ANDROID
528    const void *gralloc;
529    enum {
530       V3DV_GRALLOC_UNKNOWN,
531       V3DV_GRALLOC_CROS,
532       V3DV_GRALLOC_OTHER,
533    } gralloc_type;
534 #endif
535 };
536 
537 struct v3dv_device_memory {
538    struct vk_object_base base;
539 
540    struct v3dv_bo *bo;
541    const VkMemoryType *type;
542    bool is_for_wsi;
543    bool is_for_device_address;
544 };
545 
546 #define V3D_OUTPUT_IMAGE_FORMAT_NO 255
547 #define TEXTURE_DATA_FORMAT_NO     255
548 
549 struct v3dv_format {
550    bool supported;
551 
552    /* One of V3D33_OUTPUT_IMAGE_FORMAT_*, or V3D_OUTPUT_IMAGE_FORMAT_NO */
553    uint8_t rt_type;
554 
555    /* One of V3D33_TEXTURE_DATA_FORMAT_*. */
556    uint8_t tex_type;
557 
558    /* Swizzle to apply to the RGBA shader output for storing to the tile
559     * buffer, to the RGBA tile buffer to produce shader input (for
560     * blending), and for turning the rgba8888 texture sampler return
561     * value into shader rgba values.
562     */
563    uint8_t swizzle[4];
564 
565    /* Whether the return value is 16F/I/UI or 32F/I/UI. */
566    uint8_t return_size;
567 
568    /* If the format supports (linear) filtering when texturing. */
569    bool supports_filtering;
570 };
571 
572 struct v3d_resource_slice {
573    uint32_t offset;
574    uint32_t stride;
575    uint32_t padded_height;
576    /* Size of a single pane of the slice.  For 3D textures, there will be
577     * a number of panes equal to the minified, power-of-two-aligned
578     * depth.
579     */
580    uint32_t size;
581    uint8_t ub_pad;
582    enum v3d_tiling_mode tiling;
583    uint32_t padded_height_of_output_image_in_uif_blocks;
584 };
585 
586 bool v3dv_format_swizzle_needs_rb_swap(const uint8_t *swizzle);
587 bool v3dv_format_swizzle_needs_reverse(const uint8_t *swizzle);
588 
589 struct v3dv_image {
590    struct vk_image vk;
591 
592    const struct v3dv_format *format;
593    uint32_t cpp;
594    bool tiled;
595 
596    struct v3d_resource_slice slices[V3D_MAX_MIP_LEVELS];
597    uint64_t size; /* Total size in bytes */
598    uint32_t cube_map_stride;
599 
600    struct v3dv_device_memory *mem;
601    VkDeviceSize mem_offset;
602    uint32_t alignment;
603 
604 #ifdef ANDROID
605    /* Image is backed by VK_ANDROID_native_buffer. */
606    bool is_native_buffer_memory;
607 #endif
608 };
609 
610 VkImageViewType v3dv_image_type_to_view_type(VkImageType type);
611 
612 /* Pre-generating packets needs to consider changes in packet sizes across hw
613  * versions. Keep things simple and allocate enough space for any supported
614  * version. We ensure the size is large enough through static asserts.
615  */
616 #define V3DV_TEXTURE_SHADER_STATE_LENGTH 32
617 #define V3DV_SAMPLER_STATE_LENGTH 24
618 #define V3DV_BLEND_CFG_LENGTH 5
619 #define V3DV_CFG_BITS_LENGTH 4
620 #define V3DV_GL_SHADER_STATE_RECORD_LENGTH 36
621 #define V3DV_VCM_CACHE_SIZE_LENGTH 2
622 #define V3DV_GL_SHADER_STATE_ATTRIBUTE_RECORD_LENGTH 16
623 #define V3DV_STENCIL_CFG_LENGTH 6
624 
625 struct v3dv_image_view {
626    struct vk_image_view vk;
627 
628    const struct v3dv_format *format;
629    bool swap_rb;
630    bool channel_reverse;
631    uint32_t internal_bpp;
632    uint32_t internal_type;
633    uint32_t offset;
634 
635    /* Precomputed (composed from createinfo->components and format swizzle)
636     * swizzles to pass in to the shader key.
637     *
638     * This could also be included in the descriptor bo, but the shader state
639     * packet doesn't need it in a bo, so we can just avoid a memory copy.
640     */
641    uint8_t swizzle[4];
642 
643    /* Prepacked TEXTURE_SHADER_STATE. It will be copied to the descriptor info
644     * during UpdateDescriptorSets.
645     *
646     * Empirical tests show that cube arrays need a different shader state
647     * depending on whether they are used with a sampler or not, so for these
648     * we generate two states and select the one to use based on the descriptor
649     * type.
650     */
651    uint8_t texture_shader_state[2][V3DV_TEXTURE_SHADER_STATE_LENGTH];
652 };
653 
654 VkResult v3dv_create_image_view(struct v3dv_device *device,
655                                 const VkImageViewCreateInfo *pCreateInfo,
656                                 VkImageView *pView);
657 
658 uint32_t v3dv_layer_offset(const struct v3dv_image *image, uint32_t level, uint32_t layer);
659 
660 struct v3dv_buffer {
661    struct vk_object_base base;
662 
663    VkDeviceSize size;
664    VkBufferUsageFlags usage;
665    uint32_t alignment;
666 
667    struct v3dv_device_memory *mem;
668    VkDeviceSize mem_offset;
669 };
670 
671 struct v3dv_buffer_view {
672    struct vk_object_base base;
673 
674    struct v3dv_buffer *buffer;
675 
676    VkFormat vk_format;
677    const struct v3dv_format *format;
678    uint32_t internal_bpp;
679    uint32_t internal_type;
680 
681    uint32_t offset;
682    uint32_t size;
683    uint32_t num_elements;
684 
685    /* Prepacked TEXTURE_SHADER_STATE. */
686    uint8_t texture_shader_state[V3DV_TEXTURE_SHADER_STATE_LENGTH];
687 };
688 
689 struct v3dv_subpass_attachment {
690    uint32_t attachment;
691    VkImageLayout layout;
692 };
693 
694 struct v3dv_subpass {
695    uint32_t input_count;
696    struct v3dv_subpass_attachment *input_attachments;
697 
698    uint32_t color_count;
699    struct v3dv_subpass_attachment *color_attachments;
700    struct v3dv_subpass_attachment *resolve_attachments;
701 
702    struct v3dv_subpass_attachment ds_attachment;
703    struct v3dv_subpass_attachment ds_resolve_attachment;
704    bool resolve_depth, resolve_stencil;
705 
706    /* If we need to emit the clear of the depth/stencil attachment using
707     * a draw call instead of using the TLB (GFXH-1461).
708     */
709    bool do_depth_clear_with_draw;
710    bool do_stencil_clear_with_draw;
711 
712    /* Multiview */
713    uint32_t view_mask;
714 };
715 
716 struct v3dv_render_pass_attachment {
717    VkAttachmentDescription2 desc;
718 
719    uint32_t first_subpass;
720    uint32_t last_subpass;
721 
722    /* When multiview is enabled, we no longer care about when a particular
723     * attachment is first or last used in a render pass, since not all views
724     * in the attachment will meet that criteria. Instead, we need to track
725     * each individual view (layer) in each attachment and emit our stores,
726     * loads and clears accordingly.
727     */
728    struct {
729       uint32_t first_subpass;
730       uint32_t last_subpass;
731    } views[MAX_MULTIVIEW_VIEW_COUNT];
732 
733    /* If this is a multisampled attachment that is going to be resolved,
734     * whether we may be able to use the TLB hardware resolve based on the
735     * attachment format.
736     */
737    bool try_tlb_resolve;
738 };
739 
740 struct v3dv_render_pass {
741    struct vk_object_base base;
742 
743    bool multiview_enabled;
744 
745    uint32_t attachment_count;
746    struct v3dv_render_pass_attachment *attachments;
747 
748    uint32_t subpass_count;
749    struct v3dv_subpass *subpasses;
750 
751    struct v3dv_subpass_attachment *subpass_attachments;
752 };
753 
754 struct v3dv_framebuffer {
755    struct vk_object_base base;
756 
757    uint32_t width;
758    uint32_t height;
759    uint32_t layers;
760 
761    /* Typically, edge tiles in the framebuffer have padding depending on the
762     * underlying tiling layout. One consequence of this is that when the
763     * framebuffer dimensions are not aligned to tile boundaries, tile stores
764     * would still write full tiles on the edges and write to the padded area.
765     * If the framebuffer is aliasing a smaller region of a larger image, then
766     * we need to be careful with this though, as we won't have padding on the
767     * edge tiles (which typically means that we need to load the tile buffer
768     * before we store).
769     */
770    bool has_edge_padding;
771 
772    uint32_t attachment_count;
773    uint32_t color_attachment_count;
774 
775    /* Notice that elements in 'attachments' will be NULL if the framebuffer
776     * was created imageless. The driver is expected to access attachment info
777     * from the command buffer state instead.
778     */
779    struct v3dv_image_view *attachments[0];
780 };
781 
782 struct v3dv_frame_tiling {
783    uint32_t width;
784    uint32_t height;
785    uint32_t layers;
786    uint32_t render_target_count;
787    uint32_t internal_bpp;
788    bool     msaa;
789    bool     double_buffer;
790    uint32_t tile_width;
791    uint32_t tile_height;
792    uint32_t draw_tiles_x;
793    uint32_t draw_tiles_y;
794    uint32_t supertile_width;
795    uint32_t supertile_height;
796    uint32_t frame_width_in_supertiles;
797    uint32_t frame_height_in_supertiles;
798 };
799 
800 bool v3dv_subpass_area_is_tile_aligned(struct v3dv_device *device,
801                                        const VkRect2D *area,
802                                        struct v3dv_framebuffer *fb,
803                                        struct v3dv_render_pass *pass,
804                                        uint32_t subpass_idx);
805 
806 /* Checks if we need to emit 2 initial tile clears for double buffer mode.
807  * This happens when we render at least 2 tiles, because in this mode each
808  * tile uses a different half of the tile buffer memory so we can have 2 tiles
809  * in flight (one being stored to memory and the next being rendered). In this
810  * scenario, if we emit a single initial tile clear we would only clear the
811  * first half of the tile buffer.
812  */
813 static inline bool
814 v3dv_do_double_initial_tile_clear(const struct v3dv_frame_tiling *tiling)
815 {
816    return tiling->double_buffer &&
817           (tiling->draw_tiles_x > 1 || tiling->draw_tiles_y > 1 ||
818            tiling->layers > 1);
819 }
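
/* Example (illustrative): with double_buffer enabled, a frame spanning 2x1
 * tiles renders tile 0 into one half of the tile buffer and tile 1 into the
 * other, so both halves need an initial clear. A single-tile, single-layer
 * frame only ever touches one half, so a single clear suffices.
 */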
820 
821 enum v3dv_cmd_buffer_status {
822    V3DV_CMD_BUFFER_STATUS_NEW           = 0,
823    V3DV_CMD_BUFFER_STATUS_INITIALIZED   = 1,
824    V3DV_CMD_BUFFER_STATUS_RECORDING     = 2,
825    V3DV_CMD_BUFFER_STATUS_EXECUTABLE    = 3
826 };
827 
828 union v3dv_clear_value {
829    uint32_t color[4];
830    struct {
831       float z;
832       uint8_t s;
833    };
834 };
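
/* Example (illustrative): a depth/stencil clear of 1.0/0 would be stored as
 *
 *    union v3dv_clear_value cv = { .z = 1.0f, .s = 0 };
 *
 * while for color the four uint32_t words hold the clear color already
 * packed for the tile buffer's internal format.
 */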
835 
836 struct v3dv_cmd_buffer_attachment_state {
837    /* The original clear value as provided by the Vulkan API */
838    VkClearValue vk_clear_value;
839 
840    /* The hardware clear value */
841    union v3dv_clear_value clear_value;
842 
843    /* The underlying image view (from the framebuffer or, if an imageless
844     * framebuffer is used, from VkRenderPassAttachmentBeginInfo).
845     */
846    struct v3dv_image_view *image_view;
847 
848    /* If this is a multisampled attachment with a resolve operation. */
849    bool has_resolve;
850 
851    /* If this is a multisampled attachment with a resolve operation,
852     * whether we can use the TLB for the resolve.
853     */
854    bool use_tlb_resolve;
855 };
856 
857 struct v3dv_viewport_state {
858    uint32_t count;
859    VkViewport viewports[MAX_VIEWPORTS];
860    float translate[MAX_VIEWPORTS][3];
861    float scale[MAX_VIEWPORTS][3];
862 };
863 
864 struct v3dv_scissor_state {
865    uint32_t count;
866    VkRect2D scissors[MAX_SCISSORS];
867 };
868 
869 /* Mostly a v3dv mapping of VkDynamicState, used to track which state is
870  * defined as dynamic.
871  */
872 enum v3dv_dynamic_state_bits {
873    V3DV_DYNAMIC_VIEWPORT                  = 1 << 0,
874    V3DV_DYNAMIC_SCISSOR                   = 1 << 1,
875    V3DV_DYNAMIC_STENCIL_COMPARE_MASK      = 1 << 2,
876    V3DV_DYNAMIC_STENCIL_WRITE_MASK        = 1 << 3,
877    V3DV_DYNAMIC_STENCIL_REFERENCE         = 1 << 4,
878    V3DV_DYNAMIC_BLEND_CONSTANTS           = 1 << 5,
879    V3DV_DYNAMIC_DEPTH_BIAS                = 1 << 6,
880    V3DV_DYNAMIC_LINE_WIDTH                = 1 << 7,
881    V3DV_DYNAMIC_COLOR_WRITE_ENABLE        = 1 << 8,
882    V3DV_DYNAMIC_ALL                       = (1 << 9) - 1,
883 };
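
/* Sketch of how VkDynamicState values map to the bits above (hypothetical
 * helper; the real translation lives in the pipeline creation code):
 *
 *    static inline enum v3dv_dynamic_state_bits
 *    dynamic_state_bit(VkDynamicState s)
 *    {
 *       switch (s) {
 *       case VK_DYNAMIC_STATE_VIEWPORT:   return V3DV_DYNAMIC_VIEWPORT;
 *       case VK_DYNAMIC_STATE_SCISSOR:    return V3DV_DYNAMIC_SCISSOR;
 *       case VK_DYNAMIC_STATE_LINE_WIDTH: return V3DV_DYNAMIC_LINE_WIDTH;
 *       // ... remaining cases follow the same pattern
 *       default: unreachable("unsupported dynamic state");
 *       }
 *    }
 */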
884 
885 /* Flags for dirty pipeline state.
886  */
887 enum v3dv_cmd_dirty_bits {
888    V3DV_CMD_DIRTY_VIEWPORT                  = 1 << 0,
889    V3DV_CMD_DIRTY_SCISSOR                   = 1 << 1,
890    V3DV_CMD_DIRTY_STENCIL_COMPARE_MASK      = 1 << 2,
891    V3DV_CMD_DIRTY_STENCIL_WRITE_MASK        = 1 << 3,
892    V3DV_CMD_DIRTY_STENCIL_REFERENCE         = 1 << 4,
893    V3DV_CMD_DIRTY_PIPELINE                  = 1 << 5,
894    V3DV_CMD_DIRTY_COMPUTE_PIPELINE          = 1 << 6,
895    V3DV_CMD_DIRTY_VERTEX_BUFFER             = 1 << 7,
896    V3DV_CMD_DIRTY_INDEX_BUFFER              = 1 << 8,
897    V3DV_CMD_DIRTY_DESCRIPTOR_SETS           = 1 << 9,
898    V3DV_CMD_DIRTY_COMPUTE_DESCRIPTOR_SETS   = 1 << 10,
899    V3DV_CMD_DIRTY_PUSH_CONSTANTS            = 1 << 11,
900    V3DV_CMD_DIRTY_PUSH_CONSTANTS_UBO        = 1 << 12,
901    V3DV_CMD_DIRTY_BLEND_CONSTANTS           = 1 << 13,
902    V3DV_CMD_DIRTY_OCCLUSION_QUERY           = 1 << 14,
903    V3DV_CMD_DIRTY_DEPTH_BIAS                = 1 << 15,
904    V3DV_CMD_DIRTY_LINE_WIDTH                = 1 << 16,
905    V3DV_CMD_DIRTY_VIEW_INDEX                = 1 << 17,
906    V3DV_CMD_DIRTY_COLOR_WRITE_ENABLE        = 1 << 18,
907 };
908 
909 struct v3dv_dynamic_state {
910    /**
911     * Bitmask of (1 << VK_DYNAMIC_STATE_*).
912     * Defines the set of saved dynamic state.
913     */
914    uint32_t mask;
915 
916    struct v3dv_viewport_state viewport;
917 
918    struct v3dv_scissor_state scissor;
919 
920    struct {
921       uint32_t front;
922       uint32_t back;
923    } stencil_compare_mask;
924 
925    struct {
926       uint32_t front;
927       uint32_t back;
928    } stencil_write_mask;
929 
930    struct {
931       uint32_t front;
932       uint32_t back;
933    } stencil_reference;
934 
935    float blend_constants[4];
936 
937    struct {
938       float constant_factor;
939       float depth_bias_clamp;
940       float slope_factor;
941    } depth_bias;
942 
943    float line_width;
944 
945    uint32_t color_write_enable;
946 };
947 
948 void v3dv_viewport_compute_xform(const VkViewport *viewport,
949                                  float scale[3],
950                                  float translate[3]);
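
/* For reference, assuming the standard Vulkan viewport mapping, the
 * transform computed above is:
 *
 *    scale     = { width / 2, height / 2, maxDepth - minDepth }
 *    translate = { x + width / 2, y + height / 2, minDepth }
 */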
951 
952 enum v3dv_ez_state {
953    V3D_EZ_UNDECIDED = 0,
954    V3D_EZ_GT_GE,
955    V3D_EZ_LT_LE,
956    V3D_EZ_DISABLED,
957 };
958 
959 enum v3dv_job_type {
960    V3DV_JOB_TYPE_GPU_CL = 0,
961    V3DV_JOB_TYPE_GPU_CL_SECONDARY,
962    V3DV_JOB_TYPE_GPU_TFU,
963    V3DV_JOB_TYPE_GPU_CSD,
964    V3DV_JOB_TYPE_CPU_RESET_QUERIES,
965    V3DV_JOB_TYPE_CPU_END_QUERY,
966    V3DV_JOB_TYPE_CPU_COPY_QUERY_RESULTS,
967    V3DV_JOB_TYPE_CPU_SET_EVENT,
968    V3DV_JOB_TYPE_CPU_WAIT_EVENTS,
969    V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE,
970    V3DV_JOB_TYPE_CPU_CSD_INDIRECT,
971    V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY,
972 };
973 
974 struct v3dv_reset_query_cpu_job_info {
975    struct v3dv_query_pool *pool;
976    uint32_t first;
977    uint32_t count;
978 };
979 
980 struct v3dv_end_query_cpu_job_info {
981    struct v3dv_query_pool *pool;
982    uint32_t query;
983 
984    /* This is one unless multiview is used */
985    uint32_t count;
986 };
987 
988 struct v3dv_copy_query_results_cpu_job_info {
989    struct v3dv_query_pool *pool;
990    uint32_t first;
991    uint32_t count;
992    struct v3dv_buffer *dst;
993    uint32_t offset;
994    uint32_t stride;
995    VkQueryResultFlags flags;
996 };
997 
998 struct v3dv_submit_sync_info {
999    /* List of syncs to wait before running a job */
1000    uint32_t wait_count;
1001    struct vk_sync_wait *waits;
1002 
1003    /* List of syncs to signal when all jobs complete */
1004    uint32_t signal_count;
1005    struct vk_sync_signal *signals;
1006 };
1007 
1008 struct v3dv_event_set_cpu_job_info {
1009    struct v3dv_event *event;
1010    int state;
1011 };
1012 
1013 struct v3dv_event_wait_cpu_job_info {
1014    /* List of events to wait on */
1015    uint32_t event_count;
1016    struct v3dv_event **events;
1017 };
1018 
1019 struct v3dv_copy_buffer_to_image_cpu_job_info {
1020    struct v3dv_image *image;
1021    struct v3dv_buffer *buffer;
1022    uint32_t buffer_offset;
1023    uint32_t buffer_stride;
1024    uint32_t buffer_layer_stride;
1025    VkOffset3D image_offset;
1026    VkExtent3D image_extent;
1027    uint32_t mip_level;
1028    uint32_t base_layer;
1029    uint32_t layer_count;
1030 };
1031 
1032 struct v3dv_csd_indirect_cpu_job_info {
1033    struct v3dv_buffer *buffer;
1034    uint32_t offset;
1035    struct v3dv_job *csd_job;
1036    uint32_t wg_size;
1037    uint32_t *wg_uniform_offsets[3];
1038    bool needs_wg_uniform_rewrite;
1039 };
1040 
1041 struct v3dv_timestamp_query_cpu_job_info {
1042    struct v3dv_query_pool *pool;
1043    uint32_t query;
1044 
1045    /* This is one unless multiview is used */
1046    uint32_t count;
1047 };
1048 
1049 /* Number of perfmons required to handle all supported performance counters */
1050 #define V3DV_MAX_PERFMONS DIV_ROUND_UP(V3D_PERFCNT_NUM, \
1051                                        DRM_V3D_MAX_PERF_COUNTERS)
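
/* Worked example (values illustrative; the real constants come from the
 * v3d and DRM headers): with 87 supported counters and 32 counters per
 * kernel perfmon, DIV_ROUND_UP(87, 32) = 3 perfmons are required.
 */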
1052 
1053 struct v3dv_perf_query {
1054    uint32_t kperfmon_ids[V3DV_MAX_PERFMONS];
1055 
1056    /* A DRM syncobj to wait on the GPU jobs for which we are collecting
1057     * performance data.
1058     */
1059    struct vk_sync *last_job_sync;
1060 };
1061 
1062 struct v3dv_job {
1063    struct list_head list_link;
1064 
1065    /* We only create job clones when executing secondary command buffers into
1066     * primaries. These clones don't make deep copies of the original object
1067     * so we want to flag them to avoid freeing resources they don't own.
1068     */
1069    bool is_clone;
1070 
1071    /* If the job executes on the transfer stage of the pipeline */
1072    bool is_transfer;
1073 
1074    /* VK_KHR_buffer_device_address allows shaders to use pointers that can
1075     * dereference memory in any buffer that has been flagged with
1076     * VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_KHR. These buffers may not
1077     * be bound via descriptor sets, so we need to make sure that a job that
1078     * uses this functionality includes all these buffers in its kernel
1079     * submission.
1080     */
1081    bool uses_buffer_device_address;
1082 
1083    enum v3dv_job_type type;
1084 
1085    struct v3dv_device *device;
1086 
1087    struct v3dv_cmd_buffer *cmd_buffer;
1088 
1089    struct v3dv_cl bcl;
1090    struct v3dv_cl rcl;
1091    struct v3dv_cl indirect;
1092 
1093    /* Set of all BOs referenced by the job. This will be used for making
1094     * the list of BOs that the kernel will need to have paged in to
1095     * execute our job.
1096     */
1097    struct set *bos;
1098    uint32_t bo_count;
1099    uint64_t bo_handle_mask;
1100 
1101    struct v3dv_bo *tile_alloc;
1102    struct v3dv_bo *tile_state;
1103 
1104    bool tmu_dirty_rcl;
1105 
1106    uint32_t first_subpass;
1107 
1108    /* When the current subpass is split into multiple jobs, this flag is set
1109     * to true for any jobs after the first in the same subpass.
1110     */
1111    bool is_subpass_continue;
1112 
1113    /* If this job is the last job emitted for a subpass. */
1114    bool is_subpass_finish;
1115 
1116    struct v3dv_frame_tiling frame_tiling;
1117 
1118    enum v3dv_ez_state ez_state;
1119    enum v3dv_ez_state first_ez_state;
1120 
1121    /* If we have already decided if we need to disable Early Z/S completely
1122     * for this job.
1123     */
1124    bool decided_global_ez_enable;
1125 
1126    /* If the job emitted any draw calls with Early Z/S enabled */
1127    bool has_ez_draws;
1128 
1129    /* If this job has been configured to use early Z/S clear */
1130    bool early_zs_clear;
1131 
1132    /* Number of draw calls recorded into the job */
1133    uint32_t draw_count;
1134 
1135    /* A flag indicating whether we want to flush every draw separately. This
1136     * can be used for debugging, or for cases where special circumstances
1137     * require this behavior.
1138     */
1139    bool always_flush;
1140 
1141    /* A mask of V3DV_BARRIER_* indicating the source(s) of the barrier. We
1142     * can use this to select the hw queues where we need to serialize the job.
1143     */
1144    uint8_t serialize;
1145 
1146    /* If this is a CL job, whether we should sync before binning */
1147    bool needs_bcl_sync;
1148 
1149    /* Job specs for CPU jobs */
1150    union {
1151       struct v3dv_reset_query_cpu_job_info          query_reset;
1152       struct v3dv_end_query_cpu_job_info            query_end;
1153       struct v3dv_copy_query_results_cpu_job_info   query_copy_results;
1154       struct v3dv_event_set_cpu_job_info            event_set;
1155       struct v3dv_event_wait_cpu_job_info           event_wait;
1156       struct v3dv_copy_buffer_to_image_cpu_job_info copy_buffer_to_image;
1157       struct v3dv_csd_indirect_cpu_job_info         csd_indirect;
1158       struct v3dv_timestamp_query_cpu_job_info      query_timestamp;
1159    } cpu;
1160 
1161    /* Job specs for TFU jobs */
1162    struct drm_v3d_submit_tfu tfu;
1163 
1164    /* Job specs for CSD jobs */
1165    struct {
1166       struct v3dv_bo *shared_memory;
1167       uint32_t wg_count[3];
1168       uint32_t wg_base[3];
1169       struct drm_v3d_submit_csd submit;
1170    } csd;
1171 
1172    /* Perfmons with last job sync for CSD and CL jobs */
1173    struct v3dv_perf_query *perf;
1174 };
1175 
1176 void v3dv_job_init(struct v3dv_job *job,
1177                    enum v3dv_job_type type,
1178                    struct v3dv_device *device,
1179                    struct v3dv_cmd_buffer *cmd_buffer,
1180                    int32_t subpass_idx);
1181 void v3dv_job_destroy(struct v3dv_job *job);
1182 
1183 void v3dv_job_add_bo(struct v3dv_job *job, struct v3dv_bo *bo);
1184 void v3dv_job_add_bo_unchecked(struct v3dv_job *job, struct v3dv_bo *bo);
1185 
1186 void v3dv_job_start_frame(struct v3dv_job *job,
1187                           uint32_t width,
1188                           uint32_t height,
1189                           uint32_t layers,
1190                           bool allocate_tile_state_for_all_layers,
1191                           uint32_t render_target_count,
1192                           uint8_t max_internal_bpp,
1193                           bool msaa);
1194 
1195 bool v3dv_job_type_is_gpu(struct v3dv_job *job);
1196 
1197 struct v3dv_job *
1198 v3dv_job_clone_in_cmd_buffer(struct v3dv_job *job,
1199                              struct v3dv_cmd_buffer *cmd_buffer);
1200 
1201 struct v3dv_job *v3dv_cmd_buffer_create_cpu_job(struct v3dv_device *device,
1202                                                 enum v3dv_job_type type,
1203                                                 struct v3dv_cmd_buffer *cmd_buffer,
1204                                                 uint32_t subpass_idx);
1205 
1206 void
1207 v3dv_cmd_buffer_ensure_array_state(struct v3dv_cmd_buffer *cmd_buffer,
1208                                    uint32_t slot_size,
1209                                    uint32_t used_count,
1210                                    uint32_t *alloc_count,
1211                                    void **ptr);
1212 
1213 void v3dv_cmd_buffer_emit_pre_draw(struct v3dv_cmd_buffer *cmd_buffer,
1214                                    bool indexed, bool indirect);
1215 
1216 /* FIXME: only used on v3dv_cmd_buffer and v3dvx_cmd_buffer, perhaps move to a
1217  * cmd_buffer specific header?
1218  */
1219 struct v3dv_draw_info {
1220    uint32_t vertex_count;
1221    uint32_t instance_count;
1222    uint32_t first_vertex;
1223    uint32_t first_instance;
1224 };
1225 
1226 struct v3dv_vertex_binding {
1227    struct v3dv_buffer *buffer;
1228    VkDeviceSize offset;
1229 };
1230 
1231 struct v3dv_descriptor_state {
1232    struct v3dv_descriptor_set *descriptor_sets[MAX_SETS];
1233    uint32_t valid;
1234    uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS];
1235 };
1236 
1237 struct v3dv_cmd_pipeline_state {
1238    struct v3dv_pipeline *pipeline;
1239 
1240    struct v3dv_descriptor_state descriptor_state;
1241 };
1242 
1243 enum {
1244    V3DV_BARRIER_GRAPHICS_BIT = (1 << 0),
1245    V3DV_BARRIER_COMPUTE_BIT  = (1 << 1),
1246    V3DV_BARRIER_TRANSFER_BIT = (1 << 2),
1247 };
1248 #define V3DV_BARRIER_ALL (V3DV_BARRIER_GRAPHICS_BIT | \
1249                           V3DV_BARRIER_TRANSFER_BIT | \
1250                           V3DV_BARRIER_COMPUTE_BIT)
1251 
1252 struct v3dv_barrier_state {
1253    /* Mask of V3DV_BARRIER_* indicating where we consume a barrier. */
1254    uint8_t dst_mask;
1255 
1256    /* For each possible consumer of a barrier, a mask of V3DV_BARRIER_*
1257     * indicating the sources of the dependency.
1258     */
1259    uint8_t src_mask_graphics;
1260    uint8_t src_mask_transfer;
1261    uint8_t src_mask_compute;
1262 
1263    /* For graphics barriers, access masks involved. Used to decide if we need
1264     * to execute a binning or render barrier.
1265     */
1266    VkAccessFlags bcl_buffer_access;
1267    VkAccessFlags bcl_image_access;
1268 };
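
/* Example (illustrative): a barrier where transfer writes are consumed by
 * the graphics pipeline would set dst_mask |= V3DV_BARRIER_GRAPHICS_BIT and
 * src_mask_graphics |= V3DV_BARRIER_TRANSFER_BIT, so job submission knows
 * to serialize the next graphics job against outstanding transfer jobs.
 */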
1269 
1270 struct v3dv_cmd_buffer_state {
1271    struct v3dv_render_pass *pass;
1272    struct v3dv_framebuffer *framebuffer;
1273    VkRect2D render_area;
1274 
1275    /* Current job being recorded */
1276    struct v3dv_job *job;
1277 
1278    uint32_t subpass_idx;
1279 
1280    struct v3dv_cmd_pipeline_state gfx;
1281    struct v3dv_cmd_pipeline_state compute;
1282 
1283    struct v3dv_dynamic_state dynamic;
1284 
1285    uint32_t dirty;
1286    VkShaderStageFlagBits dirty_descriptor_stages;
1287    VkShaderStageFlagBits dirty_push_constants_stages;
1288 
1289    /* Current clip window. We use this to check whether we have an active
1290     * scissor, since in that case we can't use TLB clears and need to fall
1291     * back to drawing rects.
1292     */
1293    VkRect2D clip_window;
1294 
1295    /* Whether our render area is aligned to tile boundaries. If this is false
1296     * then we have tiles that are only partially covered by the render area,
1297     * and therefore, we need to be careful with our loads and stores so we don't
1298     * modify pixels for the tile area that is not covered by the render area.
1299     * This means, for example, that we can't use the TLB to clear, since that
1300     * always clears full tiles.
1301     */
1302    bool tile_aligned_render_area;
1303 
1304    /* FIXME: we have just one client-side BO for the push constants,
1305     * independently of the stageFlags in vkCmdPushConstants, and the
1306     * pipelineBindPoint in vkCmdBindPipeline. We could probably do more stage
1307     * tuning in the future if it makes sense.
1308     */
1309    uint32_t push_constants_size;
1310    uint32_t push_constants_data[MAX_PUSH_CONSTANTS_SIZE / 4];
1311 
1312    uint32_t attachment_alloc_count;
1313    struct v3dv_cmd_buffer_attachment_state *attachments;
1314 
1315    struct v3dv_vertex_binding vertex_bindings[MAX_VBS];
1316 
1317    struct {
1318       VkBuffer buffer;
1319       VkDeviceSize offset;
1320       uint8_t index_size;
1321    } index_buffer;
1322 
1323    /* Current uniforms */
1324    struct {
1325       struct v3dv_cl_reloc vs_bin;
1326       struct v3dv_cl_reloc vs;
1327       struct v3dv_cl_reloc gs_bin;
1328       struct v3dv_cl_reloc gs;
1329       struct v3dv_cl_reloc fs;
1330    } uniforms;
1331 
1332    /* Current view index for multiview rendering */
1333    uint32_t view_index;
1334 
1335    /* Used to flag OOM conditions during command buffer recording */
1336    bool oom;
1337 
1338    /* If we are currently recording job(s) for a transfer operation */
1339    bool is_transfer;
1340 
1341    /* Barrier state tracking */
1342    struct v3dv_barrier_state barrier;
1343 
1344    /* Secondary command buffer state */
1345    struct {
1346       bool occlusion_query_enable;
1347    } inheritance;
1348 
1349    /* Command buffer state saved during a meta operation */
1350    struct {
1351       uint32_t subpass_idx;
1352       VkRenderPass pass;
1353       VkFramebuffer framebuffer;
1354 
1355       uint32_t attachment_alloc_count;
1356       uint32_t attachment_count;
1357       struct v3dv_cmd_buffer_attachment_state *attachments;
1358 
1359       bool tile_aligned_render_area;
1360       VkRect2D render_area;
1361 
1362       struct v3dv_dynamic_state dynamic;
1363 
1364       struct v3dv_cmd_pipeline_state gfx;
1365       bool has_descriptor_state;
1366 
1367       uint32_t push_constants[MAX_PUSH_CONSTANTS_SIZE / 4];
1368       uint32_t push_constants_size;
1369    } meta;
1370 
1371    /* Command buffer state for queries */
1372    struct {
1373       /* A list of vkCmdEndQuery commands recorded in the command buffer during
1374        * a render pass. We queue these here and then schedule the corresponding
1375        * CPU jobs for them at the time we finish the GPU job in which they have
1376        * been recorded.
1377        */
1378       struct {
1379          uint32_t used_count;
1380          uint32_t alloc_count;
1381          struct v3dv_end_query_cpu_job_info *states;
1382       } end;
1383 
1384       struct {
1385          /* This BO is not NULL if we have an active occlusion query, that is,
1386           * we have called vkCmdBeginQuery but not vkCmdEndQuery.
1387           */
1388          struct v3dv_bo *bo;
1389          uint32_t offset;
1390 
1391          /* This pointer is not NULL if we have an active performance query */
1392          struct v3dv_perf_query *perf;
1393       } active_query;
1394    } query;
1395 };
1396 
1397 /* The following struct represents the info from a descriptor that we store on
1398  * the host memory. They are mostly links to other existing vulkan objects,
1399  * like the image_view, in order to access swizzle info, or the buffer used
1400  * for a UBO/SSBO, for example.
1401  *
1402  * FIXME: revisit if it makes sense to just move everything that would be needed
1403  * from a descriptor to the bo.
1404  */
1405 struct v3dv_descriptor {
1406    VkDescriptorType type;
1407 
1408    union {
1409       struct {
1410          struct v3dv_image_view *image_view;
1411          struct v3dv_sampler *sampler;
1412       };
1413 
1414       struct {
1415          struct v3dv_buffer *buffer;
1416          size_t offset;
1417          size_t range;
1418       };
1419 
1420       struct v3dv_buffer_view *buffer_view;
1421    };
1422 };
1423 
1424 struct v3dv_query {
1425    bool maybe_available;
1426    union {
1427       /* Used by GPU queries (occlusion) */
1428       struct {
1429          struct v3dv_bo *bo;
1430          uint32_t offset;
1431       };
1432       /* Used by CPU queries (timestamp) */
1433       uint64_t value;
1434 
1435       /* Used by performance queries */
1436       struct v3dv_perf_query perf;
1437    };
1438 };
1439 
1440 struct v3dv_query_pool {
1441    struct vk_object_base base;
1442 
1443    struct v3dv_bo *bo; /* Only used with GPU queries (occlusion) */
1444 
1445    /* Only used with performance queries */
1446    struct {
1447       uint32_t ncounters;
1448       uint8_t counters[V3D_PERFCNT_NUM];
1449 
1450       /* V3D has a limit on the number of counters we can track in a
1451        * single performance monitor, so if too many counters are requested
1452        * we need to create multiple monitors to record all of them. This
1453        * field represents the number of monitors required for the number
1454        * of counters requested.
1455        */
1456       uint8_t nperfmons;
1457    } perfmon;
1458 
1459    VkQueryType query_type;
1460    uint32_t query_count;
1461    struct v3dv_query *queries;
1462 };
1463 
1464 VkResult v3dv_get_query_pool_results(struct v3dv_device *device,
1465                                      struct v3dv_query_pool *pool,
1466                                      uint32_t first,
1467                                      uint32_t count,
1468                                      void *data,
1469                                      VkDeviceSize stride,
1470                                      VkQueryResultFlags flags);
1471 
1472 void v3dv_reset_query_pools(struct v3dv_device *device,
1473                             struct v3dv_query_pool *query_pool,
1474                             uint32_t first,
1475                             uint32_t last);
1476 
1477 typedef void (*v3dv_cmd_buffer_private_obj_destroy_cb)(VkDevice device,
1478                                                        uint64_t pobj,
1479                                                        VkAllocationCallbacks *alloc);
1480 struct v3dv_cmd_buffer_private_obj {
1481    struct list_head list_link;
1482    uint64_t obj;
1483    v3dv_cmd_buffer_private_obj_destroy_cb destroy_cb;
1484 };
1485 
1486 struct v3dv_cmd_buffer {
1487    struct vk_command_buffer vk;
1488 
1489    struct v3dv_device *device;
1490 
1491    /* Used at submit time to link command buffers in the submission that have
1492     * spawned wait threads, so we can then wait on all of them to complete
1493     * before we process any signal semaphores or fences.
1494     */
1495    struct list_head list_link;
1496 
1497    VkCommandBufferUsageFlags usage_flags;
1498 
1499    enum v3dv_cmd_buffer_status status;
1500 
1501    struct v3dv_cmd_buffer_state state;
1502 
1503    /* Buffer where we upload push constant data to resolve indirect indexing */
1504    struct v3dv_cl_reloc push_constants_resource;
1505 
1506    /* Collection of Vulkan objects created internally by the driver (typically
1507     * during recording of meta operations) that are part of the command buffer
1508     * and should be destroyed with it.
1509     */
1510    struct list_head private_objs; /* v3dv_cmd_buffer_private_obj */
1511 
1512    /* Per-command buffer resources for meta operations. */
1513    struct {
1514       struct {
1515          /* The current descriptor pool for blit sources */
1516          VkDescriptorPool dspool;
1517       } blit;
1518       struct {
1519          /* The current descriptor pool for texel buffer copy sources */
1520          VkDescriptorPool dspool;
1521       } texel_buffer_copy;
1522    } meta;
1523 
1524    /* List of jobs in the command buffer. For primary command buffers it
1525     * represents the jobs we want to submit to the GPU. For secondary command
1526     * buffers it represents jobs that will be merged into a primary command
1527     * buffer via vkCmdExecuteCommands.
1528     */
1529    struct list_head jobs;
1530 };
1531 
1532 struct v3dv_job *v3dv_cmd_buffer_start_job(struct v3dv_cmd_buffer *cmd_buffer,
1533                                            int32_t subpass_idx,
1534                                            enum v3dv_job_type type);
1535 void v3dv_cmd_buffer_finish_job(struct v3dv_cmd_buffer *cmd_buffer);
1536 
1537 struct v3dv_job *v3dv_cmd_buffer_subpass_start(struct v3dv_cmd_buffer *cmd_buffer,
1538                                                uint32_t subpass_idx);
1539 struct v3dv_job *v3dv_cmd_buffer_subpass_resume(struct v3dv_cmd_buffer *cmd_buffer,
1540                                                 uint32_t subpass_idx);
1541 
1542 void v3dv_cmd_buffer_subpass_finish(struct v3dv_cmd_buffer *cmd_buffer);
1543 
1544 void v3dv_cmd_buffer_meta_state_push(struct v3dv_cmd_buffer *cmd_buffer,
1545                                      bool push_descriptor_state);
1546 void v3dv_cmd_buffer_meta_state_pop(struct v3dv_cmd_buffer *cmd_buffer,
1547                                     uint32_t dirty_dynamic_state,
1548                                     bool needs_subpass_resume);
1549 
1550 void v3dv_cmd_buffer_reset_queries(struct v3dv_cmd_buffer *cmd_buffer,
1551                                    struct v3dv_query_pool *pool,
1552                                    uint32_t first,
1553                                    uint32_t count);
1554 
1555 void v3dv_cmd_buffer_begin_query(struct v3dv_cmd_buffer *cmd_buffer,
1556                                  struct v3dv_query_pool *pool,
1557                                  uint32_t query,
1558                                  VkQueryControlFlags flags);
1559 
1560 void v3dv_cmd_buffer_end_query(struct v3dv_cmd_buffer *cmd_buffer,
1561                                struct v3dv_query_pool *pool,
1562                                uint32_t query);
1563 
1564 void v3dv_cmd_buffer_copy_query_results(struct v3dv_cmd_buffer *cmd_buffer,
1565                                         struct v3dv_query_pool *pool,
1566                                         uint32_t first,
1567                                         uint32_t count,
1568                                         struct v3dv_buffer *dst,
1569                                         uint32_t offset,
1570                                         uint32_t stride,
1571                                         VkQueryResultFlags flags);
1572 
1573 void v3dv_cmd_buffer_add_tfu_job(struct v3dv_cmd_buffer *cmd_buffer,
1574                                  struct drm_v3d_submit_tfu *tfu);
1575 
1576 void v3dv_cmd_buffer_rewrite_indirect_csd_job(struct v3dv_csd_indirect_cpu_job_info *info,
1577                                               const uint32_t *wg_counts);
1578 
1579 void v3dv_cmd_buffer_add_private_obj(struct v3dv_cmd_buffer *cmd_buffer,
1580                                      uint64_t obj,
1581                                      v3dv_cmd_buffer_private_obj_destroy_cb destroy_cb);
1582 
1583 void v3dv_cmd_buffer_merge_barrier_state(struct v3dv_barrier_state *dst,
1584                                          struct v3dv_barrier_state *src);
1585 
1586 struct v3dv_event {
1587    struct vk_object_base base;
1588    int state;
1589 };
1590 
1591 struct v3dv_shader_variant {
1592    enum broadcom_shader_stage stage;
1593 
1594    union {
1595       struct v3d_prog_data *base;
1596       struct v3d_vs_prog_data *vs;
1597       struct v3d_gs_prog_data *gs;
1598       struct v3d_fs_prog_data *fs;
1599       struct v3d_compute_prog_data *cs;
1600    } prog_data;
1601 
1602    /* We explicitly save the prog_data_size as it makes it easier to
1603     * serialize.
1604     */
1605    uint32_t prog_data_size;
1606 
1607    /* The assembly for this variant will be uploaded to a BO shared with all
1608     * other shader stages in that pipeline. This is the offset in that BO.
1609     */
1610    uint32_t assembly_offset;
1611 
1612    /* Note: qpu_insts is very likely to be NULL, as it is only used
1613     * temporarily to upload the assembly to the shared bo, since we compile
1614     * the different stages individually.
1615     */
1616    uint64_t *qpu_insts;
1617    uint32_t qpu_insts_size;
1618 };
1619 
1620 /*
1621  * Per-stage info for each stage, useful so shader_module_compile_to_nir and
1622  * other methods don't need so many parameters.
1623  *
1624  * FIXME: for the case of the coordinate shader and the vertex shader, module,
1625  * entrypoint, spec_info and nir are the same. There is also info that is only
1626  * relevant to some stages. But it seemed like too much hassle to create a new
1627  * struct just to handle that. Revisit if this kind of info starts to grow.
1628  */
struct v3dv_pipeline_stage {
   struct v3dv_pipeline *pipeline;

   enum broadcom_shader_stage stage;

   const struct vk_shader_module *module;
   const char *entrypoint;
   const VkSpecializationInfo *spec_info;

   nir_shader *nir;

   /* The following is the combined hash of module+entrypoint+spec_info+nir */
   unsigned char shader_sha1[20];

   /** An identifier for this program, so you can track it in shader-db output. */
   uint32_t program_id;

   VkPipelineCreationFeedback feedback;
};

/* We use the descriptor pool entry for two things:
 * * Track the allocated sets, so we can properly free them if needed.
 * * Track the suballocated pool BO regions, so that if a descriptor set is
 *   freed, the gap can be reallocated later.
 *
 * These only make sense if the pool was not created with the flag
 * VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT.
 */
struct v3dv_descriptor_pool_entry
{
   struct v3dv_descriptor_set *set;
   /* Offset and size of the subregion allocated for this entry from the
    * pool->bo
    */
   uint32_t offset;
   uint32_t size;
};

struct v3dv_descriptor_pool {
   struct vk_object_base base;

   /* A list with all descriptor sets allocated from the pool. */
   struct list_head set_list;

   /* Whether this descriptor pool has been allocated by the driver for
    * internal use, typically to implement meta operations.
    */
   bool is_driver_internal;

   struct v3dv_bo *bo;
   /* Current offset into the descriptor BO. 0 means we have not used it for
    * any descriptor. If the descriptor BO is NULL, this offset is
    * meaningless.
    */
   uint32_t current_offset;

   /* If VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT is not set, the
    * descriptor sets are allocated as a whole from pool memory managed by
    * the following pointers. If the flag is set, these are not used and
    * descriptor sets are allocated/freed individually.
    */
   uint8_t *host_memory_base;
   uint8_t *host_memory_ptr;
   uint8_t *host_memory_end;

   uint32_t entry_count;
   uint32_t max_entry_count;
   struct v3dv_descriptor_pool_entry entries[0];
};
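
/* Allocation sketch (editor's illustrative example, not driver code): when
 * VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT is not set, host memory
 * for sets is carved out linearly from the region delimited by the pointers
 * above, roughly:
 *
 *    // hypothetical, assuming 'size' bytes of host memory are needed
 *    if (pool->host_memory_base) {
 *       if (pool->host_memory_ptr + size > pool->host_memory_end)
 *          return VK_ERROR_OUT_OF_POOL_MEMORY;
 *       set = (struct v3dv_descriptor_set *) pool->host_memory_ptr;
 *       pool->host_memory_ptr += size;
 *    }
 *
 * With the flag set, sets are instead allocated/freed individually and
 * tracked through 'entries' so that freed BO gaps can be reused.
 */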

struct v3dv_descriptor_set {
   struct vk_object_base base;

   /* List link into the list of all sets allocated from the pool */
   struct list_head pool_link;

   struct v3dv_descriptor_pool *pool;

   struct v3dv_descriptor_set_layout *layout;

   /* Offset relative to the descriptor pool BO for this set */
   uint32_t base_offset;

   /* The descriptors below can be indexed (set/binding) using the set_layout
    */
   struct v3dv_descriptor descriptors[0];
};

struct v3dv_descriptor_set_binding_layout {
   VkDescriptorType type;

   /* Number of array elements in this binding */
   uint32_t array_size;

   /* Index into the flattened descriptor set */
   uint32_t descriptor_index;

   uint32_t dynamic_offset_count;
   uint32_t dynamic_offset_index;

   /* Offset into the descriptor set where this descriptor lives (the final
    * offset on the descriptor BO needs to take set->base_offset into
    * account).
    */
   uint32_t descriptor_offset;

   /* Offset in the v3dv_descriptor_set_layout of the immutable samplers, or 0
    * if there are no immutable samplers.
    */
   uint32_t immutable_samplers_offset;
};

struct v3dv_descriptor_set_layout {
   struct vk_object_base base;

   VkDescriptorSetLayoutCreateFlags flags;

   /* Number of bindings in this descriptor set */
   uint32_t binding_count;

   /* Total BO size needed for this descriptor set */
   uint32_t bo_size;

   /* Shader stages affected by this descriptor set */
   uint16_t shader_stages;

   /* Number of descriptors in this descriptor set */
   uint32_t descriptor_count;

   /* Number of dynamic offsets used by this descriptor set */
   uint16_t dynamic_offset_count;

   /* Descriptor set layouts can be destroyed even while they are still in
    * use, so we reference count them.
    */
   uint32_t ref_cnt;

   /* Bindings in this descriptor set */
   struct v3dv_descriptor_set_binding_layout binding[0];
};

void
v3dv_descriptor_set_layout_destroy(struct v3dv_device *device,
                                   struct v3dv_descriptor_set_layout *set_layout);

static inline void
v3dv_descriptor_set_layout_ref(struct v3dv_descriptor_set_layout *set_layout)
{
   assert(set_layout && set_layout->ref_cnt >= 1);
   p_atomic_inc(&set_layout->ref_cnt);
}

static inline void
v3dv_descriptor_set_layout_unref(struct v3dv_device *device,
                                 struct v3dv_descriptor_set_layout *set_layout)
{
   assert(set_layout && set_layout->ref_cnt >= 1);
   if (p_atomic_dec_zero(&set_layout->ref_cnt))
      v3dv_descriptor_set_layout_destroy(device, set_layout);
}
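
/* Usage sketch (editor's illustrative example, not driver code): objects
 * that outlive the VkDescriptorSetLayout handle, such as descriptor sets or
 * pipeline layouts, take a reference when they are created and drop it when
 * they are destroyed:
 *
 *    layout->set[i].layout = set_layout;
 *    v3dv_descriptor_set_layout_ref(set_layout);
 *    ...
 *    v3dv_descriptor_set_layout_unref(device, layout->set[i].layout);
 *
 * The layout is only freed once the last reference is dropped, which is what
 * allows vkDestroyDescriptorSetLayout() to return while sets created from
 * the layout are still alive.
 */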

struct v3dv_pipeline_layout {
   struct vk_object_base base;

   struct {
      struct v3dv_descriptor_set_layout *layout;
      uint32_t dynamic_offset_start;
   } set[MAX_SETS];

   uint32_t num_sets;

   /* Shader stages that are declared to use descriptors from this layout */
   uint32_t shader_stages;

   uint32_t dynamic_offset_count;
   uint32_t push_constant_size;

   unsigned char sha1[20];
};

/*
 * We use descriptor maps for UBOs/SSBOs and textures/samplers, so we need
 * the map to be big enough to include the max value for all of them.
 *
 * FIXME: one alternative would be to allocate each map as big as needed for
 * each descriptor type. That would mean more individual allocations.
 */
#define DESCRIPTOR_MAP_SIZE MAX3(V3D_MAX_TEXTURE_SAMPLERS,                         \
                                 MAX_UNIFORM_BUFFERS + MAX_INLINE_UNIFORM_BUFFERS, \
                                 MAX_STORAGE_BUFFERS)

struct v3dv_descriptor_map {
   /* TODO: avoid fixed size array/justify the size */
   unsigned num_desc; /* Number of descriptors */
   int set[DESCRIPTOR_MAP_SIZE];
   int binding[DESCRIPTOR_MAP_SIZE];
   int array_index[DESCRIPTOR_MAP_SIZE];
   int array_size[DESCRIPTOR_MAP_SIZE];
   bool used[DESCRIPTOR_MAP_SIZE];

   /* NOTE: the following is only used for samplers, but this is the easiest
    * place to put it.
    */
   uint8_t return_size[DESCRIPTOR_MAP_SIZE];
};

struct v3dv_sampler {
   struct vk_object_base base;

   bool compare_enable;
   bool unnormalized_coordinates;
   bool clamp_to_transparent_black_border;

   /* Prepacked SAMPLER_STATE, which is referenced as part of the TMU
    * configuration. If needed, it is copied to the descriptor info during
    * UpdateDescriptorSets.
    */
   uint8_t sampler_state[V3DV_SAMPLER_STATE_LENGTH];
};

struct v3dv_descriptor_template_entry {
   /* The type of descriptor in this entry */
   VkDescriptorType type;

   /* Binding in the descriptor set */
   uint32_t binding;

   /* Offset at which to write into the descriptor set binding */
   uint32_t array_element;

   /* Number of elements to write into the descriptor set binding */
   uint32_t array_count;

   /* Offset into the user provided data */
   size_t offset;

   /* Stride between elements into the user provided data */
   size_t stride;
};

struct v3dv_descriptor_update_template {
   struct vk_object_base base;

   VkPipelineBindPoint bind_point;

   /* The descriptor set this template corresponds to. This value is only
    * valid if the template was created with the templateType
    * VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET.
    */
   uint8_t set;

   /* Number of entries in this template */
   uint32_t entry_count;

   /* Entries of the template */
   struct v3dv_descriptor_template_entry entries[0];
};
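
/* Application sketch (editor's illustrative example, not driver code):
 * vkUpdateDescriptorSetWithTemplate() walks the entries, reading each array
 * element from the user-provided data at 'offset' plus a multiple of
 * 'stride', roughly ('write_descriptor' is a hypothetical helper):
 *
 *    for (uint32_t i = 0; i < template->entry_count; i++) {
 *       const struct v3dv_descriptor_template_entry *e = &template->entries[i];
 *       for (uint32_t j = 0; j < e->array_count; j++) {
 *          const char *src = (const char *) pData + e->offset + j * e->stride;
 *          write_descriptor(set, e->binding, e->array_element + j, e->type, src);
 *       }
 *    }
 */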

/* We keep two special sampler indices that represent the case where a
 * sampler is not needed/provided. The main use is that even if we don't have
 * a sampler, we still need to do the output unpacking (through
 * nir_lower_tex). The easiest way to do this is to add these special "no
 * sampler" entries to the sampler_map, and then use the proper unpacking for
 * that case.
 *
 * We have one for a 16-bit output size and another for a 32-bit output size.
 * We use the info coming from the RelaxedPrecision decoration to decide
 * between the two.
 */
#define V3DV_NO_SAMPLER_16BIT_IDX 0
#define V3DV_NO_SAMPLER_32BIT_IDX 1

/*
 * The following two methods are used on the combined to/from texture/sampler
 * index maps at v3dv_pipeline.
 */
static inline uint32_t
v3dv_pipeline_combined_index_key_create(uint32_t texture_index,
                                        uint32_t sampler_index)
{
   return texture_index << 24 | sampler_index;
}

static inline void
v3dv_pipeline_combined_index_key_unpack(uint32_t combined_index_key,
                                        uint32_t *texture_index,
                                        uint32_t *sampler_index)
{
   uint32_t texture = combined_index_key >> 24;
   uint32_t sampler = combined_index_key & 0xffffff;

   if (texture_index)
      *texture_index = texture;

   if (sampler_index)
      *sampler_index = sampler;
}
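
/* Worked example (editor's illustrative example): the key packs the texture
 * index into the top 8 bits and the sampler index into the low 24 bits, so
 * with texture_index = 2 and sampler_index = 5:
 *
 *    uint32_t key = v3dv_pipeline_combined_index_key_create(2, 5);
 *    // key == (2 << 24) | 5 == 0x02000005
 *
 *    uint32_t tex, samp;
 *    v3dv_pipeline_combined_index_key_unpack(key, &tex, &samp);
 *    // tex == 2, samp == 5
 *
 * This assumes texture indices fit in 8 bits and sampler indices in 24 bits.
 */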

struct v3dv_descriptor_maps {
   struct v3dv_descriptor_map ubo_map;
   struct v3dv_descriptor_map ssbo_map;
   struct v3dv_descriptor_map sampler_map;
   struct v3dv_descriptor_map texture_map;
};

/* This structure represents data shared between different objects, like the
 * pipeline and the pipeline cache, so we ref count it to know when it should
 * be freed.
 */
struct v3dv_pipeline_shared_data {
   uint32_t ref_cnt;

   unsigned char sha1_key[20];

   struct v3dv_descriptor_maps *maps[BROADCOM_SHADER_STAGES];
   struct v3dv_shader_variant *variants[BROADCOM_SHADER_STAGES];

   struct v3dv_bo *assembly_bo;
};

struct v3dv_pipeline_executable_data {
   enum broadcom_shader_stage stage;
   char *nir_str;
   char *qpu_str;
};

struct v3dv_pipeline {
   struct vk_object_base base;

   struct v3dv_device *device;

   VkShaderStageFlags active_stages;

   struct v3dv_render_pass *pass;
   struct v3dv_subpass *subpass;

   /* Note: we can't use just a MESA_SHADER_STAGES array because we also need
    * to track binning shaders. These will be freed once the pipeline has
    * been compiled.
    */
   struct v3dv_pipeline_stage *vs;
   struct v3dv_pipeline_stage *vs_bin;
   struct v3dv_pipeline_stage *gs;
   struct v3dv_pipeline_stage *gs_bin;
   struct v3dv_pipeline_stage *fs;
   struct v3dv_pipeline_stage *cs;

   /* Flags for whether optional pipeline stages are present, for convenience */
   bool has_gs;

   /* Whether any stage in this pipeline uses VK_KHR_buffer_device_address */
   bool uses_buffer_device_address;

   /* Spilling memory requirements */
   struct {
      struct v3dv_bo *bo;
      uint32_t size_per_thread;
   } spill;

   struct v3dv_dynamic_state dynamic_state;

   struct v3dv_pipeline_layout *layout;

   /* Whether this pipeline enables depth writes */
   bool z_updates_enable;

   enum v3dv_ez_state ez_state;

   /* If ez_state is V3D_EZ_DISABLED, this tracks whether the reason for
    * disabling is that the pipeline selects an incompatible depth test
    * function.
    */
   bool incompatible_ez_test;

   bool msaa;
   bool sample_rate_shading;
   uint32_t sample_mask;

   bool primitive_restart;

   /* Indexed by binding, so vb[binding].stride is the stride of the vertex
    * array with that binding.
    */
   struct v3dv_pipeline_vertex_binding {
      uint32_t stride;
      uint32_t instance_divisor;
   } vb[MAX_VBS];
   uint32_t vb_count;

   /* Note that a lot of info from VkVertexInputAttributeDescription is
    * already prepacked, so here we only store what we need to recheck later.
    * The array must be indexed by driver location, since that is the order
    * in which we need to emit the attributes.
    */
   struct v3dv_pipeline_vertex_attrib {
      uint32_t binding;
      uint32_t offset;
      VkFormat vk_format;
   } va[MAX_VERTEX_ATTRIBS];
   uint32_t va_count;

   enum pipe_prim_type topology;

   struct v3dv_pipeline_shared_data *shared_data;

   /* The combined sha1 of the stages, the layout and the pipeline key. */
   unsigned char sha1[20];

   /* In general we can reuse v3dv_device->default_attribute_float, so note
    * that the following can be NULL.
    *
    * FIXME: the content of this BO will be small, so it could be improved to
    * be uploaded to a common BO. But since in most cases it will be NULL, it
    * is not a priority.
    */
   struct v3dv_bo *default_attribute_values;

   struct vpm_config vpm_cfg;
   struct vpm_config vpm_cfg_bin;

   /* If the pipeline should emit any of the stencil configuration packets */
   bool emit_stencil_cfg[2];

   /* Blend state */
   struct {
      /* Per-RT bit mask with blend enables */
      uint8_t enables;
      /* Per-RT prepacked blend config packets */
      uint8_t cfg[V3D_MAX_DRAW_BUFFERS][V3DV_BLEND_CFG_LENGTH];
      /* Flag indicating whether the blend factors in use require
       * color constants.
       */
      bool needs_color_constants;
      /* Mask with enabled color channels for each RT (4 bits per RT) */
      uint32_t color_write_masks;
   } blend;

   /* Depth bias */
   struct {
      bool enabled;
      bool is_z16;
   } depth_bias;

   struct {
      void *mem_ctx;
      bool has_data;
      struct util_dynarray data; /* Array of v3dv_pipeline_executable_data */
   } executables;

   /* Packets prepacked during pipeline creation */
   uint8_t cfg_bits[V3DV_CFG_BITS_LENGTH];
   uint8_t shader_state_record[V3DV_GL_SHADER_STATE_RECORD_LENGTH];
   uint8_t vcm_cache_size[V3DV_VCM_CACHE_SIZE_LENGTH];
   uint8_t vertex_attrs[V3DV_GL_SHADER_STATE_ATTRIBUTE_RECORD_LENGTH *
                        MAX_VERTEX_ATTRIBS];
   uint8_t stencil_cfg[2][V3DV_STENCIL_CFG_LENGTH];
};

static inline VkPipelineBindPoint
v3dv_pipeline_get_binding_point(struct v3dv_pipeline *pipeline)
{
   assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT ||
          !(pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT));
   return pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT ?
      VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS;
}

static inline struct v3dv_descriptor_state *
v3dv_cmd_buffer_get_descriptor_state(struct v3dv_cmd_buffer *cmd_buffer,
                                     struct v3dv_pipeline *pipeline)
{
   if (v3dv_pipeline_get_binding_point(pipeline) == VK_PIPELINE_BIND_POINT_COMPUTE)
      return &cmd_buffer->state.compute.descriptor_state;
   else
      return &cmd_buffer->state.gfx.descriptor_state;
}
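
/* Usage sketch (editor's illustrative example, not driver code):
 * descriptor-binding code can be shared between graphics and compute by
 * resolving the state through the pipeline's bind point:
 *
 *    struct v3dv_descriptor_state *state =
 *       v3dv_cmd_buffer_get_descriptor_state(cmd_buffer, pipeline);
 *    // 'state' now points at either the compute or the graphics
 *    // descriptor state of the command buffer.
 */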

const nir_shader_compiler_options *v3dv_pipeline_get_nir_options(void);

uint32_t v3dv_physical_device_vendor_id(struct v3dv_physical_device *dev);
uint32_t v3dv_physical_device_device_id(struct v3dv_physical_device *dev);

#define v3dv_debug_ignored_stype(sType) \
   mesa_logd("%s: ignored VkStructureType %u:%s\n\n", __func__, (sType), vk_StructureType_to_str(sType))

const uint8_t *v3dv_get_format_swizzle(struct v3dv_device *device, VkFormat f);
uint8_t v3dv_get_tex_return_size(const struct v3dv_format *vf, bool compare_enable);
const struct v3dv_format *
v3dv_get_compatible_tfu_format(struct v3dv_device *device,
                               uint32_t bpp, VkFormat *out_vk_format);
bool v3dv_buffer_format_supports_features(struct v3dv_device *device,
                                          VkFormat vk_format,
                                          VkFormatFeatureFlags2 features);

struct v3dv_cl_reloc v3dv_write_uniforms(struct v3dv_cmd_buffer *cmd_buffer,
                                         struct v3dv_pipeline *pipeline,
                                         struct v3dv_shader_variant *variant);

struct v3dv_cl_reloc v3dv_write_uniforms_wg_offsets(struct v3dv_cmd_buffer *cmd_buffer,
                                                    struct v3dv_pipeline *pipeline,
                                                    struct v3dv_shader_variant *variant,
                                                    uint32_t **wg_count_offsets);

struct v3dv_shader_variant *
v3dv_get_shader_variant(struct v3dv_pipeline_stage *p_stage,
                        struct v3dv_pipeline_cache *cache,
                        struct v3d_key *key,
                        size_t key_size,
                        const VkAllocationCallbacks *pAllocator,
                        VkResult *out_vk_result);

struct v3dv_shader_variant *
v3dv_shader_variant_create(struct v3dv_device *device,
                           enum broadcom_shader_stage stage,
                           struct v3d_prog_data *prog_data,
                           uint32_t prog_data_size,
                           uint32_t assembly_offset,
                           uint64_t *qpu_insts,
                           uint32_t qpu_insts_size,
                           VkResult *out_vk_result);

void
v3dv_shader_variant_destroy(struct v3dv_device *device,
                            struct v3dv_shader_variant *variant);

static inline void
v3dv_pipeline_shared_data_ref(struct v3dv_pipeline_shared_data *shared_data)
{
   assert(shared_data && shared_data->ref_cnt >= 1);
   p_atomic_inc(&shared_data->ref_cnt);
}

void
v3dv_pipeline_shared_data_destroy(struct v3dv_device *device,
                                  struct v3dv_pipeline_shared_data *shared_data);

static inline void
v3dv_pipeline_shared_data_unref(struct v3dv_device *device,
                                struct v3dv_pipeline_shared_data *shared_data)
{
   assert(shared_data && shared_data->ref_cnt >= 1);
   if (p_atomic_dec_zero(&shared_data->ref_cnt))
      v3dv_pipeline_shared_data_destroy(device, shared_data);
}

struct v3dv_descriptor *
v3dv_descriptor_map_get_descriptor(struct v3dv_descriptor_state *descriptor_state,
                                   struct v3dv_descriptor_map *map,
                                   struct v3dv_pipeline_layout *pipeline_layout,
                                   uint32_t index,
                                   uint32_t *dynamic_offset);

struct v3dv_cl_reloc
v3dv_descriptor_map_get_descriptor_bo(struct v3dv_device *device,
                                      struct v3dv_descriptor_state *descriptor_state,
                                      struct v3dv_descriptor_map *map,
                                      struct v3dv_pipeline_layout *pipeline_layout,
                                      uint32_t index,
                                      VkDescriptorType *out_type);

const struct v3dv_sampler *
v3dv_descriptor_map_get_sampler(struct v3dv_descriptor_state *descriptor_state,
                                struct v3dv_descriptor_map *map,
                                struct v3dv_pipeline_layout *pipeline_layout,
                                uint32_t index);

struct v3dv_cl_reloc
v3dv_descriptor_map_get_sampler_state(struct v3dv_device *device,
                                      struct v3dv_descriptor_state *descriptor_state,
                                      struct v3dv_descriptor_map *map,
                                      struct v3dv_pipeline_layout *pipeline_layout,
                                      uint32_t index);

struct v3dv_cl_reloc
v3dv_descriptor_map_get_texture_shader_state(struct v3dv_device *device,
                                             struct v3dv_descriptor_state *descriptor_state,
                                             struct v3dv_descriptor_map *map,
                                             struct v3dv_pipeline_layout *pipeline_layout,
                                             uint32_t index);

struct v3dv_bo *
v3dv_descriptor_map_get_texture_bo(struct v3dv_descriptor_state *descriptor_state,
                                   struct v3dv_descriptor_map *map,
                                   struct v3dv_pipeline_layout *pipeline_layout,
                                   uint32_t index);

static inline const struct v3dv_sampler *
v3dv_immutable_samplers(const struct v3dv_descriptor_set_layout *set,
                        const struct v3dv_descriptor_set_binding_layout *binding)
{
   assert(binding->immutable_samplers_offset);
   return (const struct v3dv_sampler *) ((const char *) set + binding->immutable_samplers_offset);
}

void v3dv_pipeline_cache_init(struct v3dv_pipeline_cache *cache,
                              struct v3dv_device *device,
                              VkPipelineCacheCreateFlags flags,
                              bool cache_enabled);

void v3dv_pipeline_cache_finish(struct v3dv_pipeline_cache *cache);

void v3dv_pipeline_cache_upload_nir(struct v3dv_pipeline *pipeline,
                                    struct v3dv_pipeline_cache *cache,
                                    nir_shader *nir,
                                    unsigned char sha1_key[20]);

nir_shader *v3dv_pipeline_cache_search_for_nir(struct v3dv_pipeline *pipeline,
                                               struct v3dv_pipeline_cache *cache,
                                               const nir_shader_compiler_options *nir_options,
                                               unsigned char sha1_key[20]);

struct v3dv_pipeline_shared_data *
v3dv_pipeline_cache_search_for_pipeline(struct v3dv_pipeline_cache *cache,
                                        unsigned char sha1_key[20],
                                        bool *cache_hit);

void
v3dv_pipeline_cache_upload_pipeline(struct v3dv_pipeline *pipeline,
                                    struct v3dv_pipeline_cache *cache);

struct v3dv_bo *
v3dv_pipeline_create_default_attribute_values(struct v3dv_device *device,
                                              struct v3dv_pipeline *pipeline);

#define V3DV_FROM_HANDLE(__v3dv_type, __name, __handle) \
   VK_FROM_HANDLE(__v3dv_type, __name, __handle)

VK_DEFINE_HANDLE_CASTS(v3dv_cmd_buffer, vk.base, VkCommandBuffer,
                       VK_OBJECT_TYPE_COMMAND_BUFFER)
VK_DEFINE_HANDLE_CASTS(v3dv_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)
VK_DEFINE_HANDLE_CASTS(v3dv_instance, vk.base, VkInstance,
                       VK_OBJECT_TYPE_INSTANCE)
VK_DEFINE_HANDLE_CASTS(v3dv_physical_device, vk.base, VkPhysicalDevice,
                       VK_OBJECT_TYPE_PHYSICAL_DEVICE)
VK_DEFINE_HANDLE_CASTS(v3dv_queue, vk.base, VkQueue, VK_OBJECT_TYPE_QUEUE)

VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_buffer, base, VkBuffer,
                               VK_OBJECT_TYPE_BUFFER)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_buffer_view, base, VkBufferView,
                               VK_OBJECT_TYPE_BUFFER_VIEW)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_device_memory, base, VkDeviceMemory,
                               VK_OBJECT_TYPE_DEVICE_MEMORY)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_pool, base, VkDescriptorPool,
                               VK_OBJECT_TYPE_DESCRIPTOR_POOL)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_set, base, VkDescriptorSet,
                               VK_OBJECT_TYPE_DESCRIPTOR_SET)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_set_layout, base,
                               VkDescriptorSetLayout,
                               VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_update_template, base,
                               VkDescriptorUpdateTemplate,
                               VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_event, base, VkEvent, VK_OBJECT_TYPE_EVENT)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_framebuffer, base, VkFramebuffer,
                               VK_OBJECT_TYPE_FRAMEBUFFER)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_image, vk.base, VkImage,
                               VK_OBJECT_TYPE_IMAGE)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_image_view, vk.base, VkImageView,
                               VK_OBJECT_TYPE_IMAGE_VIEW)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline, base, VkPipeline,
                               VK_OBJECT_TYPE_PIPELINE)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline_cache, base, VkPipelineCache,
                               VK_OBJECT_TYPE_PIPELINE_CACHE)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline_layout, base, VkPipelineLayout,
                               VK_OBJECT_TYPE_PIPELINE_LAYOUT)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_query_pool, base, VkQueryPool,
                               VK_OBJECT_TYPE_QUERY_POOL)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_render_pass, base, VkRenderPass,
                               VK_OBJECT_TYPE_RENDER_PASS)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_sampler, base, VkSampler,
                               VK_OBJECT_TYPE_SAMPLER)

static inline int
v3dv_ioctl(int fd, unsigned long request, void *arg)
{
   if (using_v3d_simulator)
      return v3d_simulator_ioctl(fd, request, arg);
   else
      return drmIoctl(fd, request, arg);
}
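
/* Usage sketch (editor's illustrative example, not driver code): kernel
 * requests go through this wrapper so the same code path works against real
 * hardware and the simulator. Querying a device parameter might look like
 * this (DRM_IOCTL_V3D_GET_PARAM and struct drm_v3d_get_param come from
 * drm-uapi/v3d_drm.h):
 *
 *    struct drm_v3d_get_param p = { .param = DRM_V3D_PARAM_V3D_CORE0_IDENT0 };
 *    if (v3dv_ioctl(fd, DRM_IOCTL_V3D_GET_PARAM, &p) != 0)
 *       return VK_ERROR_INITIALIZATION_FAILED;
 *    // p.value now holds the requested parameter
 */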

/* Flags OOM conditions in command buffer state.
 *
 * Note that no-op jobs don't have a command buffer reference.
 */
static inline void
v3dv_flag_oom(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_job *job)
{
   if (cmd_buffer) {
      cmd_buffer->state.oom = true;
   } else {
      assert(job);
      if (job->cmd_buffer)
         job->cmd_buffer->state.oom = true;
   }
}

#define v3dv_return_if_oom(_cmd_buffer, _job) do {                  \
   const struct v3dv_cmd_buffer *__cmd_buffer = _cmd_buffer;        \
   if (__cmd_buffer && __cmd_buffer->state.oom)                     \
      return;                                                       \
   const struct v3dv_job *__job = _job;                             \
   if (__job && __job->cmd_buffer && __job->cmd_buffer->state.oom)  \
      return;                                                       \
} while (0)
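
/* Usage sketch (editor's illustrative example, with a hypothetical
 * 'alloc_job_resources' helper): recording paths flag the OOM where it
 * happens and rely on the early-return macro at their entry points:
 *
 *    if (!alloc_job_resources(job)) {
 *       v3dv_flag_oom(cmd_buffer, job);
 *       return;
 *    }
 *    ...
 *    v3dv_return_if_oom(cmd_buffer, NULL);  // skip work on a poisoned buffer
 *
 * The error itself is reported later, when vkEndCommandBuffer() sees
 * state.oom set.
 */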

static inline uint32_t
u64_hash(const void *key)
{
   return _mesa_hash_data(key, sizeof(uint64_t));
}

static inline bool
u64_compare(const void *key1, const void *key2)
{
   return memcmp(key1, key2, sizeof(uint64_t)) == 0;
}
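
/* Usage sketch (editor's illustrative example, not driver code): these
 * helpers let a util hash table be keyed on 64-bit values stored behind a
 * pointer; keys are compared by the pointed-to 8 bytes, not by pointer
 * identity:
 *
 *    struct hash_table *ht =
 *       _mesa_hash_table_create(NULL, u64_hash, u64_compare);
 *    uint64_t *key = malloc(sizeof(uint64_t));  // keys must outlive the table
 *    *key = handle;
 *    _mesa_hash_table_insert(ht, key, data);
 */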

/* Helper to call hardware-version-specific functions */
#define v3dv_X(device, thing) ({                      \
   __typeof(&v3d42_##thing) v3d_X_thing;              \
   switch (device->devinfo.ver) {                     \
   case 42:                                           \
      v3d_X_thing = &v3d42_##thing;                   \
      break;                                          \
   default:                                           \
      unreachable("Unsupported hardware generation"); \
   }                                                  \
   v3d_X_thing;                                       \
})
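
/* Usage sketch (editor's illustrative example; 'job_emit_binning_flush' is
 * just a placeholder name): version-independent code dispatches into
 * per-generation implementations through this macro:
 *
 *    v3dv_X(job->device, job_emit_binning_flush)(job);
 *
 * On a ver 42 device this resolves to a call through
 * &v3d42_job_emit_binning_flush; supporting a new generation means adding a
 * case to the switch above.
 */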

/* v3d_macros from common require v3dX and V3DX definitions. Below we need to
 * define v3dX for each supported version, because when we compile code that
 * is not version-specific, all version-specific macros need to be already
 * defined.
 */
#ifdef v3dX
#  include "v3dvx_private.h"
#else
#  define v3dX(x) v3d42_##x
#  include "v3dvx_private.h"
#  undef v3dX
#endif

#ifdef ANDROID
VkResult
v3dv_gralloc_info(struct v3dv_device *device,
                  const VkNativeBufferANDROID *gralloc_info,
                  int *out_dmabuf,
                  int *out_stride,
                  int *out_size,
                  uint64_t *out_modifier);

VkResult
v3dv_import_native_buffer_fd(VkDevice device_h,
                             int dma_buf,
                             const VkAllocationCallbacks *alloc,
                             VkImage image_h);
#endif /* ANDROID */

#endif /* V3DV_PRIVATE_H */